C++: Fix cplusplus-keywordgen to generate Keywords.cpp

Fix keywordgen so that it generates the current Keywords.cpp with a minimal diff,
while remaining compatible with old usage and existing kwgen files.

Add new parameters:
%no-namespace-for-tokens - do not use the namespace for tokens
%pre-check-argument - set type and name of additional variable to use
%function-name - name of a function ("classify" by default)
%pre-check - name of a variable to check for a keyword (i.e. cxxEnabled)
Now "%%" is also used to end and restart keywords parsing to be able to
generate additional keywords for classifyOperator()

Automatically add checks for LanguageFeatures to generated code
Pass input and output files with command line arguments, print usage

Update Keywords.kwgen to include all keywords currently used

Add missing aliases to Token.h

Change-Id: I6cc84e150e0d797277204032fc05ce9cfbd01f58
Reviewed-by: Nikolai Kosjar <nikolai.kosjar@qt.io>
This commit is contained in:
Volodymyr Zibarov
2020-05-26 22:05:11 +03:00
parent 3563c457a5
commit df6d95c3e1
4 changed files with 660 additions and 341 deletions

View File

@@ -23,6 +23,12 @@
namespace CPlusPlus {
// === following code is generated with cplusplus-keywordgen tool
// === from source file: Keywords.kwgen
// === keywords begin
static inline int classify2(const char *s, LanguageFeatures)
{
if (s[0] == 'd') {
@@ -132,13 +138,6 @@ static inline int classify4(const char *s, LanguageFeatures features)
}
}
}
else if (s[1] == 'n') {
if (s[2] == 'u') {
if (s[3] == 'm') {
return T_ENUM;
}
}
}
else if (features.qtKeywordsEnabled && s[1] == 'm') {
if (s[2] == 'i') {
if (s[3] == 't') {
@@ -146,6 +145,13 @@ static inline int classify4(const char *s, LanguageFeatures features)
}
}
}
else if (s[1] == 'n') {
if (s[2] == 'u') {
if (s[3] == 'm') {
return T_ENUM;
}
}
}
}
else if (s[0] == 'g') {
if (s[1] == 'o') {
@@ -165,15 +171,15 @@ static inline int classify4(const char *s, LanguageFeatures features)
}
}
}
else if (s[0] == 't') {
if (features.cxxEnabled && s[1] == 'h') {
else if (features.cxxEnabled && s[0] == 't') {
if (s[1] == 'h') {
if (s[2] == 'i') {
if (s[3] == 's') {
return T_THIS;
}
}
}
else if (features.cxxEnabled && s[1] == 'r') {
else if (s[1] == 'r') {
if (s[2] == 'u') {
if (s[3] == 'e') {
return T_TRUE;
@@ -285,13 +291,11 @@ static inline int classify5(const char *s, LanguageFeatures features)
}
}
}
else if (features.qtKeywordsEnabled) {
if (s[1] == 'l') {
if (s[2] == 'o') {
if (s[3] == 't') {
if (s[4] == 's') {
return T_Q_SLOTS;
}
else if (features.qtKeywordsEnabled && s[1] == 'l') {
if (s[2] == 'o') {
if (s[3] == 't') {
if (s[4] == 's') {
return T_SLOTS;
}
}
}
@@ -513,22 +517,18 @@ static inline int classify6(const char *s, LanguageFeatures features)
}
}
}
else if (features.qtKeywordsEnabled && s[0] == 'S') {
if (s[1] == 'I') {
if (s[2] == 'G') {
if (s[3] == 'N') {
if (s[4] == 'A') {
if (s[5] == 'L') {
return T_SIGNAL;
else if (features.qtKeywordsEnabled && s[0] == 'Q') {
if (s[1] == '_') {
if (s[2] == 'E') {
if (s[3] == 'M') {
if (s[4] == 'I') {
if (s[5] == 'T') {
return T_Q_EMIT;
}
}
}
}
}
}
else if (features.qtKeywordsEnabled && s[0] == 'Q') {
if (s[1] == '_') {
if (s[2] == 'S') {
else if (s[2] == 'S') {
if (s[3] == 'L') {
if (s[4] == 'O') {
if (s[5] == 'T') {
@@ -537,11 +537,15 @@ static inline int classify6(const char *s, LanguageFeatures features)
}
}
}
else if (s[2] == 'E') {
if (s[3] == 'M') {
if (s[4] == 'I') {
if (s[5] == 'T') {
return T_Q_EMIT;
}
}
else if (features.qtKeywordsEnabled && s[0] == 'S') {
if (s[1] == 'I') {
if (s[2] == 'G') {
if (s[3] == 'N') {
if (s[4] == 'A') {
if (s[5] == 'L') {
return T_SIGNAL;
}
}
}
@@ -614,6 +618,21 @@ static inline int classify7(const char *s, LanguageFeatures features)
}
}
}
else if (features.qtKeywordsEnabled && s[0] == 'f') {
if (s[1] == 'o') {
if (s[2] == 'r') {
if (s[3] == 'e') {
if (s[4] == 'a') {
if (s[5] == 'c') {
if (s[6] == 'h') {
return T_FOREACH;
}
}
}
}
}
}
}
else if (features.cxxEnabled && s[0] == 'm') {
if (s[1] == 'u') {
if (s[2] == 't') {
@@ -659,21 +678,6 @@ static inline int classify7(const char *s, LanguageFeatures features)
}
}
}
else if (features.qtKeywordsEnabled && s[0] == 'f') {
if (s[1] == 'o') {
if (s[2] == 'r') {
if (s[3] == 'e') {
if (s[4] == 'a') {
if (s[5] == 'c') {
if (s[6] == 'h') {
return T_Q_FOREACH;
}
}
}
}
}
}
}
else if (features.qtKeywordsEnabled && s[0] == 's') {
if (s[1] == 'i') {
if (s[2] == 'g') {
@@ -681,7 +685,7 @@ static inline int classify7(const char *s, LanguageFeatures features)
if (s[4] == 'a') {
if (s[5] == 'l') {
if (s[6] == 's') {
return T_Q_SIGNALS;
return T_SIGNALS;
}
}
}
@@ -736,18 +740,7 @@ static inline int classify7(const char *s, LanguageFeatures features)
}
else if (features.qtEnabled && s[0] == 'Q') {
if (s[1] == '_') {
if (s[2] == 'S') {
if (s[3] == 'L') {
if (s[4] == 'O') {
if (s[5] == 'T') {
if (s[6] == 'S') {
return T_Q_SLOTS;
}
}
}
}
}
else if (s[2] == 'E') {
if (s[2] == 'E') {
if (s[3] == 'N') {
if (s[4] == 'U') {
if (s[5] == 'M') {
@@ -769,6 +762,17 @@ static inline int classify7(const char *s, LanguageFeatures features)
}
}
}
else if (s[2] == 'S') {
if (s[3] == 'L') {
if (s[4] == 'O') {
if (s[5] == 'T') {
if (s[6] == 'S') {
return T_Q_SLOTS;
}
}
}
}
}
}
}
return T_IDENTIFIER;
@@ -792,18 +796,7 @@ static inline int classify8(const char *s, LanguageFeatures features)
}
}
else if (s[2] == 't') {
if (s[3] == 'y') {
if (s[4] == 'p') {
if (s[5] == 'e') {
if (s[6] == 'o') {
if (s[7] == 'f') {
return T___TYPEOF;
}
}
}
}
}
else if (s[3] == 'h') {
if (s[3] == 'h') {
if (s[4] == 'r') {
if (s[5] == 'e') {
if (s[6] == 'a') {
@@ -814,11 +807,46 @@ static inline int classify8(const char *s, LanguageFeatures features)
}
}
}
else if (s[3] == 'y') {
if (s[4] == 'p') {
if (s[5] == 'e') {
if (s[6] == 'o') {
if (s[7] == 'f') {
return T___TYPEOF;
}
}
}
}
}
}
}
}
else if (s[0] == 'c') {
if (s[1] == 'o') {
if (features.cxx11Enabled && s[1] == 'h') {
if (s[2] == 'a') {
if (s[3] == 'r') {
if (s[4] == '1') {
if (s[5] == '6') {
if (s[6] == '_') {
if (s[7] == 't') {
return T_CHAR16_T;
}
}
}
}
else if (s[4] == '3') {
if (s[5] == '2') {
if (s[6] == '_') {
if (s[7] == 't') {
return T_CHAR32_T;
}
}
}
}
}
}
}
else if (s[1] == 'o') {
if (s[2] == 'n') {
if (s[3] == 't') {
if (s[4] == 'i') {
@@ -832,28 +860,6 @@ static inline int classify8(const char *s, LanguageFeatures features)
}
}
}
} else if (features.cxx11Enabled && s[1] == 'h') {
if (s[2] == 'a') {
if (s[3] == 'r') {
if (s[4] == '1') {
if (s[5] == '6') {
if (s[6] == '_') {
if (s[7] == 't') {
return T_CHAR16_T;
}
}
}
} else if (s[4] == '3') {
if (s[5] == '2') {
if (s[6] == '_') {
if (s[7] == 't') {
return T_CHAR32_T;
}
}
}
}
}
}
}
}
else if (features.cxx11Enabled && s[0] == 'd') {
@@ -1133,21 +1139,7 @@ static inline int classify9(const char *s, LanguageFeatures features)
}
else if (features.qtEnabled && s[0] == 'Q') {
if (s[1] == '_') {
if (s[2] == 'S') {
if (s[3] == 'I') {
if (s[4] == 'G') {
if (s[5] == 'N') {
if (s[6] == 'A') {
if (s[7] == 'L') {
if (s[8] == 'S') {
return T_Q_SIGNALS;
}
}
}
}
}
}
} else if (s[2] == 'F') {
if (s[2] == 'F') {
if (s[3] == 'O') {
if (s[4] == 'R') {
if (s[5] == 'E') {
@@ -1162,6 +1154,21 @@ static inline int classify9(const char *s, LanguageFeatures features)
}
}
}
else if (s[2] == 'S') {
if (s[3] == 'I') {
if (s[4] == 'G') {
if (s[5] == 'N') {
if (s[6] == 'A') {
if (s[7] == 'L') {
if (s[8] == 'S') {
return T_Q_SIGNALS;
}
}
}
}
}
}
}
}
}
return T_IDENTIFIER;
@@ -1171,24 +1178,7 @@ static inline int classify10(const char *s, LanguageFeatures features)
{
if (s[0] == '_') {
if (s[1] == '_') {
if (s[2] == 'i') {
if (s[3] == 'n') {
if (s[4] == 'l') {
if (s[5] == 'i') {
if (s[6] == 'n') {
if (s[7] == 'e') {
if (s[8] == '_') {
if (s[9] == '_') {
return T___INLINE__;
}
}
}
}
}
}
}
}
else if (features.cxxEnabled && s[2] == 'd') {
if (features.cxxEnabled && s[2] == 'd') {
if (s[3] == 'e') {
if (s[4] == 'c') {
if (s[5] == 'l') {
@@ -1205,6 +1195,23 @@ static inline int classify10(const char *s, LanguageFeatures features)
}
}
}
else if (s[2] == 'i') {
if (s[3] == 'n') {
if (s[4] == 'l') {
if (s[5] == 'i') {
if (s[6] == 'n') {
if (s[7] == 'e') {
if (s[8] == '_') {
if (s[9] == '_') {
return T___INLINE__;
}
}
}
}
}
}
}
}
else if (s[2] == 't') {
if (s[3] == 'y') {
if (s[4] == 'p') {
@@ -1272,7 +1279,7 @@ static inline int classify10(const char *s, LanguageFeatures features)
if (s[7] == 'I') {
if (s[8] == 'D') {
if (s[9] == 'E') {
return T_Q_PROPERTY; // Q_OVERRIDE is just an alias for Q_PROPERTY
return T_Q_OVERRIDE;
}
}
}
@@ -1308,23 +1315,7 @@ static inline int classify11(const char *s, LanguageFeatures features)
if (s[0] == '_') {
if (s[1] == '_') {
if (s[2] == 'a') {
if (s[3] == 't') {
if (s[4] == 't') {
if (s[5] == 'r') {
if (s[6] == 'i') {
if (s[7] == 'b') {
if (s[8] == 'u') {
if (s[9] == 't') {
if (s[10] == 'e') {
return T___ATTRIBUTE;
}
}
}
}
}
}
}
} else if (s[3] == 'l') {
if (s[3] == 'l') {
if (s[4] == 'i') {
if (s[5] == 'g') {
if (s[6] == 'n') {
@@ -1341,6 +1332,23 @@ static inline int classify11(const char *s, LanguageFeatures features)
}
}
}
else if (s[3] == 't') {
if (s[4] == 't') {
if (s[5] == 'r') {
if (s[6] == 'i') {
if (s[7] == 'b') {
if (s[8] == 'u') {
if (s[9] == 't') {
if (s[10] == 'e') {
return T___ATTRIBUTE;
}
}
}
}
}
}
}
}
}
}
}
@@ -1420,31 +1428,6 @@ static inline int classify12(const char *s, LanguageFeatures features)
}
}
}
else if (features.qtEnabled && s[0] == 'Q') {
if (s[1] == '_') {
if (s[2] == 'I') {
if (s[3] == 'N') {
if (s[4] == 'T') {
if (s[5] == 'E') {
if (s[6] == 'R') {
if (s[7] == 'F') {
if (s[8] == 'A') {
if (s[9] == 'C') {
if (s[10] == 'E') {
if (s[11] == 'S') {
return T_Q_INTERFACES;
}
}
}
}
}
}
}
}
}
}
}
}
else if (features.cxxEnabled && s[0] == 'd') {
if (s[1] == 'y') {
if (s[2] == 'n') {
@@ -1495,6 +1478,31 @@ static inline int classify12(const char *s, LanguageFeatures features)
}
}
}
else if (features.qtEnabled && s[0] == 'Q') {
if (s[1] == '_') {
if (s[2] == 'I') {
if (s[3] == 'N') {
if (s[4] == 'T') {
if (s[5] == 'E') {
if (s[6] == 'R') {
if (s[7] == 'F') {
if (s[8] == 'A') {
if (s[9] == 'C') {
if (s[10] == 'E') {
if (s[11] == 'S') {
return T_Q_INTERFACES;
}
}
}
}
}
}
}
}
}
}
}
}
return T_IDENTIFIER;
}
@@ -1526,7 +1534,8 @@ static inline int classify13(const char *s, LanguageFeatures features)
}
}
}
} else if (features.cxx11Enabled && s[0] == 's') {
}
else if (features.cxx11Enabled && s[0] == 's') {
if (s[1] == 't') {
if (s[2] == 'a') {
if (s[3] == 't') {
@@ -1556,6 +1565,40 @@ static inline int classify13(const char *s, LanguageFeatures features)
return T_IDENTIFIER;
}
// Generated by cplusplus-keywordgen (see the "=== following code is generated"
// banner above); regenerate from the kwgen file rather than editing by hand.
//
// Compares s[0]..s[13] against "Q_PRIVATE_SLOT" one character at a time.
// Returns T_Q_PRIVATE_SLOT only when features.qtEnabled is set and all 14
// characters match; in every other case returns T_IDENTIFIER.
// NOTE(review): presumably only called for tokens of length 14 -- the
// dispatching case is not visible in this hunk; confirm in Lexer::classify.
static inline int classify14(const char *s, LanguageFeatures features)
{
// The feature check is evaluated first, so s is not read when Qt is disabled.
if (features.qtEnabled && s[0] == 'Q') {
if (s[1] == '_') {
if (s[2] == 'P') {
if (s[3] == 'R') {
if (s[4] == 'I') {
if (s[5] == 'V') {
if (s[6] == 'A') {
if (s[7] == 'T') {
if (s[8] == 'E') {
if (s[9] == '_') {
if (s[10] == 'S') {
if (s[11] == 'L') {
if (s[12] == 'O') {
if (s[13] == 'T') {
return T_Q_PRIVATE_SLOT;
}
}
}
}
}
}
}
}
}
}
}
}
}
}
// Any mismatch falls through to the plain-identifier result.
return T_IDENTIFIER;
}
static inline int classify16(const char *s, LanguageFeatures features)
{
if (features.cxxEnabled && s[0] == 'r') {
@@ -1594,40 +1637,6 @@ static inline int classify16(const char *s, LanguageFeatures features)
return T_IDENTIFIER;
}
// Character-by-character matcher for "Q_PRIVATE_SLOT" (s[0] through s[13]).
// Yields T_Q_PRIVATE_SLOT when features.qtEnabled is true and every character
// matches; otherwise yields T_IDENTIFIER.
// NOTE(review): this text is identical to the classify14 definition that
// appears before classify16 in this view -- presumably the pre-move copy of
// the same function; confirm against the diff markers.
static inline int classify14(const char *s, LanguageFeatures features)
{
// qtEnabled is tested before s[0], so s is untouched when Qt is disabled.
if (features.qtEnabled && s[0] == 'Q') {
if (s[1] == '_') {
if (s[2] == 'P') {
if (s[3] == 'R') {
if (s[4] == 'I') {
if (s[5] == 'V') {
if (s[6] == 'A') {
if (s[7] == 'T') {
if (s[8] == 'E') {
if (s[9] == '_') {
if (s[10] == 'S') {
if (s[11] == 'L') {
if (s[12] == 'O') {
if (s[13] == 'T') {
return T_Q_PRIVATE_SLOT;
}
}
}
}
}
}
}
}
}
}
}
}
}
}
// No full match: treat the token as an ordinary identifier.
return T_IDENTIFIER;
}
static inline int classify18(const char *s, LanguageFeatures features)
{
if (features.qtEnabled && s[0] == 'Q') {
@@ -1714,8 +1723,8 @@ static inline int classify19(const char *s, LanguageFeatures features)
return T_IDENTIFIER;
}
int Lexer::classify(const char *s, int n, LanguageFeatures features) {
int Lexer::classify(const char *s, int n, LanguageFeatures features)
{
switch (n) {
case 2: return classify2(s, features);
case 3: return classify3(s, features);
@@ -1737,7 +1746,13 @@ int Lexer::classify(const char *s, int n, LanguageFeatures features) {
} // switch
}
static inline int classifyOperator2(const char *s) {
// === keywords end
// === keywords begin
static inline int classifyOperator2(const char *s)
{
if (s[0] == 'o') {
if (s[1] == 'r') {
return T_OR;
@@ -1746,7 +1761,8 @@ static inline int classifyOperator2(const char *s) {
return T_IDENTIFIER;
}
static inline int classifyOperator3(const char *s) {
static inline int classifyOperator3(const char *s)
{
if (s[0] == 'a') {
if (s[1] == 'n') {
if (s[2] == 'd') {
@@ -1771,7 +1787,8 @@ static inline int classifyOperator3(const char *s) {
return T_IDENTIFIER;
}
static inline int classifyOperator5(const char *s) {
static inline int classifyOperator5(const char *s)
{
if (s[0] == 'b') {
if (s[1] == 'i') {
if (s[2] == 't') {
@@ -1808,7 +1825,8 @@ static inline int classifyOperator5(const char *s) {
return T_IDENTIFIER;
}
static inline int classifyOperator6(const char *s) {
static inline int classifyOperator6(const char *s)
{
if (s[0] == 'a') {
if (s[1] == 'n') {
if (s[2] == 'd') {
@@ -1864,7 +1882,8 @@ static inline int classifyOperator6(const char *s) {
return T_IDENTIFIER;
}
int Lexer::classifyOperator(const char *s, int n) {
int Lexer::classifyOperator(const char *s, int n)
{
switch (n) {
case 2: return classifyOperator2(s);
case 3: return classifyOperator3(s);
@@ -1874,5 +1893,6 @@ int Lexer::classifyOperator(const char *s, int n) {
} // switch
}
// === keywords end
} // namespace CPlusPlus

View File

@@ -1,55 +1,99 @@
// Copyright (c) 2008 Roberto Raggi <roberto.raggi@gmail.com>
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
#include "Lexer.h"
#include "Token.h"
namespace CPlusPlus {
%token-prefix=T_
%toupper
%no-enums
%namespace=Lexer
%no-namespace-for-tokens
%pre-check-argument=LanguageFeatures features
%function-name=classify
%%
__asm
__asm__
__attribute
__attribute__
__alignof__
__const
__const__
__inline
__inline__
__thread
__typeof
__typeof__
__volatile
__volatile__
asm
auto
bool
break
case
catch
char
class
const
const_cast
continue
default
delete
do
double
dynamic_cast
else
enum
explicit
export
extern
false
float
for
friend
goto
if
inline
int
long
register
return
short
signed
sizeof
static
struct
switch
typedef
typeof
union
unsigned
void
volatile
while
%pre-check=features.cxxEnabled
__decltype
auto
bool
catch
class
const_cast
delete
dynamic_cast
explicit
export
false
friend
mutable
namespace
new
@@ -57,30 +101,79 @@ operator
private
protected
public
register
reinterpret_cast
return
short
signed
sizeof
static
static_cast
struct
switch
template
this
throw
true
try
typedef
typeid
typename
typeof
union
unsigned
using
virtual
void
volatile
wchar_t
while
%pre-check=features.cxx11Enabled
alignas
alignof
char16_t
char32_t
constexpr
decltype
noexcept
nullptr
static_assert
thread_local
%pre-check=features.qtKeywordsEnabled
emit
foreach
Q_EMIT
Q_SLOT
SIGNAL
signals
slots
%pre-check=features.qtMocRunEnabled
Q_Q
Q_D
%pre-check=features.qtEnabled
Q_DECLARE_INTERFACE
Q_ENUMS
Q_FLAGS
Q_FOREACH
Q_GADGET
Q_INVOKABLE
Q_INTERFACES
Q_OBJECT
Q_OVERRIDE
Q_PRIVATE_PROPERTY
Q_PRIVATE_SLOT
Q_PROPERTY
Q_SIGNAL
Q_SIGNALS
Q_SLOTS
SLOT
%%
%pre-check-argument=
%function-name=classifyOperator
%%
or
and
not
xor
bitor
compl
or_eq
and_eq
bitand
not_eq
xor_eq
%%
} // namespace CPlusPlus

View File

@@ -293,6 +293,11 @@ enum Kind {
T___ATTRIBUTE = T___ATTRIBUTE__,
T___ALIGNOF__ = T_ALIGNOF,
T_SLOTS = T_Q_SLOTS,
T_FOREACH = T_Q_FOREACH,
T_SIGNALS = T_Q_SIGNALS,
T_Q_OVERRIDE = T_Q_PROPERTY,
};
class CPLUSPLUS_EXPORT Token

View File

@@ -17,23 +17,29 @@
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
// ### TODO: Rewrite me.
#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <list>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>
class State;
class DottedItem;
typedef std::list<std::string> RuleList;
// One keyword entry read from the keywords section of the input file.
struct Rule
{
// The keyword text; its length selects the classifyN bucket and its
// spelling is passed to token_id() to form the returned token name.
std::string keyword;
// Value of the most recent "%pre-check=" line seen before this keyword in
// the current keywords section (empty if none). When non-empty it is
// emitted as an extra "<preCheck> && " condition in the generated code.
std::string preCheck;
};
typedef std::list<Rule> RuleList;
typedef RuleList::iterator RulePtr;
typedef std::list<State> StateList;
typedef StateList::iterator StatePtr;
@@ -60,7 +66,7 @@ public:
bool operator!=(const DottedItem &other) const { return !operator==(other); }
bool terminal() const { return dot == rule->end(); }
bool terminal() const { return dot == rule->keyword.end(); }
DottedItem next() const
{
@@ -101,14 +107,52 @@ public:
return intern(State(n.begin(), n.end()));
}
std::set<char> firsts()
std::vector<char> firsts()
{
std::set<char> s;
std::set<char> charsSet;
for (DottedItemPtr it = first_item(); it != last_item(); ++it) {
if (!it->terminal())
s.insert(*it->dot);
charsSet.insert(*it->dot);
}
return s;
std::vector<char> charsOrderedUpperToBack; // to minimize Keywords.cpp diff
charsOrderedUpperToBack.reserve(charsSet.size());
for (char c : charsSet) {
charsOrderedUpperToBack.push_back(c);
}
std::stable_partition(charsOrderedUpperToBack.begin(),
charsOrderedUpperToBack.end(),
[](char c) {
return !std::isupper(static_cast<unsigned char>(c));
});
return charsOrderedUpperToBack;
}
bool hasPreChecks()
{
for (DottedItemPtr it = first_item(); it != last_item(); ++it) {
if (!it->rule->preCheck.empty()) {
return true;
}
}
return false;
}
std::string commonPreCheck(char ch)
{
std::string result;
for (DottedItemPtr it = first_item(); it != last_item(); ++it) {
if (!it->terminal() && *it->dot == ch) {
if (result.empty()) {
if (it->rule->preCheck.empty()) {
return "";
}
result = (it->rule->preCheck);
} else if (result != it->rule->preCheck) {
return "";
}
}
}
return result;
}
size_t item_count() const { return _items.size(); }
@@ -127,7 +171,7 @@ public:
{
std::vector<DottedItem> items;
for (; first != last; ++first)
items.push_back(DottedItem(first, first->begin()));
items.push_back(DottedItem(first, first->keyword.begin()));
return intern(State(items.begin(), items.end()));
}
@@ -151,6 +195,15 @@ static std::string option_token_prefix = "Token_";
static std::string option_char_type = "char";
static std::string option_unicode_function = "";
static std::string option_preCheck_arg_type;
static std::string option_preCheck_arg_name;
static std::string option_tokens_namespace;
static std::string option_function_name = "classify";
std::stringstream input;
std::stringstream output;
std::string token_id(const std::string &id)
{
std::string token = option_token_prefix;
@@ -167,7 +220,7 @@ std::string token_id(const std::string &id)
bool starts_with(const std::string &line, const std::string &text)
{
if (text.length() < line.length()) {
if (text.length() <= line.length()) {
return std::equal(line.begin(), line.begin() + text.size(), text.begin());
}
return false;
@@ -176,16 +229,27 @@ bool starts_with(const std::string &line, const std::string &text)
void doit(State &state)
{
static int depth{0};
static int preCheckDepth{0};
++depth;
std::string indent(depth * 2, ' ');
std::set<char> firsts = state.firsts();
for (std::set<char>::iterator it = firsts.begin(); it != firsts.end(); ++it) {
std::vector<char> firsts = state.firsts();
for (std::vector<char>::iterator it = firsts.begin(); it != firsts.end(); ++it) {
std::string _else = it == firsts.begin() ? "" : "else ";
std::cout << indent << _else << "if (s[" << (depth - 1) << "]" << option_unicode_function
<< " == '" << *it << "') {" << std::endl;
output << indent << _else << "if (";
if (preCheckDepth == 0) {
std::string commonPreCheck = state.commonPreCheck(*it);
if (!commonPreCheck.empty()) {
output << commonPreCheck << " && ";
preCheckDepth++;
}
} else if (preCheckDepth > 0) {
preCheckDepth++;
}
output << "s[" << (depth - 1) << "]" << option_unicode_function << " == '" << *it << "'";
output << ") {" << std::endl;
State &next_state = state.next(*it);
bool found = false;
@@ -196,58 +260,76 @@ void doit(State &state)
exit(EXIT_FAILURE);
}
found = true;
std::cout << indent << " return " << option_namespace_name << token_id(*item->rule)
<< ";" << std::endl;
output << indent << " return " << option_tokens_namespace
<< token_id(item->rule->keyword) << ";" << std::endl;
}
}
if (!found)
doit(next_state);
std::cout << indent << "}" << std::endl;
if (preCheckDepth > 0)
preCheckDepth--;
output << indent << "}" << std::endl;
}
--depth;
}
void gen_classify_n(State &start_state, int N)
void gen_classify_n(State &start_state, size_t N)
{
std::cout << "static inline int classify" << N << "(const " << option_char_type << " *s) {"
<< std::endl;
output << "static inline int " << option_function_name << N << "(const " << option_char_type
<< " *s";
if (!option_preCheck_arg_type.empty()) {
output << ", " << option_preCheck_arg_type;
if (start_state.hasPreChecks()) {
output << " " << option_preCheck_arg_name;
}
}
output << ")" << std::endl << "{" << std::endl;
doit(start_state);
std::cout << " return " << option_namespace_name << token_id("identifier") << ";" << std::endl
<< "}" << std::endl
<< std::endl;
output << " return " << option_tokens_namespace << token_id("identifier") << ";" << std::endl
<< "}" << std::endl
<< std::endl;
}
void gen_classify(const std::multimap<size_t, std::string> &keywords)
void gen_classify(const std::multimap<size_t, Rule> &keywords)
{
std::cout << "int " << option_namespace_name << "classify(const " << option_char_type
<< " *s, int n) {" << std::endl
<< " switch (n) {" << std::endl;
std::multimap<size_t, std::string>::const_iterator it = keywords.begin();
output << "int " << option_namespace_name << option_function_name << "(const "
<< option_char_type << " *s, int n";
if (!option_preCheck_arg_type.empty()) {
output << ", " << option_preCheck_arg_type << " " << option_preCheck_arg_name;
}
output << ")" << std::endl;
output << "{" << std::endl << " switch (n) {" << std::endl;
std::multimap<size_t, Rule>::const_iterator it = keywords.begin();
while (it != keywords.end()) {
size_t size = it->first;
std::cout << " case " << size << ": return classify" << size << "(s);" << std::endl;
output << " case " << size << ": return " << option_function_name << size << "(s";
if (!option_preCheck_arg_type.empty()) {
output << ", " << option_preCheck_arg_name;
}
output << ");" << std::endl;
do {
++it;
} while (it != keywords.end() && it->first == size);
}
std::cout << " default: return " << option_namespace_name << token_id("identifier") << ";"
<< std::endl
<< " } // switch" << std::endl
<< "}" << std::endl
<< std::endl;
output << " default: return " << option_tokens_namespace << token_id("identifier") << ";"
<< std::endl
<< " } // switch" << std::endl
<< "}" << std::endl
<< std::endl;
}
void gen_enums(const std::multimap<size_t, std::string> &keywords)
void gen_enums(const std::multimap<size_t, Rule> &keywords)
{
std::cout << "enum {" << std::endl;
std::multimap<size_t, std::string>::const_iterator it = keywords.begin();
output << "enum {" << std::endl;
std::multimap<size_t, Rule>::const_iterator it = keywords.begin();
for (; it != keywords.end(); ++it) {
std::cout << " " << token_id(it->second) << "," << std::endl;
output << " " << token_id(it->second.keyword) << "," << std::endl;
}
std::cout << " " << token_id("identifier") << std::endl << "};" << std::endl << std::endl;
output << " " << token_id("identifier") << std::endl << "};" << std::endl << std::endl;
}
inline bool not_whitespace_p(char ch)
@@ -258,6 +340,11 @@ inline bool not_whitespace_p(char ch)
int main(int argc, char *argv[])
{
const std::string ns = "--namespace=";
const std::string inputFileOpt = "--input";
const std::string outputFileOpt = "--output";
std::string inputFilename;
std::string outputFilename;
for (int i = 0; i < argc; ++i) {
const std::string arg(argv[i]);
@@ -266,13 +353,33 @@ int main(int argc, char *argv[])
else if (starts_with(arg, ns)) {
option_namespace_name.assign(arg.begin() + ns.size(), arg.end());
option_namespace_name += "::";
} else if (arg == inputFileOpt && i + 1 < argc) {
inputFilename = argv[i + 1];
++i;
} else if (arg == outputFileOpt && i + 1 < argc) {
outputFilename = argv[i + 1];
++i;
}else if (arg == "--help" || arg == "-h") {
std::cout << "usage: cplusplus-keywordgen [--input <kwgen file>] [--output <cpp file>]"
<< std::endl;
std::cout << "\t If no input or output specified: std::cin/cout will be used"
<< std::endl;
exit(EXIT_SUCCESS);
}
}
std::multimap<size_t, std::string> keywords;
std::string textline;
bool readKeywords = false;
if (inputFilename.empty()) {
std::string textline;
while (getline(std::cin, textline)) {
input << textline << std::endl;
}
} else {
std::ifstream fileInput(inputFilename, std::ios_base::in);
std::string textline;
while (getline(fileInput, textline)) {
input << textline << std::endl;
}
}
const std::string opt_no_enums = "%no-enums";
const std::string opt_toupper = "%toupper";
@@ -281,73 +388,167 @@ int main(int argc, char *argv[])
const std::string opt_char_type = "%char-type=";
const std::string opt_unicode_function = "%unicode-function=";
while (getline(std::cin, textline)) {
// remove trailing spaces
textline.assign(textline.begin(),
const std::string opt_preCheck_arg = "%pre-check-argument=";
const std::string opt_function_name = "%function-name=";
const std::string opt_no_namespace_for_tokens = "%no-namespace-for-tokens";
// this may be only in keywords section
const std::string preCheckOpt = "%pre-check=";
bool useNamespaceForTokens = true;
bool finished = false;
while (!finished) {
finished = true;
bool readKeywords = false;
std::string preCheckValue;
std::multimap<size_t, Rule> keywords;
std::string textline;
while (getline(input, textline)) {
// remove trailing spaces
textline
.assign(textline.begin(),
std::find_if(textline.rbegin(), textline.rend(), not_whitespace_p).base());
if (!readKeywords) {
if (textline.size() >= 2 && textline[0] == '%') {
if (textline[1] == '%') {
readKeywords = true;
} else if (textline == opt_no_enums) {
option_no_enums = true;
} else if (textline == opt_toupper) {
option_toupper = true;
} else if (starts_with(textline, opt_tok_prefix)) {
option_token_prefix.assign(textline.begin() + opt_tok_prefix.size(),
textline.end());
} else if (starts_with(textline, opt_char_type)) {
option_char_type.assign(textline.begin() + opt_char_type.size(), textline.end());
} else if (starts_with(textline, opt_unicode_function)) {
option_unicode_function.assign(textline.begin() + opt_unicode_function.size(),
if (!readKeywords) {
if (textline.size() >= 2 && textline[0] == '%') {
if (textline[1] == '%') {
readKeywords = true;
static bool generatedMessageAdded=false;
if(!generatedMessageAdded){
generatedMessageAdded=true;
output
<< "// === following code is generated with cplusplus-keywordgen tool"
<< std::endl;
for (auto it = inputFilename.rbegin(); it != inputFilename.rend(); ++it) {
if (*it == '\\' || *it == '/') {
output
<< "// === from source file: "
<< inputFilename.substr(std::distance(it, inputFilename.rend()))
<< std::endl;
break;
}
}
output << std::endl;
}
output << "// === keywords begin" << std::endl;
output << std::endl;
} else if (textline == opt_no_enums) {
option_no_enums = true;
} else if (textline == opt_toupper) {
option_toupper = true;
} else if (starts_with(textline, opt_tok_prefix)) {
option_token_prefix.assign(textline.begin() + opt_tok_prefix.size(),
textline.end());
} else if (starts_with(textline, opt_ns)) {
option_namespace_name.assign(textline.begin() + opt_ns.size(), textline.end());
option_namespace_name += "::";
} else if (starts_with(textline, opt_char_type)) {
option_char_type.assign(textline.begin() + opt_char_type.size(),
textline.end());
} else if (starts_with(textline, opt_unicode_function)) {
option_unicode_function.assign(textline.begin()
+ opt_unicode_function.size(),
textline.end());
} else if (starts_with(textline, opt_ns)) {
option_namespace_name.assign(textline.begin() + opt_ns.size(),
textline.end());
option_namespace_name += "::";
if (useNamespaceForTokens) {
option_tokens_namespace = option_namespace_name;
}
} else if (starts_with(textline, opt_preCheck_arg)) {
std::string::size_type spacePos = textline.find(' ',
opt_preCheck_arg.size());
if (spacePos == std::string::npos) {
option_preCheck_arg_type.clear();
option_preCheck_arg_name.clear();
} else {
option_preCheck_arg_type
= textline.substr(opt_preCheck_arg.size(),
spacePos - opt_preCheck_arg.size());
option_preCheck_arg_name = textline.substr(spacePos + 1);
}
} else if (starts_with(textline, opt_function_name)) {
option_function_name.assign(textline.begin() + opt_function_name.size(),
textline.end());
} else if (textline == opt_no_namespace_for_tokens) {
useNamespaceForTokens = false;
option_tokens_namespace.clear();
}
continue;
}
output << textline << std::endl;
} else {
if (textline.empty())
continue;
if (textline == "%%") {
finished = false;
break;
}
continue;
}
std::cout << textline << std::endl;
} else {
if (textline.empty())
continue;
std::string::iterator start = textline.begin();
while (start != textline.end() && std::isspace(*start))
++start;
std::string::iterator stop = start;
while (stop != textline.end() && (std::isalnum(*stop) || *stop == '_'))
++stop;
if (start != stop) {
std::string keyword(start, stop);
if (keyword == "identifier") {
std::cerr << "*** Error. `identifier' is reserved" << std::endl;
exit(EXIT_FAILURE);
if (starts_with(textline, preCheckOpt)) {
preCheckValue = textline.substr(preCheckOpt.size());
}
keywords.insert(std::make_pair(keyword.size(), keyword));
std::string::iterator start = textline.begin();
while (start != textline.end() && std::isspace(*start))
++start;
std::string::iterator stop = start;
while (stop != textline.end() && (std::isalnum(*stop) || *stop == '_'))
++stop;
if (start != stop) {
Rule rule;
rule.keyword.assign(start, stop);
if (rule.keyword == "identifier") {
std::cerr << "*** Error. `identifier' is reserved" << std::endl;
exit(EXIT_FAILURE);
}
rule.preCheck = preCheckValue;
keywords.insert(std::make_pair(rule.keyword.size(), rule));
}
}
}
if (readKeywords) {
if (!option_no_enums)
gen_enums(keywords);
std::multimap<size_t, Rule>::iterator it = keywords.begin();
while (it != keywords.end()) {
size_t size = it->first;
RuleList rules;
do {
rules.push_back(it->second);
++it;
} while (it != keywords.end() && it->first == size);
gen_classify_n(State::start(rules.begin(), rules.end()), size);
State::reset();
}
gen_classify(keywords);
output << "// === keywords end" << std::endl;
}
}
if (!option_no_enums)
gen_enums(keywords);
std::multimap<size_t, std::string>::iterator it = keywords.begin();
while (it != keywords.end()) {
size_t size = it->first;
RuleList rules;
do {
rules.push_back(it->second);
++it;
} while (it != keywords.end() && it->first == size);
gen_classify_n(State::start(rules.begin(), rules.end()), size);
State::reset();
if (outputFilename.empty()) {
std::string textline;
while (getline(output, textline)) {
std::cout << textline << std::endl;
}
} else {
std::ofstream outFile(outputFilename, std::ios_base::out);
std::string textline;
while (getline(output, textline)) {
outFile << textline << std::endl;
}
std::cout << "Generated: " << outputFilename << std::endl;
}
gen_classify(keywords);
}