2008-12-02 12:01:29 +01:00
|
|
|
// Copyright (c) 2008 Roberto Raggi <roberto.raggi@gmail.com>
|
|
|
|
|
//
|
|
|
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
|
|
|
// in the Software without restriction, including without limitation the rights
|
|
|
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
|
|
|
// furnished to do so, subject to the following conditions:
|
|
|
|
|
//
|
|
|
|
|
// The above copyright notice and this permission notice shall be included in
|
|
|
|
|
// all copies or substantial portions of the Software.
|
|
|
|
|
//
|
|
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
|
// THE SOFTWARE.
|
|
|
|
|
|
|
|
|
|
#include "Lexer.h"
|
|
|
|
|
#include "Control.h"
|
|
|
|
|
#include "TranslationUnit.h"
|
2009-08-04 12:18:25 +02:00
|
|
|
#include "Literals.h"
|
2013-05-13 10:20:00 +02:00
|
|
|
|
|
|
|
|
#include "cppassert.h"
|
|
|
|
|
|
2008-12-02 12:01:29 +01:00
|
|
|
#include <cctype>
|
|
|
|
|
|
2009-10-20 11:21:25 +02:00
|
|
|
using namespace CPlusPlus;
|
2008-12-02 12:01:29 +01:00
|
|
|
|
2014-02-25 13:44:11 -03:00
|
|
|
/*!
|
|
|
|
|
\class Lexer
|
|
|
|
|
\brief The Lexer generates tokens from a UTF-8 encoded source text.
|
|
|
|
|
|
|
|
|
|
\sa Token
|
|
|
|
|
*/
|
|
|
|
|
|
2014-05-09 10:04:13 -04:00
|
|
|
/*!
|
|
|
|
|
\fn static void Lexer::yyinp_utf8(const char *&currentSourceChar, unsigned char &yychar, unsigned &utf16charCounter)
|
|
|
|
|
|
|
|
|
|
Processes a single Unicode code point in a UTF-8 encoded source.
|
|
|
|
|
|
|
|
|
|
\a currentSourceChar points to the UTF-8 encoded source.
|
|
|
|
|
\a yychar must be the byte pointed to by \a currentSourceChar.
|
|
|
|
|
|
|
|
|
|
Points \a currentSourceChar to the byte of the next code point
|
|
|
|
|
and modifies \a yychar to the value pointed by the updated
|
|
|
|
|
\a currentSourceChar. \a utf16charCounter will be incremented by
|
|
|
|
|
the number of UTF-16 code units that were needed for that code
|
|
|
|
|
point.
|
|
|
|
|
*/
|
|
|
|
|
|
2008-12-02 12:01:29 +01:00
|
|
|
// Builds a lexer bound to \a unit: the control object is taken from the unit,
// keyword scanning is switched on, and the input range is the unit's
// [firstSourceChar(), lastSourceChar()) source buffer.
Lexer::Lexer(TranslationUnit *unit)
    : _translationUnit(unit)
    , _control(unit->control())
    , _state(0)
    , _flags(0)
    , _currentLine(1)
{
    // All flags start cleared (via _flags above); only keyword scanning is on by default.
    f._scanKeywords = true;

    const char *sourceBegin = _translationUnit->firstSourceChar();
    const char *sourceEnd = _translationUnit->lastSourceChar();
    setSource(sourceBegin, sourceEnd);
}
|
|
|
|
|
|
|
|
|
|
// Builds a stand-alone lexer over the raw character range
// [\a firstChar, \a lastChar). No translation unit and no control object are
// attached; keyword scanning is switched on.
Lexer::Lexer(const char *firstChar, const char *lastChar)
    : _translationUnit(nullptr)
    , _control(nullptr)
    , _state(0)
    , _flags(0)
    , _currentLine(1)
{
    // All flags start cleared (via _flags above); only keyword scanning is on by default.
    f._scanKeywords = true;

    setSource(firstChar, lastChar);
}
|
|
|
|
|
|
|
|
|
|
// Nothing to release: the destructor has no work of its own.
Lexer::~Lexer() = default;
|
|
|
|
|
|
|
|
|
|
void Lexer::setSource(const char *firstChar, const char *lastChar)
|
|
|
|
|
{
|
|
|
|
|
_firstChar = firstChar;
|
|
|
|
|
_lastChar = lastChar;
|
|
|
|
|
_currentChar = _firstChar - 1;
|
2015-08-14 16:11:12 -07:00
|
|
|
_currentCharUtf16 = ~0;
|
2008-12-02 12:01:29 +01:00
|
|
|
_tokenStart = _currentChar;
|
|
|
|
|
_yychar = '\n';
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void Lexer::setStartWithNewline(bool enabled)
|
|
|
|
|
{
|
|
|
|
|
if (enabled)
|
|
|
|
|
_yychar = '\n';
|
|
|
|
|
else
|
|
|
|
|
_yychar = ' ';
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int Lexer::state() const
|
|
|
|
|
{ return _state; }
|
|
|
|
|
|
|
|
|
|
void Lexer::setState(int state)
|
|
|
|
|
{ _state = state; }
|
|
|
|
|
|
|
|
|
|
bool Lexer::scanCommentTokens() const
|
2009-07-27 21:47:03 +02:00
|
|
|
{ return f._scanCommentTokens; }
|
2008-12-02 12:01:29 +01:00
|
|
|
|
|
|
|
|
void Lexer::setScanCommentTokens(bool onoff)
|
2009-07-27 21:47:03 +02:00
|
|
|
{ f._scanCommentTokens = onoff; }
|
2008-12-02 12:01:29 +01:00
|
|
|
|
|
|
|
|
bool Lexer::scanKeywords() const
|
2009-07-27 21:47:03 +02:00
|
|
|
{ return f._scanKeywords; }
|
2008-12-02 12:01:29 +01:00
|
|
|
|
|
|
|
|
void Lexer::setScanKeywords(bool onoff)
|
2009-07-27 21:47:03 +02:00
|
|
|
{ f._scanKeywords = onoff; }
|
2008-12-02 12:01:29 +01:00
|
|
|
|
|
|
|
|
void Lexer::setScanAngleStringLiteralTokens(bool onoff)
|
2009-07-27 21:47:03 +02:00
|
|
|
{ f._scanAngleStringLiteralTokens = onoff; }
|
2008-12-02 12:01:29 +01:00
|
|
|
|
|
|
|
|
void Lexer::pushLineStartOffset()
|
|
|
|
|
{
|
|
|
|
|
++_currentLine;
|
|
|
|
|
|
|
|
|
|
if (_translationUnit)
|
2014-05-06 14:48:24 -04:00
|
|
|
_translationUnit->pushLineOffset(_currentCharUtf16);
|
2008-12-02 12:01:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Scans the next token into \a tok.
//
// The token is reset first, scan_helper() does the actual lexing, and the
// token's length is then filled in both as bytes and as UTF-16 code units,
// measured from the token start recorded by scan_helper().
void Lexer::scan(Token *tok)
{
    tok->reset();

    scan_helper(tok);

    // Lengths are the distance the cursor travelled since the token start.
    tok->f.bytes = _currentChar - _tokenStart;
    tok->f.utf16chars = _currentCharUtf16 - _tokenStartUtf16;
}
|
|
|
|
|
|
2015-09-21 11:46:47 +02:00
|
|
|
static bool isRawStringLiteral(unsigned char kind)
|
|
|
|
|
{
|
|
|
|
|
return kind >= T_FIRST_RAW_STRING_LITERAL
|
|
|
|
|
&& kind <= T_LAST_RAW_STRING_LITERAL;
|
|
|
|
|
}
|
|
|
|
|
|
2015-09-21 16:13:39 +02:00
|
|
|
static bool isMultiLineToken(unsigned char kind)
|
|
|
|
|
{
|
|
|
|
|
return kind == T_EOF_SYMBOL
|
|
|
|
|
|| kind == T_COMMENT
|
2015-09-21 11:46:47 +02:00
|
|
|
|| kind == T_DOXY_COMMENT
|
|
|
|
|
|| isRawStringLiteral(kind);
|
2015-09-21 16:13:39 +02:00
|
|
|
}
|
|
|
|
|
|
2008-12-02 12:01:29 +01:00
|
|
|
// Core of the lexer: scans exactly one token into \a tok.
//
// Steps, in order:
//  1. Skip whitespace, recording newline/whitespace/joined flags on \a tok.
//  2. Record the token start (byte and UTF-16 offsets).
//  3. If a previous call left a token open (s._tokenKind is set: an
//     unterminated C comment, C++ comment, raw string or ordinary string),
//     continue scanning that token and return.
//  4. Otherwise dispatch on the first character.
//
// Invariant used throughout: _yychar is the byte at _currentChar, so after
// the dispatch character `ch` has been consumed it sits at _currentChar - 1
// (for single-byte characters), which equals _tokenStart.
//
// The token length is NOT set here; scan() derives it afterwards.
void Lexer::scan_helper(Token *tok)
{
again:
    // --- 1. whitespace -----------------------------------------------------
    while (_yychar && std::isspace(_yychar)) {
        if (_yychar == '\n') {
            // A newline right after a backslash joins lines instead of
            // starting a new one.
            tok->f.joined = s._newlineExpected;
            tok->f.newline = !s._newlineExpected;

            if (s._newlineExpected)
                s._newlineExpected = false;
            else if (!isMultiLineToken(s._tokenKind))
                _state = 0; // pending single-line token ends with the line
        } else {
            tok->f.whitespace = true;
        }
        yyinp();
    }

    // --- 2. token start ----------------------------------------------------
    if (! _translationUnit)
        tok->lineno = _currentLine;

    _tokenStart = _currentChar;
    tok->byteOffset = _currentChar - _firstChar;

    _tokenStartUtf16 = _currentCharUtf16;
    tok->utf16charOffset = _currentCharUtf16;

    if (_yychar) {
        s._newlineExpected = false;
    } else if (s._tokenKind) {
        // End of input while a token is still pending.
        tok->f.kind = T_EOF_SYMBOL;
        return;
    }

    // --- 3. continuation of a token left open by an earlier call -----------
    if (s._tokenKind == T_EOF_SYMBOL) {
        // skip: nothing pending
    } else if (s._tokenKind == T_COMMENT || s._tokenKind == T_DOXY_COMMENT) {
        const int originalKind = s._tokenKind;

        // Look for the closing "*/".
        while (_yychar) {
            if (_yychar != '*')
                yyinp();
            else {
                yyinp();
                if (_yychar == '/') {
                    yyinp();
                    _state = 0;
                    break;
                }
            }
        }

        if (! f._scanCommentTokens)
            goto again;

        tok->f.kind = originalKind;
        return;
    } else if (s._tokenKind == T_CPP_COMMENT || s._tokenKind == T_CPP_DOXY_COMMENT) {
        const Kind originalKind = static_cast<Kind>(s._tokenKind);
        tok->f.joined = true;
        if (f._scanCommentTokens)
            tok->f.kind = originalKind;
        _state = 0;
        scanCppComment(originalKind);
        return;
    } else if (!control() && isRawStringLiteral(s._tokenKind)) {
        // Without a control object only the simple ")..." end match is available.
        tok->f.kind = s._tokenKind;
        if (scanUntilRawStringLiteralEndSimple())
            _state = 0;
        return;
    } else { // non-raw strings
        tok->f.joined = true;
        tok->f.kind = s._tokenKind;
        _state = 0;
        scanUntilQuote(tok, '"');
        return;
    }

    // --- 4. fresh token ----------------------------------------------------
    if (! _yychar) {
        tok->f.kind = T_EOF_SYMBOL;
        return;
    }

    unsigned char ch = _yychar;
    yyinp();

    switch (ch) {
    case '\\':
        // Line continuation: the following newline is handled in step 1.
        s._newlineExpected = true;
        goto again;

    case '"':
        scanStringLiteral(tok);
        break;

    case '\'':
        scanCharLiteral(tok);
        break;

    case '{':
        tok->f.kind = T_LBRACE;
        break;

    case '}':
        tok->f.kind = T_RBRACE;
        break;

    case '[':
        tok->f.kind = T_LBRACKET;
        break;

    case ']':
        tok->f.kind = T_RBRACKET;
        break;

    case '#':
        if (_yychar == '#') {
            yyinp();
            tok->f.kind = T_POUND_POUND;
        } else {
            tok->f.kind = T_POUND;
        }
        break;

    case '(':
        tok->f.kind = T_LPAREN;
        break;

    case ')':
        tok->f.kind = T_RPAREN;
        break;

    case ';':
        tok->f.kind = T_SEMICOLON;
        break;

    case ':':
        if (_yychar == ':') {
            yyinp();
            tok->f.kind = T_COLON_COLON;
        } else if (_yychar == '>') {
            // Digraph ":>" == "]"
            yyinp();
            tok->f.kind = T_RBRACKET;
        } else {
            tok->f.kind = T_COLON;
        }
        break;

    case '.':
        if (_yychar == '*') {
            yyinp();
            tok->f.kind = T_DOT_STAR;
        } else if (_yychar == '.') {
            yyinp();
            // ### CPP_CHECK(_yychar);
            if (_yychar == '.') {
                yyinp();
                tok->f.kind = T_DOT_DOT_DOT;
            } else {
                tok->f.kind = T_ERROR;
            }
        } else if (std::isdigit(_yychar)) {
            if (f._ppMode) {
                scanPreprocessorNumber(tok, true);
                break;
            }

            // The literal starts at the '.', which is the first byte of this
            // token. (Previously this was `_currentChar - 2`, one byte before
            // the '.', which pulled the preceding byte into the spelling and
            // read in front of the buffer when ".5" opened the input.)
            const char *yytext = _tokenStart;
            yyinp();
            scanDigitSequence(); // this is optional: we already skipped over the first digit
            scanExponentPart();
            if (!scanOptionalFloatingSuffix())
                scanOptionalUserDefinedLiteral(tok);
            if (std::isalnum(_yychar) || _yychar == '_') {
                // Trailing identifier characters make the whole thing invalid;
                // swallow them so they end up in the error token.
                do {
                    yyinp();
                } while (std::isalnum(_yychar) || _yychar == '_');
                tok->f.kind = T_ERROR;
            } else {
                int yylen = _currentChar - yytext;
                tok->f.kind = T_NUMERIC_LITERAL;
                if (control())
                    tok->number = control()->numericLiteral(yytext, yylen);
            }
        } else {
            tok->f.kind = T_DOT;
        }
        break;

    case '?':
        if (_yychar == '?' && f._ppMode) {
            // Trigraphs, recognised only in preprocessor mode.
            // NOTE(review): a "??" followed by a character not listed below
            // leaves tok->f.kind untouched -- confirm this is intended.
            yyinp();
            if (_yychar == '(') {
                yyinp();
                tok->f.kind = T_LBRACKET;
                tok->f.trigraph = true;
            } else if (_yychar == ')') {
                yyinp();
                tok->f.kind = T_RBRACKET;
                tok->f.trigraph = true;
            } else if (_yychar == '<') {
                yyinp();
                tok->f.kind = T_LBRACE;
                tok->f.trigraph = true;
            } else if (_yychar == '>') {
                yyinp();
                tok->f.kind = T_RBRACE;
                tok->f.trigraph = true;
            } else if (_yychar == '=') {
                yyinp();
                tok->f.trigraph = true;
                // "??=??=" == "##"
                if (_yychar == '?' && *(_currentChar + 1) == '?' && *(_currentChar + 2) == '=') {
                    yyinp();
                    yyinp();
                    yyinp();
                    tok->f.kind = T_POUND_POUND;
                } else {
                    tok->f.kind = T_POUND;
                }
            } else if (_yychar == '\'') {
                yyinp();
                if (_yychar == '=') {
                    yyinp();
                    tok->f.kind = T_CARET_EQUAL;
                } else {
                    tok->f.kind = T_CARET;
                }
                tok->f.trigraph = true;
            } else if (_yychar == '!') {
                yyinp();
                if (_yychar == '=') {
                    yyinp();
                    tok->f.kind = T_PIPE_EQUAL;
                } else {
                    tok->f.kind = T_PIPE;
                }
                tok->f.trigraph = true;
            } else if (_yychar == '-') {
                yyinp();
                if (_yychar == '=') {
                    yyinp();
                    tok->f.kind = T_TILDE_EQUAL;
                } else {
                    tok->f.kind = T_TILDE;
                }
                tok->f.trigraph = true;
            }
        } else {
            tok->f.kind = T_QUESTION;
        }
        break;

    case '+':
        if (_yychar == '+') {
            yyinp();
            tok->f.kind = T_PLUS_PLUS;
        } else if (_yychar == '=') {
            yyinp();
            tok->f.kind = T_PLUS_EQUAL;
        } else {
            tok->f.kind = T_PLUS;
        }
        break;

    case '-':
        if (_yychar == '-') {
            yyinp();
            tok->f.kind = T_MINUS_MINUS;
        } else if (_yychar == '=') {
            yyinp();
            tok->f.kind = T_MINUS_EQUAL;
        } else if (_yychar == '>') {
            yyinp();
            if (_yychar == '*') {
                yyinp();
                tok->f.kind = T_ARROW_STAR;
            } else {
                tok->f.kind = T_ARROW;
            }
        } else {
            tok->f.kind = T_MINUS;
        }
        break;

    case '*':
        if (_yychar == '=') {
            yyinp();
            tok->f.kind = T_STAR_EQUAL;
        } else {
            tok->f.kind = T_STAR;
        }
        break;

    case '/':
        if (_yychar == '/') {
            yyinp();

            Kind commentType = T_CPP_COMMENT;

            // "///" and "//!" open a doxygen line comment.
            if (_yychar == '/' || _yychar == '!') {
                yyinp();
                commentType = T_CPP_DOXY_COMMENT;
            }

            scanCppComment(commentType);

            if (! f._scanCommentTokens)
                goto again;

            tok->f.kind = commentType;

        } else if (_yychar == '*') {
            yyinp();

            Kind commentKind = T_COMMENT;

            if (_yychar == '*' || _yychar == '!') {
                const char marker = _yychar;

                yyinp();

                // "/**/" is an empty plain comment, not a doxygen one.
                if (marker == '*' && _yychar == '/')
                    goto done;

                if (_yychar == '<')
                    yyinp();

                if (! _yychar || std::isspace(_yychar))
                    commentKind = T_DOXY_COMMENT;
            }

            // Stop with _yychar on the '/' of the closing "*/", or at end of input.
            while (_yychar) {
                if (_yychar != '*') {
                    yyinp();
                } else {
                    yyinp();
                    if (_yychar == '/')
                        break;
                }
            }

        done:
            if (_yychar)
                yyinp();
            else
                s._tokenKind = commentKind; // unterminated: continue on the next call

            if (! f._scanCommentTokens)
                goto again;

            tok->f.kind = commentKind;

        } else if (_yychar == '=') {
            yyinp();
            tok->f.kind = T_SLASH_EQUAL;
        } else {
            tok->f.kind = T_SLASH;
        }
        break;

    case '%':
        if (_yychar == '=') {
            yyinp();
            tok->f.kind = T_PERCENT_EQUAL;
        } else if (_yychar == '>') {
            // Digraph "%>" == "}"
            yyinp();
            tok->f.kind = T_RBRACE;
        } else if (_yychar == ':') {
            // Digraph "%:" == "#", "%:%:" == "##"
            yyinp();
            if (_yychar == '%' && *(_currentChar + 1) == ':') {
                yyinp();
                yyinp();
                tok->f.kind = T_POUND_POUND;
            } else {
                tok->f.kind = T_POUND;
            }
        } else {
            tok->f.kind = T_PERCENT;
        }
        break;

    case '^':
        if (_yychar == '=') {
            yyinp();
            tok->f.kind = T_CARET_EQUAL;
        } else {
            tok->f.kind = T_CARET;
        }
        break;

    case '&':
        if (_yychar == '&') {
            yyinp();
            tok->f.kind = T_AMPER_AMPER;
        } else if (_yychar == '=') {
            yyinp();
            tok->f.kind = T_AMPER_EQUAL;
        } else {
            tok->f.kind = T_AMPER;
        }
        break;

    case '|':
        if (_yychar == '|') {
            yyinp();
            tok->f.kind = T_PIPE_PIPE;
        } else if (_yychar == '=') {
            yyinp();
            tok->f.kind = T_PIPE_EQUAL;
        } else {
            tok->f.kind = T_PIPE;
        }
        break;

    case '~':
        if (_yychar == '=') {
            yyinp();
            tok->f.kind = T_TILDE_EQUAL;
        } else {
            tok->f.kind = T_TILDE;
        }
        break;

    case '!':
        if (_yychar == '=') {
            yyinp();
            tok->f.kind = T_EXCLAIM_EQUAL;
        } else {
            tok->f.kind = T_EXCLAIM;
        }
        break;

    case '=':
        if (_yychar == '=') {
            yyinp();
            tok->f.kind = T_EQUAL_EQUAL;
        } else {
            tok->f.kind = T_EQUAL;
        }
        break;

    case '<':
        if (f._scanAngleStringLiteralTokens) {
            // Everything up to (not including) the next '>' is the literal.
            const char *yytext = _currentChar;
            while (_yychar && _yychar != '>')
                yyinp();
            int yylen = _currentChar - yytext;
            // ### CPP_CHECK(_yychar == '>');
            if (_yychar == '>')
                yyinp();
            if (control())
                tok->string = control()->stringLiteral(yytext, yylen);
            tok->f.kind = T_ANGLE_STRING_LITERAL;
        } else if (_yychar == '<') {
            yyinp();
            if (_yychar == '=') {
                yyinp();
                tok->f.kind = T_LESS_LESS_EQUAL;
            } else
                tok->f.kind = T_LESS_LESS;
        } else if (_yychar == '=') {
            yyinp();
            tok->f.kind = T_LESS_EQUAL;
        } else if (_yychar == ':') {
            // Digraph "<:" == "[", except that "<::" is lexed as "<" + "::"
            // unless it is followed by ':' or '>'.
            if (*(_currentChar+1) != ':' || *(_currentChar+2) == ':' || *(_currentChar+2) == '>') {
                yyinp();
                tok->f.kind = T_LBRACKET;
            } else {
                tok->f.kind = T_LESS;
            }
        } else if (_yychar == '%') {
            // Digraph "<%" == "{"
            yyinp();
            tok->f.kind = T_LBRACE;
        } else {
            tok->f.kind = T_LESS;
        }
        break;

    case '>':
        if (_yychar == '>') {
            yyinp();
            if (_yychar == '=') {
                yyinp();
                tok->f.kind = T_GREATER_GREATER_EQUAL;
            } else {
                tok->f.kind = T_GREATER_GREATER;
            }
        } else if (_yychar == '=') {
            yyinp();
            tok->f.kind = T_GREATER_EQUAL;
        } else {
            tok->f.kind = T_GREATER;
        }
        break;

    case ',':
        tok->f.kind = T_COMMA;
        break;

    default: {
        if (_languageFeatures.objCEnabled) {
            if (ch == '@' && _yychar >= 'a' && _yychar <= 'z') {
                // Objective-C "@keyword": the text after the '@' is classified.
                const char *yytext = _currentChar;

                do {
                    yyinp();
                    if (! (std::isalnum(_yychar) || _yychar == '_' || _yychar == '$'))
                        break;
                } while (_yychar);

                const int yylen = _currentChar - yytext;
                tok->f.kind = classifyObjCAtKeyword(yytext, yylen);
                break;
            } else if (ch == '@' && _yychar == '"') {
                yyinp();
                // NOTE(review): the '"' hint yields T_STRING_LITERAL, while
                // scanStringLiteral() also has an '@' hint producing
                // T_AT_STRING_LITERAL that no visible caller uses -- confirm
                // which kind is intended for @"..." literals.
                scanStringLiteral(tok, '"');
                break;
            }
        }

        if (ch == 'L' || ch == 'u' || ch == 'U' || ch == 'R') {
            // Either a literal or still an identifier.
            if (_yychar == '"') {
                yyinp();
                if (ch == 'R')
                    scanRawStringLiteral(tok);
                else
                    scanStringLiteral(tok, ch);
            } else if (_yychar == '\'') {
                yyinp();
                scanCharLiteral(tok, ch);
            } else if (ch != 'R' && _yychar == 'R') {
                // LR" / uR" / UR"
                yyinp();
                if (_yychar == '"') {
                    yyinp();
                    scanRawStringLiteral(tok, ch);
                } else {
                    scanIdentifier(tok, 1); // one extra char ('R') already consumed
                }
            } else if (ch == 'u' && _yychar == '8') {
                // u8" / u8' / u8R"
                yyinp();
                if (_yychar == '"') {
                    yyinp();
                    scanStringLiteral(tok, '8');
                } else if (_yychar == '\'') {
                    yyinp();
                    scanCharLiteral(tok, '8');
                } else if (_yychar == 'R') {
                    yyinp();
                    if (_yychar == '"') {
                        yyinp();
                        scanRawStringLiteral(tok, '8');
                    } else {
                        scanIdentifier(tok, 2); // "8R" already consumed
                    }
                } else {
                    scanIdentifier(tok, 1); // '8' already consumed
                }
            } else {
                scanIdentifier(tok);
            }
        } else if (std::isalpha(ch) || ch == '_' || ch == '$' || isByteOfMultiByteCodePoint(ch)) {
            // A multi-byte first code point has already moved the cursor by
            // more than one byte; pass the surplus along.
            scanIdentifier(tok, _currentChar - _tokenStart - 1);
        } else if (std::isdigit(ch)) {
            if (f._ppMode)
                scanPreprocessorNumber(tok, false);
            else
                scanNumericLiteral(tok);
        } else {
            tok->f.kind = T_ERROR;
        }
        break;
    } // default

    } // switch
}
|
2008-12-02 12:01:29 +01:00
|
|
|
|
2012-06-06 13:41:22 +02:00
|
|
|
// Scans the remainder of a string literal whose opening quote has already
// been consumed. \a hint is the prefix character that selects the token kind
// ('L', 'U', 'u', '8' for u8, '@'); anything else means an ordinary string
// literal. An optional user-defined-literal suffix is consumed afterwards.
void Lexer::scanStringLiteral(Token *tok, unsigned char hint)
{
    switch (hint) {
    case 'L':
        tok->f.kind = T_WIDE_STRING_LITERAL;
        break;
    case 'U':
        tok->f.kind = T_UTF32_STRING_LITERAL;
        break;
    case 'u':
        tok->f.kind = T_UTF16_STRING_LITERAL;
        break;
    case '8':
        tok->f.kind = T_UTF8_STRING_LITERAL;
        break;
    case '@':
        tok->f.kind = T_AT_STRING_LITERAL;
        break;
    default:
        tok->f.kind = T_STRING_LITERAL;
        break;
    }

    // The kind must be set first: scanUntilQuote() reads tok->f.kind.
    scanUntilQuote(tok, '"');
    scanOptionalUserDefinedLiteral(tok);
}
|
2008-12-02 12:01:29 +01:00
|
|
|
|
2012-08-16 21:17:41 +02:00
|
|
|
// Scans a raw string literal, starting right after the opening quote:
//
//     delimiter ( contents ) delimiter "
//
// The delimiter is whatever precedes the first '('. After each ')' the
// following characters are compared against the opening delimiter; a '"'
// reached after exactly delimiter-length matching characters closes the
// literal. A ')' before any '(' or a backslash/whitespace inside the
// delimiter yields T_ERROR.
//
// \a hint selects the token kind ('L', 'U', 'u', '8'; anything else is a
// plain raw string). If the literal is not closed and no control object is
// available, the kind is stored in s._tokenKind so that scan_helper() can
// continue the literal on a later call.
void Lexer::scanRawStringLiteral(Token *tok, unsigned char hint)
{
    const char *const literalBegin = _currentChar;

    int delimLength = -1;                 // -1 until the opening '(' is seen
    const char *candidate = nullptr;      // first char after the latest ')'
    bool closed = false;

    while (_yychar) {
        // Opening parenthesis: fixes the delimiter length.
        if (delimLength == -1 && _yychar == '(') {
            delimLength = _currentChar - literalBegin;
            yyinp();
            continue;
        }

        // Closing parenthesis: a closing-delimiter candidate starts after it.
        if (_yychar == ')') {
            yyinp();
            if (delimLength == -1) {
                tok->f.kind = T_ERROR;
                return;
            }
            candidate = _currentChar;
            continue;
        }

        // Still inside the opening delimiter.
        if (delimLength == -1) {
            if (_yychar == '\\' || std::isspace(_yychar)) {
                tok->f.kind = T_ERROR;
                return;
            }
            yyinp();
            continue;
        }

        // Inside the contents, possibly in the middle of a candidate.
        if (candidate) {
            const auto matched = _currentChar - candidate;

            if (_yychar == '"' && delimLength == matched) {
                // Got a matching closing delimiter.
                closed = true;
                break;
            }

            // Make sure this continues to be a valid candidate.
            if (_yychar != *(literalBegin + matched))
                candidate = nullptr;
        }

        yyinp();
    }

    const int literalLength = _currentChar - literalBegin;

    if (_yychar == '"')
        yyinp();

    if (control())
        tok->string = control()->stringLiteral(literalBegin, literalLength);

    switch (hint) {
    case 'L':
        tok->f.kind = T_RAW_WIDE_STRING_LITERAL;
        break;
    case 'U':
        tok->f.kind = T_RAW_UTF32_STRING_LITERAL;
        break;
    case 'u':
        tok->f.kind = T_RAW_UTF16_STRING_LITERAL;
        break;
    case '8':
        tok->f.kind = T_RAW_UTF8_STRING_LITERAL;
        break;
    default:
        tok->f.kind = T_RAW_STRING_LITERAL;
        break;
    }

    // Unterminated and no control object: remember the kind for continuation.
    if (!closed && !control())
        s._tokenKind = tok->f.kind;
}
|
|
|
|
|
|
|
|
|
|
// In the highlighting case we don't have any further information
|
|
|
|
|
// like the delimiter or its length, so just match for: ...)..."
|
|
|
|
|
bool Lexer::scanUntilRawStringLiteralEndSimple()
|
|
|
|
|
{
|
|
|
|
|
bool closingParenthesisPassed = false;
|
|
|
|
|
|
|
|
|
|
while (_yychar) {
|
|
|
|
|
if (_yychar == ')') {
|
|
|
|
|
yyinp();
|
|
|
|
|
closingParenthesisPassed = true;
|
|
|
|
|
} else {
|
|
|
|
|
if (closingParenthesisPassed && _yychar == '"') {
|
|
|
|
|
yyinp();
|
|
|
|
|
return true;
|
|
|
|
|
} else {
|
|
|
|
|
yyinp();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return false;
|
2012-08-16 21:17:41 +02:00
|
|
|
}
|
|
|
|
|
|
2012-06-06 13:41:22 +02:00
|
|
|
// Scans a character literal. \a hint is the encoding prefix character
// ('L', 'U' or 'u'); any other value yields a plain T_CHAR_LITERAL.
// The token kind is set before the body is scanned up to the closing '\'',
// after which an optional user-defined-literal suffix is consumed.
void Lexer::scanCharLiteral(Token *tok, unsigned char hint)
{
    switch (hint) {
    case 'L':
        tok->f.kind = T_WIDE_CHAR_LITERAL;
        break;
    case 'U':
        tok->f.kind = T_UTF32_CHAR_LITERAL;
        break;
    case 'u':
        tok->f.kind = T_UTF16_CHAR_LITERAL;
        break;
    default:
        tok->f.kind = T_CHAR_LITERAL;
        break;
    }

    scanUntilQuote(tok, '\'');
    scanOptionalUserDefinedLiteral(tok);
}
|
2008-12-02 12:01:29 +01:00
|
|
|
|
2012-06-06 13:41:22 +02:00
|
|
|
// Consumes characters up to (and including) the closing \a quote, which must
// be either '"' or '\''. Scanning also stops, without consuming, at a newline
// or at the end of input. Backslash sequences are delegated to
// scanBackslash(). When a Control is available, the literal's text (without
// the closing quote) is stored in tok->string.
void Lexer::scanUntilQuote(Token *tok, unsigned char quote)
{
    CPP_CHECK(quote == '"' || quote == '\'');

    const char *const literalBegin = _currentChar;

    for (;;) {
        if (!_yychar || _yychar == quote || _yychar == '\n')
            break;

        if (_yychar != '\\') {
            yyinp();
            continue;
        }
        scanBackslash(static_cast<Kind>(tok->f.kind));
    }

    // Measured before the closing quote is consumed, so it is not included.
    const int literalLength = _currentChar - literalBegin;

    if (_yychar == quote)
        yyinp();

    if (control())
        tok->string = control()->stringLiteral(literalBegin, literalLength);
}
|
2008-12-10 13:27:59 +01:00
|
|
|
|
2014-02-07 15:24:30 +01:00
|
|
|
bool Lexer::scanDigitSequence()
|
|
|
|
|
{
|
|
|
|
|
if (!std::isdigit(_yychar))
|
|
|
|
|
return false;
|
|
|
|
|
yyinp();
|
|
|
|
|
while (std::isdigit(_yychar))
|
|
|
|
|
yyinp();
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool Lexer::scanExponentPart()
|
|
|
|
|
{
|
|
|
|
|
if (_yychar != 'e' && _yychar != 'E')
|
|
|
|
|
return false;
|
|
|
|
|
yyinp();
|
|
|
|
|
if (_yychar == '+' || _yychar == '-')
|
|
|
|
|
yyinp();
|
|
|
|
|
return scanDigitSequence();
|
|
|
|
|
}
|
|
|
|
|
|
2014-11-02 14:42:23 +01:00
|
|
|
bool Lexer::scanOptionalFloatingSuffix()
|
2014-02-07 15:24:30 +01:00
|
|
|
{
|
2014-11-02 14:42:23 +01:00
|
|
|
if (_yychar == 'f' || _yychar == 'l' || _yychar == 'F' || _yychar == 'L') {
|
2014-02-07 15:24:30 +01:00
|
|
|
yyinp();
|
2014-11-02 14:42:23 +01:00
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
2014-02-07 15:24:30 +01:00
|
|
|
}
|
|
|
|
|
|
2014-11-02 14:42:23 +01:00
|
|
|
bool Lexer::scanOptionalIntegerSuffix(bool allowU)
|
2014-02-07 15:24:30 +01:00
|
|
|
{
|
|
|
|
|
switch(_yychar) {
|
|
|
|
|
case 'u':
|
|
|
|
|
case 'U':
|
|
|
|
|
if (allowU) {
|
|
|
|
|
yyinp();
|
|
|
|
|
scanOptionalIntegerSuffix(false);
|
|
|
|
|
}
|
2014-11-02 14:42:23 +01:00
|
|
|
return true;
|
2015-12-30 16:47:12 +01:00
|
|
|
case 'i':
|
|
|
|
|
case 'I':
|
|
|
|
|
yyinp();
|
|
|
|
|
if (_yychar == '6') {
|
|
|
|
|
yyinp();
|
|
|
|
|
if (_yychar == '4') {
|
|
|
|
|
yyinp();
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return false;
|
2014-02-07 15:24:30 +01:00
|
|
|
case 'l':
|
|
|
|
|
yyinp();
|
|
|
|
|
if (_yychar == 'l')
|
|
|
|
|
yyinp();
|
2014-11-02 14:42:23 +01:00
|
|
|
return true;
|
2014-02-07 15:24:30 +01:00
|
|
|
case 'L':
|
|
|
|
|
yyinp();
|
|
|
|
|
if (_yychar == 'L')
|
|
|
|
|
yyinp();
|
2014-11-02 14:42:23 +01:00
|
|
|
return true;
|
2014-02-07 15:24:30 +01:00
|
|
|
default:
|
2014-11-02 14:42:23 +01:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Consumes a user-defined-literal suffix, if C++11 is enabled and the current
// character can start one ('_' or a letter). In that case the token is flagged
// as a user-defined literal and all following identifier characters
// (alphanumerics, '_', bytes of multi-byte code points) are consumed.
void Lexer::scanOptionalUserDefinedLiteral(Token *tok)
{
    if (!_languageFeatures.cxx11Enabled)
        return;
    if (_yychar != '_' && !std::isalpha(_yychar))
        return;

    tok->f.userDefinedLiteral = true;

    // The current character is known to belong to the suffix, so it is safe
    // to consume first and test afterwards.
    do {
        yyinp();
    } while (std::isalnum(_yychar) || _yychar == '_' || isByteOfMultiByteCodePoint(_yychar));
}
|
|
|
|
|
|
2012-06-06 13:41:22 +02:00
|
|
|
// Scans the rest of a numeric literal. The literal's text is taken to start
// one character before _currentChar.
//
// A leading '0' followed by x/X, b/B (see n3472) or an octal digit is scanned
// as a hexadecimal, binary or octal integer respectively. Everything else is
// scanned as a decimal integer or a floating-point number. The digit
// separator '\'' is accepted only when C++14 is enabled.
//
// If identifier characters directly follow the scanned literal they are
// consumed too and the token becomes T_ERROR; otherwise the token becomes
// T_NUMERIC_LITERAL and, when a Control is available, tok->number is set.
void Lexer::scanNumericLiteral(Token *tok)
{
    const char *const literalStart = _currentChar - 1;

    const auto atDigitSeparator = [this] {
        return _yychar == '\'' && _languageFeatures.cxx14Enabled;
    };
    const auto scanIntegerTail = [this, tok] {
        if (!scanOptionalIntegerSuffix())
            scanOptionalUserDefinedLiteral(tok);
    };
    const auto atIdentifierChar = [this] {
        return std::isalnum(_yychar) || _yychar == '_';
    };

    // Set once a hexadecimal, binary or octal literal has been handled, in
    // which case the decimal/floating-point path below is skipped.
    bool prefixedLiteralScanned = false;

    if (*literalStart == '0' && _yychar) {
        if (_yychar == 'x' || _yychar == 'X') {
            yyinp();
            while (std::isdigit(_yychar)
                   || (_yychar >= 'a' && _yychar <= 'f')
                   || (_yychar >= 'A' && _yychar <= 'F')
                   || atDigitSeparator()) {
                yyinp();
            }
            scanIntegerTail();
            prefixedLiteralScanned = true;
        } else if (_yychar == 'b' || _yychar == 'B') { // see n3472
            yyinp();
            while (_yychar == '0' || _yychar == '1' || atDigitSeparator())
                yyinp();
            scanIntegerTail();
            prefixedLiteralScanned = true;
        } else if (_yychar >= '0' && _yychar <= '7') {
            yyinp();
            while ((_yychar >= '0' && _yychar <= '7') || atDigitSeparator())
                yyinp();
            scanIntegerTail();
            prefixedLiteralScanned = true;
        }
    }

    if (!prefixedLiteralScanned) {
        // Leading digits (and separators) of a decimal or floating literal.
        while (std::isdigit(_yychar) || atDigitSeparator())
            yyinp();

        if (_yychar == '.') {
            yyinp();
            scanDigitSequence(); // this is optional: "1." is a valid floating point number
            scanExponentPart();
            if (!scanOptionalFloatingSuffix())
                scanOptionalUserDefinedLiteral(tok);
        } else if (_yychar == 'e' || _yychar == 'E') {
            if (scanExponentPart() && !scanOptionalFloatingSuffix())
                scanOptionalUserDefinedLiteral(tok);
        } else if (_yychar) {
            scanIntegerTail();
        }
    }

    if (atIdentifierChar()) {
        // Trailing identifier characters make the whole thing malformed.
        do {
            yyinp();
        } while (atIdentifierChar());
        tok->f.kind = T_ERROR;
        return;
    }

    const int literalLength = _currentChar - literalStart;
    tok->f.kind = T_NUMERIC_LITERAL;
    if (control())
        tok->number = control()->numericLiteral(literalStart, literalLength);
}
|
|
|
|
|
|
|
|
|
|
// Scans a preprocessing number. The number's text starts one character before
// _currentChar, or two when \a dotAlreadySkipped is set.
//
// If a dot was already skipped and no digit follows, the token is just T_DOT.
// Otherwise alphanumerics, '_', '.' and exponents ('e'/'E' with an optional
// sign) are consumed, the token becomes T_NUMERIC_LITERAL and, when a Control
// is available, tok->number is set.
void Lexer::scanPreprocessorNumber(Token *tok, bool dotAlreadySkipped)
{
    const char *const numberStart = _currentChar - (dotAlreadySkipped ? 2 : 1);

    // std::isdigit() is false for the end-of-input character as well.
    if (dotAlreadySkipped && !std::isdigit(_yychar)) {
        tok->f.kind = T_DOT;
        return;
    }

    while (_yychar) {
        if (_yychar == 'e' || _yychar == 'E') {
            yyinp();
            if (_yychar == '+' || _yychar == '-')
                yyinp();
            continue;
        }

        if (std::isalnum(_yychar) || _yychar == '_' || _yychar == '.') {
            yyinp();
            continue;
        }

        scanOptionalUserDefinedLiteral(tok);
        break;
    }

    const int numberLength = _currentChar - numberStart;
    tok->f.kind = T_NUMERIC_LITERAL;
    if (control())
        tok->number = control()->numericLiteral(numberStart, numberLength);
}
|
|
|
|
|
|
2012-08-16 19:18:20 +02:00
|
|
|
// Scans the rest of an identifier. Its text starts 1 + \a extraProcessedChars
// characters before _currentChar.
//
// With keyword scanning enabled the text is classified first; for the
// true/false keywords tok->number is set when a Control is available.
// Anything that is still a plain identifier afterwards is run through
// classifyOperator() and, when a Control is available, stored in
// tok->identifier.
void Lexer::scanIdentifier(Token *tok, unsigned extraProcessedChars)
{
    const char *const nameStart = _currentChar - 1 - extraProcessedChars;

    while (std::isalnum(_yychar) || _yychar == '_' || _yychar == '$'
           || isByteOfMultiByteCodePoint(_yychar)) {
        yyinp();
    }

    const int nameLength = _currentChar - nameStart;

    if (!f._scanKeywords) {
        tok->f.kind = T_IDENTIFIER;
    } else {
        tok->f.kind = classify(nameStart, nameLength, _languageFeatures);

        const bool isBoolKeyword = (tok->f.kind == T_FALSE || tok->f.kind == T_TRUE);
        if (isBoolKeyword && control())
            tok->number = control()->numericLiteral(nameStart, nameLength);
    }

    if (tok->f.kind != T_IDENTIFIER)
        return;

    tok->f.kind = classifyOperator(nameStart, nameLength);

    if (control())
        tok->identifier = control()->identifier(nameStart, nameLength);
}
|
2014-01-19 22:24:14 +02:00
|
|
|
|
|
|
|
|
// Handles a backslash inside a token of kind \a type.
//
// A backslash followed by a non-whitespace character consumes both. Otherwise
// whitespace up to the newline is skipped, then the newline itself and the
// whitespace after it (stopping at the next newline). If the input ends while
// doing so, \a type is recorded in the lexer state; when that happens before
// the newline was seen, the state's _newlineExpected flag is set as well.
void Lexer::scanBackslash(Kind type)
{
    // Skips whitespace but never steps over a newline.
    const auto skipBlanks = [this] {
        while (_yychar != '\n' && std::isspace(_yychar))
            yyinp();
    };

    yyinp(); // skip '\\'

    if (_yychar && !std::isspace(_yychar)) {
        yyinp();
        return;
    }

    skipBlanks();

    if (!_yychar) {
        s._tokenKind = type;
        s._newlineExpected = true;
        return;
    }

    if (_yychar != '\n')
        return;

    yyinp();
    skipBlanks();

    if (!_yychar)
        s._tokenKind = type;
}
|
|
|
|
|
|
|
|
|
|
// Consumes the body of a '//' comment of kind \a type, stopping (without
// consuming) at a newline or at the end of input. Backslashes are delegated
// to scanBackslash().
void Lexer::scanCppComment(Kind type)
{
    for (; _yychar && _yychar != '\n'; ) {
        if (_yychar == '\\')
            scanBackslash(type);
        else
            yyinp(); // _yychar is non-zero here, guaranteed by the loop condition
    }
}
|