From d3c5fff66de034e46e825b63943909d36067405f Mon Sep 17 00:00:00 2001 From: Nikolai Kosjar Date: Thu, 17 Jul 2014 12:56:28 +0200 Subject: [PATCH] C++: Fix expensive parsing of expressions For expression statements like "(g(g(g(...(g(0))...))))" we reparsed quite much again and again for nothing. The high-level trace for this expression looks like this: parseCastExpression parseTypeId parseAbstractDeclarator parseAbstractCoreDeclarator parseParameterDeclarationClause (--> DEEP) ... parseUnaryExpression ... parseCorePostfixExpression parseTypeId (--> DEEP) parsePrimaryExpression (--> DEEP) Especially parseTypeId is expensive in this case and it's called two times, both from the same token (index). With this patch, we remember for certain ASTs the parse results and re-use them when needed. Change-Id: I013d1c064c655636bc94db408097863b5e183fc2 Task-number: QTCREATORBUG-12252 Reviewed-by: Erik Verbruggen --- src/libs/3rdparty/cplusplus/Parser.cpp | 130 +++++++++++++++++++++++-- src/libs/3rdparty/cplusplus/Parser.h | 4 + tests/auto/cplusplus/ast/tst_ast.cpp | 41 ++++++++ 3 files changed, 167 insertions(+), 8 deletions(-) diff --git a/src/libs/3rdparty/cplusplus/Parser.cpp b/src/libs/3rdparty/cplusplus/Parser.cpp index 7ccc85c940d..a8ee2d9fbdc 100644 --- a/src/libs/3rdparty/cplusplus/Parser.cpp +++ b/src/libs/3rdparty/cplusplus/Parser.cpp @@ -26,8 +26,13 @@ #include "Literals.h" #include "ObjectiveCTypeQualifiers.h" #include "QtContextKeywords.h" + +#include +#include + #include #include // for putchar + #if defined(_MSC_VER) && (_MSC_VER < 1800) # define va_copy(dst, src) ((dst) = (src)) #elif defined(__INTEL_COMPILER) && !defined(va_copy) @@ -43,9 +48,9 @@ using namespace CPlusPlus; namespace { class DebugRule { +public: static int depth; -public: DebugRule(const char *name, const char *spell, unsigned idx, bool blocked) { for (int i = 0; i <= depth; ++i) @@ -150,12 +155,93 @@ inline bool isRightAssociative(int tokenKind) } // end of anonymous namespace +class Parser::ASTCache +{ + ASTCache(const ASTCache &other); + void operator =(const ASTCache &other); + +public: + enum ASTKind { + Expression, + ExpressionList, + ParameterDeclarationClause, + TypeId + }; + +public: + ASTCache() {} + + void insert(ASTKind astKind, unsigned tokenIndexBeforeParsing, + AST *resultingAST, unsigned resultingTokenIndex) + { + const auto key = std::make_pair(astKind, tokenIndexBeforeParsing); + const auto value = std::make_pair(resultingAST, resultingTokenIndex); + const auto keyValue = std::make_pair(key, value); + _cache.insert(keyValue); + } + + AST *find(ASTKind astKind, unsigned tokenIndex, + unsigned *resultingTokenIndex, bool *foundInCache) const + { + const auto key = std::make_pair(astKind, tokenIndex); + const auto it = _cache.find(key); + if (it == _cache.end()) { + *foundInCache = false; + return 0; + } else { + *foundInCache = true; + *resultingTokenIndex = it->second.second; + return it->second.first; + } + } + + void clear() + { + _cache.clear(); + } + +private: + struct KeyHasher { + size_t operator()(const std::pair &key) const + { return std::hash()(key.first) ^ std::hash()(key.second); } + }; + + typedef std::pair ASTKindAndTokenIndex; + typedef std::pair ASTAndTokenIndex; + std::unordered_map _cache; +}; + #ifndef CPLUSPLUS_NO_DEBUG_RULE # define DEBUG_THIS_RULE() DebugRule __debug_rule__(__func__, tok().spell(), cursor(), _translationUnit->blockErrors()) +inline void debugPrintCheckCache(bool goodCase) +{ + for (int i = 0; i <= DebugRule::depth - 1; ++i) + fputc('-', stderr); + if (goodCase) + fprintf(stderr, " CACHE: Re-using AST from Cache.\n"); + else + fprintf(stderr, " CACHE: Already tried to parse this, skipping.\n"); +} #else # define DEBUG_THIS_RULE() do {} while (0) +inline void debugPrintCheckCache(bool) {} #endif +#define CHECK_CACHE(ASTKind, ASTType, returnValueInBadCase) \ + do { \ + bool foundInCache; \ + unsigned newTokenIndex; \ + if (AST *ast = _astCache->find(ASTKind, cursor(), &newTokenIndex, &foundInCache)) { \ + debugPrintCheckCache(true); \ + node = (ASTType *) ast; \ + _tokenIndex = newTokenIndex; \ + return true; \ + } else if (foundInCache) { \ + debugPrintCheckCache(false); \ + return returnValueInBadCase; \ + } \ + } while (0) + #define PARSE_EXPRESSION_WITH_OPERATOR_PRECEDENCE(node, minPrecedence) { \ if (LA() == T_THROW) { \ if (!parseThrowExpression(node)) \ @@ -177,11 +263,16 @@ Parser::Parser(TranslationUnit *unit) _inFunctionBody(false), _inExpressionStatement(false), _expressionDepth(0), - _statementDepth(0) + _statementDepth(0), + _astCache(new ASTCache), + _expressionStatementAstCache(new ASTCache) { } Parser::~Parser() -{ } +{ + delete _expressionStatementAstCache; + delete _astCache; +} bool Parser::switchTemplateArguments(bool templateArguments) { @@ -1841,6 +1932,8 @@ bool Parser::parseTypeParameter(DeclarationAST *&node) bool Parser::parseTypeId(ExpressionAST *&node) { DEBUG_THIS_RULE(); + CHECK_CACHE(ASTCache::TypeId, ExpressionAST, false); + SpecifierListAST *type_specifier = 0; if (parseTypeSpecifier(type_specifier)) { TypeIdAST *ast = new (_pool) TypeIdAST; @@ -1857,6 +1950,8 @@ bool Parser::parseParameterDeclarationClause(ParameterDeclarationClauseAST *&nod DEBUG_THIS_RULE(); if (LA() == T_RPAREN) return true; // nothing to do + CHECK_CACHE(ASTCache::ParameterDeclarationClause, ParameterDeclarationClauseAST, true); + const unsigned initialCursor = cursor(); ParameterDeclarationListAST *parameter_declarations = 0; @@ -1881,6 +1976,7 @@ bool Parser::parseParameterDeclarationClause(ParameterDeclarationClauseAST *&nod node = ast; } + _astCache->insert(ASTCache::ParameterDeclarationClause, initialCursor, node, cursor()); return true; } @@ -2797,9 +2893,14 @@ bool Parser::parseTypeIdList(ExpressionListAST *&node) bool Parser::parseExpressionList(ExpressionListAST *&node) { DEBUG_THIS_RULE(); + CHECK_CACHE(ASTCache::ExpressionList, ExpressionListAST, false); + unsigned initialCursor = cursor(); - if (_languageFeatures.cxx11Enabled) - return parseInitializerList0x(node); + if (_languageFeatures.cxx11Enabled) { + bool result = parseInitializerList0x(node); + _astCache->insert(ASTCache::ExpressionList, initialCursor, (AST *) node, cursor()); + return result; + } ExpressionListAST **expression_list_ptr = &node; ExpressionAST *expression = 0; @@ -2816,9 +2917,11 @@ bool Parser::parseExpressionList(ExpressionListAST *&node) expression_list_ptr = &(*expression_list_ptr)->next; } } + _astCache->insert(ASTCache::ExpressionList, initialCursor, (AST *) node, cursor()); return true; } + _astCache->insert(ASTCache::ExpressionList, initialCursor, 0, cursor()); return false; } @@ -2981,9 +3084,11 @@ bool Parser::parseExpressionStatement(StatementAST *&node) const bool wasInExpressionStatement = _inExpressionStatement; _inExpressionStatement = true; - // switch to the temp pool + // switch to the temp pool and cache MemoryPool *previousPool = _pool; _pool = &_expressionStatementTempPool; + ASTCache *previousASTCache = _astCache; + _astCache = _expressionStatementAstCache; bool parsed = false; @@ -3000,12 +3105,15 @@ bool Parser::parseExpressionStatement(StatementAST *&node) _inExpressionStatement = wasInExpressionStatement; if (! _inExpressionStatement) { - // rewind the memory pool after parsing a toplevel expression statement. + // rewind the memory pool and cache after parsing a toplevel expression statement. _expressionStatementTempPool.reset(); + _astCache->clear(); } - // restore the pool + // restore the pool and cache _pool = previousPool; + _astCache = previousASTCache; + return parsed; } @@ -5246,6 +5354,7 @@ bool Parser::parseCastExpression(ExpressionAST *&node) DEBUG_THIS_RULE(); if (LA() == T_LPAREN) { unsigned lparen_token = consumeToken(); + unsigned initialCursor = cursor(); ExpressionAST *type_id = 0; if (parseTypeId(type_id) && LA() == T_RPAREN) { @@ -5300,6 +5409,7 @@ bool Parser::parseCastExpression(ExpressionAST *&node) } parse_as_unary_expression: + _astCache->insert(ASTCache::TypeId, initialCursor, 0, cursor()); rewind(lparen_token); } @@ -5400,6 +5510,8 @@ bool Parser::parseConstantExpression(ExpressionAST *&node) bool Parser::parseExpression(ExpressionAST *&node) { DEBUG_THIS_RULE(); + CHECK_CACHE(ASTCache::Expression, ExpressionAST, false); + unsigned initialCursor = cursor(); if (_expressionDepth > MAX_EXPRESSION_DEPTH) return false; @@ -5407,6 +5519,8 @@ bool Parser::parseExpression(ExpressionAST *&node) ++_expressionDepth; bool success = parseCommaExpression(node); --_expressionDepth; + + _astCache->insert(ASTCache::Expression, initialCursor, node, cursor()); return success; } diff --git a/src/libs/3rdparty/cplusplus/Parser.h b/src/libs/3rdparty/cplusplus/Parser.h index 87007c8f753..4a2a6dfb7e0 100644 --- a/src/libs/3rdparty/cplusplus/Parser.h +++ b/src/libs/3rdparty/cplusplus/Parser.h @@ -325,6 +325,10 @@ private: MemoryPool _expressionStatementTempPool; std::map _templateArgumentList; + class ASTCache; + ASTCache *_astCache; + ASTCache *_expressionStatementAstCache; + private: Parser(const Parser& source); void operator =(const Parser& source); diff --git a/tests/auto/cplusplus/ast/tst_ast.cpp b/tests/auto/cplusplus/ast/tst_ast.cpp index 56371e477bc..ebd9e468e14 100644 --- a/tests/auto/cplusplus/ast/tst_ast.cpp +++ b/tests/auto/cplusplus/ast/tst_ast.cpp @@ -193,6 +193,7 @@ private slots: void incomplete_ast(); void unnamed_class(); void unnamed_class_data(); + void expensiveExpression(); }; void tst_AST::gcc_attributes_1() @@ -1790,6 +1791,46 @@ void tst_AST::unnamed_class_data() QTest::newRow("unnamed-__attribute__") << _("class __attribute__((aligned(8))){};"); } +void tst_AST::expensiveExpression() +{ + QSharedPointer unit(parseStatement( + "void f()\n" + "{\n" + "(g1\n" + "(g2\n" + "(g3\n" + "(g4\n" + "(g5\n" + "(g6\n" + "(g7\n" + "(g8\n" + "(g9\n" + "(g10\n" + "(g11\n" + "(g12\n" + "(g13\n" + "(g14\n" + "(g15\n" + "(g16\n" + "(g17\n" + "(g18\n" + "(g19\n" + "(g20\n" + "(g21\n" + "(g22\n" + "(g23\n" + "(g24\n" + "(g25\n" + "(g26\n" + "(g27\n" + "(g28\n" + "(g29\n" + "(g30\n" + "(g31(0))))))))))))))))))))))))))))))));\n" + "}\n")); + QVERIFY(unit->ast()); +} + void tst_AST::initTestCase() { control.setDiagnosticClient(&diag);