Author: Utkarsh Saxena Date: 2021-03-25T18:54:15+01:00 New Revision: aa979084dffba86a3e170826b4e89d90820bb78b
URL: https://github.com/llvm/llvm-project/commit/aa979084dffba86a3e170826b4e89d90820bb78b DIFF: https://github.com/llvm/llvm-project/commit/aa979084dffba86a3e170826b4e89d90820bb78b.diff LOG: [clang][Syntax] Optimize expandedTokens for token ranges. `expandedTokens(SourceRange)` used to do a binary search to get the expanded tokens belonging to a source range. Each binary search uses `isBeforeInTranslationUnit` to order two source locations. This is inherently very slow. By profiling clangd we found out that users like clangd::SelectionTree spend 95% of time in `isBeforeInTranslationUnit`. Also it is worth noting that users of `expandedTokens(SourceRange)` majorly use ranges provided by AST to query this funciton. The ranges provided by AST are token ranges (starting at the beginning of a token and ending at the beginning of another token). Therefore we can avoid the binary search in majority of the cases by maintaining an index of ExpandedToken by their SourceLocations. We still do binary search for ranges which are not token ranges but such instances are quite low. Performance: `~/build/bin/clangd --check=clang/lib/Serialization/ASTReader.cpp` Before: Took 2:10s to complete. Now: Took 1:13s to complete. Differential Revision: https://reviews.llvm.org/D99086 Added: Modified: clang-tools-extra/clangd/ParsedAST.cpp clang/include/clang/Tooling/Syntax/Tokens.h clang/lib/Tooling/Syntax/Tokens.cpp clang/unittests/Tooling/Syntax/TokensTest.cpp Removed: ################################################################################ diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp index 119263f0a891c..fca19428192e9 100644 --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -423,6 +423,8 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs, // tokens from running the preprocessor inside the checks (only // modernize-use-trailing-return-type does that today). syntax::TokenBuffer Tokens = std::move(CollectTokens).consume(); + // Makes SelectionTree build much faster. + Tokens.indexExpandedTokens(); std::vector<Decl *> ParsedDecls = Action->takeTopLevelDecls(); // AST traversals should exclude the preamble, to avoid performance cliffs. Clang->getASTContext().setTraversalScope(ParsedDecls); diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h index 98320bd54d6f6..e4bc1553c2d60 100644 --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -34,6 +34,7 @@ #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Token.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" @@ -192,8 +193,13 @@ class TokenBuffer { return ExpandedTokens; } + /// Builds a cache to make future calls to expandedToken(SourceRange) faster. + /// Creates an index only once. Further calls to it will be no-op. + void indexExpandedTokens(); + /// Returns the subrange of expandedTokens() corresponding to the closed /// token range R. + /// Consider calling indexExpandedTokens() before for faster lookups. llvm::ArrayRef<syntax::Token> expandedTokens(SourceRange R) const; /// Returns the subrange of spelled tokens corresponding to AST node spanning @@ -366,6 +372,8 @@ class TokenBuffer { /// same stream as 'clang -E' (excluding the preprocessor directives like /// #file, etc.). std::vector<syntax::Token> ExpandedTokens; + // Index of ExpandedTokens for faster lookups by SourceLocation. + llvm::DenseMap<SourceLocation, unsigned> ExpandedTokIndex; llvm::DenseMap<FileID, MarkedFile> Files; // The value is never null, pointer instead of reference to avoid disabling // implicit assignment operator. diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index 234df9cb7182b..2326e89ea48ba 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -183,7 +183,31 @@ llvm::StringRef FileRange::text(const SourceManager &SM) const { return Text.substr(Begin, length()); } +void TokenBuffer::indexExpandedTokens() { + // No-op if the index is already created. + if (!ExpandedTokIndex.empty()) + return; + ExpandedTokIndex.reserve(ExpandedTokens.size()); + // Index ExpandedTokens for faster lookups by SourceLocation. + for (size_t I = 0, E = ExpandedTokens.size(); I != E; ++I) + ExpandedTokIndex[ExpandedTokens[I].location()] = I; +} + llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const { + if (!ExpandedTokIndex.empty()) { + // Quick lookup if `R` is a token range. + // This is a huge win since majority of the users use ranges provided by an + // AST. Ranges in AST are token ranges from expanded token stream. + const auto B = ExpandedTokIndex.find(R.getBegin()); + const auto E = ExpandedTokIndex.find(R.getEnd()); + if (B != ExpandedTokIndex.end() && E != ExpandedTokIndex.end()) { + // Add 1 to End to make a half-open range. + return {ExpandedTokens.data() + B->getSecond(), + ExpandedTokens.data() + E->getSecond() + 1}; + } + } + // Slow case. Use `isBeforeInTranslationUnit` to binary search for the + // required range. return getTokensCovering(expandedTokens(), R, *SourceMgr); } diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp index 6a21be632d426..1768529e0a620 100644 --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -106,6 +106,7 @@ class TokenCollectorTest : public ::testing::Test { void EndSourceFileAction() override { assert(Collector && "BeginSourceFileAction was never called"); Result = std::move(*Collector).consume(); + Result.indexExpandedTokens(); } std::unique_ptr<ASTConsumer> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits