hokein updated this revision to Diff 443737.
hokein added a comment.

More updates.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D128411/new/

https://reviews.llvm.org/D128411

Files:
  clang/include/clang/Tooling/Syntax/Mutations.h
  clang/include/clang/Tooling/Syntax/Nodes.h
  clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h
  clang/include/clang/Tooling/Syntax/TokenManager.h
  clang/include/clang/Tooling/Syntax/Tokens.h
  clang/include/clang/Tooling/Syntax/Tree.h
  clang/lib/Tooling/Syntax/BuildTree.cpp
  clang/lib/Tooling/Syntax/CMakeLists.txt
  clang/lib/Tooling/Syntax/ComputeReplacements.cpp
  clang/lib/Tooling/Syntax/Synthesis.cpp
  clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp
  clang/lib/Tooling/Syntax/Tree.cpp
  clang/tools/clang-check/ClangCheck.cpp
  clang/unittests/Tooling/Syntax/BuildTreeTest.cpp
  clang/unittests/Tooling/Syntax/MutationsTest.cpp
  clang/unittests/Tooling/Syntax/SynthesisTest.cpp
  clang/unittests/Tooling/Syntax/TreeTest.cpp
  clang/unittests/Tooling/Syntax/TreeTestBase.cpp
  clang/unittests/Tooling/Syntax/TreeTestBase.h

Index: clang/unittests/Tooling/Syntax/TreeTestBase.h
===================================================================
--- clang/unittests/Tooling/Syntax/TreeTestBase.h
+++ clang/unittests/Tooling/Syntax/TreeTestBase.h
@@ -17,6 +17,7 @@
 #include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Testing/TestClangConfig.h"
 #include "clang/Tooling/Syntax/Nodes.h"
+#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
 #include "clang/Tooling/Syntax/Tokens.h"
 #include "clang/Tooling/Syntax/Tree.h"
 #include "llvm/ADT/StringRef.h"
@@ -51,6 +52,7 @@
   std::shared_ptr<CompilerInvocation> Invocation;
   // Set after calling buildTree().
   std::unique_ptr<syntax::TokenBuffer> TB;
+  std::unique_ptr<syntax::TokenBufferTokenManager> TM;
   std::unique_ptr<syntax::Arena> Arena;
 };
 
Index: clang/unittests/Tooling/Syntax/TreeTestBase.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TreeTestBase.cpp
+++ clang/unittests/Tooling/Syntax/TreeTestBase.cpp
@@ -35,13 +35,14 @@
 using namespace clang::syntax;
 
 namespace {
-ArrayRef<syntax::Token> tokens(syntax::Node *N) {
+ArrayRef<syntax::Token> tokens(syntax::Node *N,
+                               const TokenBufferTokenManager &STM) {
   assert(N->isOriginal() && "tokens of modified nodes are not well-defined");
   if (auto *L = dyn_cast<syntax::Leaf>(N))
-    return llvm::makeArrayRef(L->getToken(), 1);
+    return llvm::makeArrayRef(STM.getToken(L->getTokenKey()), 1);
   auto *T = cast<syntax::Tree>(N);
-  return llvm::makeArrayRef(T->findFirstLeaf()->getToken(),
-                            T->findLastLeaf()->getToken() + 1);
+  return llvm::makeArrayRef(STM.getToken(T->findFirstLeaf()->getTokenKey()),
+                            STM.getToken(T->findLastLeaf()->getTokenKey()) + 1);
 }
 } // namespace
 
@@ -70,23 +71,26 @@
   public:
     BuildSyntaxTree(syntax::TranslationUnit *&Root,
                     std::unique_ptr<syntax::TokenBuffer> &TB,
+                    std::unique_ptr<syntax::TokenBufferTokenManager> &TM,
                     std::unique_ptr<syntax::Arena> &Arena,
                     std::unique_ptr<syntax::TokenCollector> Tokens)
-        : Root(Root), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) {
+        : Root(Root), TB(TB), TM(TM), Arena(Arena), Tokens(std::move(Tokens)) {
       assert(this->Tokens);
     }
 
     void HandleTranslationUnit(ASTContext &Ctx) override {
       TB = std::make_unique<syntax::TokenBuffer>(std::move(*Tokens).consume());
       Tokens = nullptr; // make sure we fail if this gets called twice.
-      Arena = std::make_unique<syntax::Arena>(Ctx.getSourceManager(),
-                                              Ctx.getLangOpts(), *TB);
+      TM = std::make_unique<syntax::TokenBufferTokenManager>(
+          *TB, Ctx.getLangOpts(), Ctx.getSourceManager());
+      Arena = std::make_unique<syntax::Arena>(*TM);
       Root = syntax::buildSyntaxTree(*Arena, Ctx);
     }
 
   private:
     syntax::TranslationUnit *&Root;
     std::unique_ptr<syntax::TokenBuffer> &TB;
+    std::unique_ptr<syntax::TokenBufferTokenManager> &TM;
     std::unique_ptr<syntax::Arena> &Arena;
     std::unique_ptr<syntax::TokenCollector> Tokens;
   };
@@ -94,21 +98,23 @@
   class BuildSyntaxTreeAction : public ASTFrontendAction {
   public:
     BuildSyntaxTreeAction(syntax::TranslationUnit *&Root,
+                          std::unique_ptr<syntax::TokenBufferTokenManager> &TM,
                           std::unique_ptr<syntax::TokenBuffer> &TB,
                           std::unique_ptr<syntax::Arena> &Arena)
-        : Root(Root), TB(TB), Arena(Arena) {}
+        : Root(Root), TM(TM), TB(TB), Arena(Arena) {}
 
     std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
                                                    StringRef InFile) override {
       // We start recording the tokens, ast consumer will take on the result.
       auto Tokens =
           std::make_unique<syntax::TokenCollector>(CI.getPreprocessor());
-      return std::make_unique<BuildSyntaxTree>(Root, TB, Arena,
+      return std::make_unique<BuildSyntaxTree>(Root, TB, TM, Arena,
                                                std::move(Tokens));
     }
 
   private:
     syntax::TranslationUnit *&Root;
+    std::unique_ptr<syntax::TokenBufferTokenManager> &TM;
     std::unique_ptr<syntax::TokenBuffer> &TB;
     std::unique_ptr<syntax::Arena> &Arena;
   };
@@ -149,7 +155,7 @@
   Compiler.setSourceManager(SourceMgr.get());
 
   syntax::TranslationUnit *Root = nullptr;
-  BuildSyntaxTreeAction Recorder(Root, this->TB, this->Arena);
+  BuildSyntaxTreeAction Recorder(Root, this->TM, this->TB, this->Arena);
 
   // Action could not be executed but the frontend didn't identify any errors
   // in the code ==> problem in setting up the action.
@@ -163,7 +169,7 @@
 
 syntax::Node *SyntaxTreeTest::nodeByRange(llvm::Annotations::Range R,
                                           syntax::Node *Root) {
-  ArrayRef<syntax::Token> Toks = tokens(Root);
+  ArrayRef<syntax::Token> Toks = tokens(Root, *TM);
 
   if (Toks.front().location().isFileID() && Toks.back().location().isFileID() &&
       syntax::Token::range(*SourceMgr, Toks.front(), Toks.back()) ==
Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -112,7 +112,7 @@
                                      createLeaf(*Arena, tok::r_paren)};
   for (const auto *Tree : generateAllTreesWithShape(Leafs, {3u})) {
     ASSERT_TRUE(Tree->findFirstLeaf() != nullptr);
-    EXPECT_EQ(Tree->findFirstLeaf()->getToken()->kind(), tok::l_paren);
+    EXPECT_EQ(TM->getToken(Tree->findFirstLeaf()->getTokenKey())->kind(), tok::l_paren);
   }
 }
 
@@ -122,7 +122,7 @@
                                      createLeaf(*Arena, tok::r_paren)};
   for (const auto *Tree : generateAllTreesWithShape(Leafs, {3u})) {
     ASSERT_TRUE(Tree->findLastLeaf() != nullptr);
-    EXPECT_EQ(Tree->findLastLeaf()->getToken()->kind(), tok::r_paren);
+    EXPECT_EQ(TM->getToken(Tree->findLastLeaf()->getTokenKey())->kind(), tok::r_paren);
   }
 }
 
@@ -180,7 +180,7 @@
 private:
   std::string dumpQuotedTokensOrNull(const Node *N) {
     return N ? "'" +
-                   StringRef(N->dumpTokens(Arena->getSourceManager()))
+                   StringRef(N->dumpTokens(Arena->getTokenManager()))
                        .trim()
                        .str() +
                    "'"
Index: clang/unittests/Tooling/Syntax/SynthesisTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/SynthesisTest.cpp
+++ clang/unittests/Tooling/Syntax/SynthesisTest.cpp
@@ -27,7 +27,7 @@
       return ::testing::AssertionFailure()
              << "Root was not built successfully.";
 
-    auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str();
+    auto Actual = StringRef(Root->dump(Arena->getTokenManager())).trim().str();
     auto Expected = Dump.trim().str();
     // EXPECT_EQ shows the diff between the two strings if they are different.
     EXPECT_EQ(Expected, Actual);
@@ -175,7 +175,7 @@
 
   auto *Copy = deepCopyExpandingMacros(*Arena, StatementContinue);
   EXPECT_TRUE(
-      treeDumpEqual(Copy, StatementContinue->dump(Arena->getSourceManager())));
+      treeDumpEqual(Copy, StatementContinue->dump(Arena->getTokenManager())));
   // FIXME: Test that copy is independent of original, once the Mutations API is
   // more developed.
 }
Index: clang/unittests/Tooling/Syntax/MutationsTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/MutationsTest.cpp
+++ clang/unittests/Tooling/Syntax/MutationsTest.cpp
@@ -30,7 +30,7 @@
 
     Transform(Source, Root);
 
-    auto Replacements = syntax::computeReplacements(*Arena, *Root);
+    auto Replacements = syntax::computeReplacements(*TM, *Root);
     auto Output = tooling::applyAllReplacements(Source.code(), Replacements);
     if (!Output) {
       ADD_FAILURE() << "could not apply replacements: "
Index: clang/unittests/Tooling/Syntax/BuildTreeTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/BuildTreeTest.cpp
+++ clang/unittests/Tooling/Syntax/BuildTreeTest.cpp
@@ -26,7 +26,7 @@
     auto ErrorOK = errorOK(Code);
     if (!ErrorOK)
       return ErrorOK;
-    auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str();
+    auto Actual = StringRef(Root->dump(Arena->getTokenManager())).trim().str();
     // EXPECT_EQ shows the diff between the two strings if they are different.
     EXPECT_EQ(Tree.trim().str(), Actual);
     if (Actual != Tree.trim().str()) {
@@ -59,7 +59,7 @@
       auto *AnnotatedNode = nodeByRange(AnnotatedRanges[i], Root);
       assert(AnnotatedNode);
       auto AnnotatedNodeDump =
-          StringRef(AnnotatedNode->dump(Arena->getSourceManager()))
+          StringRef(AnnotatedNode->dump(Arena->getTokenManager()))
               .trim()
               .str();
       // EXPECT_EQ shows the diff between the two strings if they are different.
Index: clang/tools/clang-check/ClangCheck.cpp
===================================================================
--- clang/tools/clang-check/ClangCheck.cpp
+++ clang/tools/clang-check/ClangCheck.cpp
@@ -25,6 +25,7 @@
 #include "clang/StaticAnalyzer/Frontend/FrontendActions.h"
 #include "clang/Tooling/CommonOptionsParser.h"
 #include "clang/Tooling/Syntax/BuildTree.h"
+#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
 #include "clang/Tooling/Syntax/Tokens.h"
 #include "clang/Tooling/Syntax/Tree.h"
 #include "clang/Tooling/Tooling.h"
@@ -157,9 +158,10 @@
         clang::syntax::TokenBuffer TB = std::move(Collector).consume();
         if (TokensDump)
           llvm::outs() << TB.dumpForTests();
-        clang::syntax::Arena A(AST.getSourceManager(), AST.getLangOpts(), TB);
-        llvm::outs() << clang::syntax::buildSyntaxTree(A, AST)->dump(
-            AST.getSourceManager());
+        clang::syntax::TokenBufferTokenManager TBTM(TB, AST.getLangOpts(),
+                                                    AST.getSourceManager());
+        clang::syntax::Arena A(TBTM);
+        llvm::outs() << clang::syntax::buildSyntaxTree(A, AST)->dump(TBTM);
       }
 
     private:
Index: clang/lib/Tooling/Syntax/Tree.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tree.cpp
+++ clang/lib/Tooling/Syntax/Tree.cpp
@@ -33,25 +33,7 @@
 }
 } // namespace
 
-syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
-                     const TokenBuffer &Tokens)
-    : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {}
-
-const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const {
-  return Tokens;
-}
-
-std::pair<FileID, ArrayRef<syntax::Token>>
-syntax::Arena::lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
-  auto FID = SourceMgr.createFileID(std::move(Input));
-  auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
-  assert(It.second && "duplicate FileID");
-  return {FID, It.first->second};
-}
-
-syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) {
-  assert(Tok != nullptr);
-}
+syntax::Leaf::Leaf(syntax::TokenManager::Key K) : Node(NodeKind::Leaf), K(K) {}
 
 syntax::Node::Node(NodeKind Kind)
     : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr),
@@ -190,20 +172,8 @@
 }
 
 namespace {
-static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L,
-                     const SourceManager &SM) {
-  assert(L);
-  const auto *Token = L->getToken();
-  assert(Token);
-  // Handle 'eof' separately, calling text() on it produces an empty string.
-  if (Token->kind() == tok::eof)
-    OS << "<eof>";
-  else
-    OS << Token->text(SM);
-}
-
 static void dumpNode(raw_ostream &OS, const syntax::Node *N,
-                     const SourceManager &SM, llvm::BitVector IndentMask) {
+                     const syntax::TokenManager &TM, llvm::BitVector IndentMask) {
   auto DumpExtraInfo = [&OS](const syntax::Node *N) {
     if (N->getRole() != syntax::NodeRole::Unknown)
       OS << " " << N->getRole();
@@ -216,7 +186,7 @@
   assert(N);
   if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
     OS << "'";
-    dumpLeaf(OS, L, SM);
+    OS << TM.getText(L->getTokenKey());
     OS << "'";
     DumpExtraInfo(N);
     OS << "\n";
@@ -242,25 +212,25 @@
       OS << "|-";
       IndentMask.push_back(true);
     }
-    dumpNode(OS, &It, SM, IndentMask);
+    dumpNode(OS, &It, TM, IndentMask);
     IndentMask.pop_back();
   }
 }
 } // namespace
 
-std::string syntax::Node::dump(const SourceManager &SM) const {
+std::string syntax::Node::dump(const TokenManager &TM) const {
   std::string Str;
   llvm::raw_string_ostream OS(Str);
-  dumpNode(OS, this, SM, /*IndentMask=*/{});
+  dumpNode(OS, this, TM, /*IndentMask=*/{});
   return std::move(OS.str());
 }
 
-std::string syntax::Node::dumpTokens(const SourceManager &SM) const {
+std::string syntax::Node::dumpTokens(const TokenManager &TM) const {
   std::string Storage;
   llvm::raw_string_ostream OS(Storage);
   traverse(this, [&](const syntax::Node *N) {
     if (const auto *L = dyn_cast<syntax::Leaf>(N)) {
-      dumpLeaf(OS, L, SM);
+      OS << TM.getText(L->getTokenKey());
       OS << " ";
     }
   });
@@ -297,7 +267,8 @@
            C.getRole() == NodeRole::ListDelimiter);
     if (C.getRole() == NodeRole::ListDelimiter) {
       assert(isa<Leaf>(C));
-      assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind());
+      // FIXME: re-enable it when there is a way to retrieve token kind in Leaf.
+      // assert(cast<Leaf>(C).getToken()->kind() == L->getDelimiterTokenKind());
     }
   }
 
Index: clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp
@@ -0,0 +1,25 @@
+//===- TokenBufferTokenManager.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
+
+namespace clang {
+namespace syntax {
+constexpr llvm::StringLiteral syntax::TokenBufferTokenManager::Kind;
+
+std::pair<FileID, ArrayRef<syntax::Token>>
+syntax::TokenBufferTokenManager::lexBuffer(
+    std::unique_ptr<llvm::MemoryBuffer> Input) {
+  auto FID = SM.createFileID(std::move(Input));
+  auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SM, LangOpts));
+  assert(It.second && "duplicate FileID");
+  return {FID, It.first->second};
+}
+
+} // namespace syntax
+} // namespace clang
Index: clang/lib/Tooling/Syntax/Synthesis.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Synthesis.cpp
+++ clang/lib/Tooling/Syntax/Synthesis.cpp
@@ -8,6 +8,8 @@
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Tooling/Syntax/BuildTree.h"
 #include "clang/Tooling/Syntax/Tree.h"
+#include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
 
 using namespace clang;
 
@@ -28,7 +30,8 @@
 
   static std::pair<FileID, ArrayRef<Token>>
   lexBuffer(syntax::Arena &A, std::unique_ptr<llvm::MemoryBuffer> Buffer) {
-    return A.lexBuffer(std::move(Buffer));
+    auto& STM = llvm::cast<TokenBufferTokenManager>(A.getTokenManager());
+    return STM.lexBuffer(std::move(Buffer));
   }
 };
 
@@ -43,7 +46,8 @@
   assert(Tokens.front().kind() == K &&
          "spelling is not lexed into the expected kind of token");
 
-  auto *Leaf = new (A.getAllocator()) syntax::Leaf(Tokens.begin());
+  auto *Leaf = new (A.getAllocator()) syntax::Leaf(
+    reinterpret_cast<TokenManager::Key>(Tokens.begin()));
   syntax::FactoryImpl::setCanModify(Leaf);
   Leaf->assertInvariants();
   return Leaf;
@@ -209,11 +213,12 @@
 
 syntax::Node *clang::syntax::deepCopyExpandingMacros(syntax::Arena &A,
                                                      const syntax::Node *N) {
+  const auto& TBTM = llvm::cast<TokenBufferTokenManager>(A.getTokenManager());
   if (const auto *L = dyn_cast<syntax::Leaf>(N))
     // `L->getToken()` gives us the expanded token, thus we implicitly expand
     // any macros here.
-    return createLeaf(A, L->getToken()->kind(),
-                      L->getToken()->text(A.getSourceManager()));
+    return createLeaf(A, TBTM.getToken(L->getTokenKey())->kind(),
+                       TBTM.getText(L->getTokenKey()));
 
   const auto *T = cast<syntax::Tree>(N);
   std::vector<std::pair<syntax::Node *, syntax::NodeRole>> Children;
Index: clang/lib/Tooling/Syntax/ComputeReplacements.cpp
===================================================================
--- clang/lib/Tooling/Syntax/ComputeReplacements.cpp
+++ clang/lib/Tooling/Syntax/ComputeReplacements.cpp
@@ -7,7 +7,9 @@
 //===----------------------------------------------------------------------===//
 #include "clang/Tooling/Core/Replacement.h"
 #include "clang/Tooling/Syntax/Mutations.h"
+#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
 #include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/Tooling/Syntax/Tree.h"
 #include "llvm/Support/Error.h"
 
 using namespace clang;
@@ -16,10 +18,13 @@
 using ProcessTokensFn = llvm::function_ref<void(llvm::ArrayRef<syntax::Token>,
                                                 bool /*IsOriginal*/)>;
 /// Enumerates spans of tokens from the tree consecutively laid out in memory.
-void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) {
+void enumerateTokenSpans(const syntax::Tree *Root,
+                         const syntax::TokenBufferTokenManager &STM,
+                         ProcessTokensFn Callback) {
   struct Enumerator {
-    Enumerator(ProcessTokensFn Callback)
-        : SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false),
+    Enumerator(const syntax::TokenBufferTokenManager &STM,
+               ProcessTokensFn Callback)
+        : STM(STM), SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false),
           Callback(Callback) {}
 
     void run(const syntax::Tree *Root) {
@@ -39,7 +44,8 @@
       }
 
       auto *L = cast<syntax::Leaf>(N);
-      if (SpanEnd == L->getToken() && SpanIsOriginal == L->isOriginal()) {
+      if (SpanEnd == STM.getToken(L->getTokenKey()) &&
+          SpanIsOriginal == L->isOriginal()) {
         // Extend the current span.
         ++SpanEnd;
         return;
@@ -48,24 +54,25 @@
       if (SpanBegin)
         Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal);
       // Start recording a new span.
-      SpanBegin = L->getToken();
+      SpanBegin = STM.getToken(L->getTokenKey());
       SpanEnd = SpanBegin + 1;
       SpanIsOriginal = L->isOriginal();
     }
 
+    const syntax::TokenBufferTokenManager &STM;
     const syntax::Token *SpanBegin;
     const syntax::Token *SpanEnd;
     bool SpanIsOriginal;
     ProcessTokensFn Callback;
   };
 
-  return Enumerator(Callback).run(Root);
+  return Enumerator(STM, Callback).run(Root);
 }
 
-syntax::FileRange rangeOfExpanded(const syntax::Arena &A,
+syntax::FileRange rangeOfExpanded(const syntax::TokenBufferTokenManager &STM,
                                   llvm::ArrayRef<syntax::Token> Expanded) {
-  const auto &Buffer = A.getTokenBuffer();
-  const auto &SM = A.getSourceManager();
+  const auto &Buffer = STM.tokenBuffer();
+  const auto &SM = STM.sourceManager();
 
   // Check that \p Expanded actually points into expanded tokens.
   assert(Buffer.expandedTokens().begin() <= Expanded.begin());
@@ -83,10 +90,10 @@
 } // namespace
 
 tooling::Replacements
-syntax::computeReplacements(const syntax::Arena &A,
+syntax::computeReplacements(const TokenBufferTokenManager &TBTM,
                             const syntax::TranslationUnit &TU) {
-  const auto &Buffer = A.getTokenBuffer();
-  const auto &SM = A.getSourceManager();
+  const auto &Buffer = TBTM.tokenBuffer();
+  const auto &SM = TBTM.sourceManager();
 
   tooling::Replacements Replacements;
   // Text inserted by the replacement we are building now.
@@ -95,13 +102,13 @@
     if (ReplacedRange.empty() && Replacement.empty())
       return;
     llvm::cantFail(Replacements.add(tooling::Replacement(
-        SM, rangeOfExpanded(A, ReplacedRange).toCharRange(SM), Replacement)));
+        SM, rangeOfExpanded(TBTM, ReplacedRange).toCharRange(SM),
+        Replacement)));
     Replacement = "";
   };
-
   const syntax::Token *NextOriginal = Buffer.expandedTokens().begin();
   enumerateTokenSpans(
-      &TU, [&](llvm::ArrayRef<syntax::Token> Tokens, bool IsOriginal) {
+      &TU, TBTM, [&](llvm::ArrayRef<syntax::Token> Tokens, bool IsOriginal) {
         if (!IsOriginal) {
           Replacement +=
               syntax::Token::range(SM, Tokens.front(), Tokens.back()).text(SM);
Index: clang/lib/Tooling/Syntax/CMakeLists.txt
===================================================================
--- clang/lib/Tooling/Syntax/CMakeLists.txt
+++ clang/lib/Tooling/Syntax/CMakeLists.txt
@@ -5,6 +5,7 @@
   ComputeReplacements.cpp
   Nodes.cpp
   Mutations.cpp
+  TokenBufferTokenManager.cpp
   Synthesis.cpp
   Tokens.cpp
   Tree.cpp
Index: clang/lib/Tooling/Syntax/BuildTree.cpp
===================================================================
--- clang/lib/Tooling/Syntax/BuildTree.cpp
+++ clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -27,6 +27,7 @@
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/LiteralSupport.h"
 #include "clang/Tooling/Syntax/Nodes.h"
+#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
 #include "clang/Tooling/Syntax/Tokens.h"
 #include "clang/Tooling/Syntax/Tree.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -365,21 +366,24 @@
 /// Call finalize() to finish building the tree and consume the root node.
 class syntax::TreeBuilder {
 public:
-  TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {
-    for (const auto &T : Arena.getTokenBuffer().expandedTokens())
+  TreeBuilder(syntax::Arena &Arena)
+      : Arena(Arena),
+        TBTM(cast<TokenBufferTokenManager>(Arena.getTokenManager())),
+        Pending(Arena, TBTM.tokenBuffer()) {
+    for (const auto &T : TBTM.tokenBuffer().expandedTokens())
       LocationToToken.insert({T.location(), &T});
   }
 
   llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); }
   const SourceManager &sourceManager() const {
-    return Arena.getSourceManager();
+    return TBTM.sourceManager();
   }
 
   /// Populate children for \p New node, assuming it covers tokens from \p
   /// Range.
   void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) {
     assert(New);
-    Pending.foldChildren(Arena, Range, New);
+    Pending.foldChildren(TBTM.tokenBuffer(), Range, New);
     if (From)
       Mapping.add(From, New);
   }
@@ -392,7 +396,7 @@
   void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New,
                 NestedNameSpecifierLoc From) {
     assert(New);
-    Pending.foldChildren(Arena, Range, New);
+    Pending.foldChildren(TBTM.tokenBuffer(), Range, New);
     if (From)
       Mapping.add(From, New);
   }
@@ -403,7 +407,7 @@
                 ASTPtr From) {
     assert(New);
     auto ListRange = Pending.shrinkToFitList(SuperRange);
-    Pending.foldChildren(Arena, ListRange, New);
+    Pending.foldChildren(TBTM.tokenBuffer(), ListRange, New);
     if (From)
       Mapping.add(From, New);
   }
@@ -434,12 +438,12 @@
 
   /// Finish building the tree and consume the root node.
   syntax::TranslationUnit *finalize() && {
-    auto Tokens = Arena.getTokenBuffer().expandedTokens();
+    auto Tokens = TBTM.tokenBuffer().expandedTokens();
     assert(!Tokens.empty());
     assert(Tokens.back().kind() == tok::eof);
 
     // Build the root of the tree, consuming all the children.
-    Pending.foldChildren(Arena, Tokens.drop_back(),
+    Pending.foldChildren(TBTM.tokenBuffer(), Tokens.drop_back(),
                          new (Arena.getAllocator()) syntax::TranslationUnit);
 
     auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize());
@@ -464,7 +468,7 @@
     assert(First.isValid());
     assert(Last.isValid());
     assert(First == Last ||
-           Arena.getSourceManager().isBeforeInTranslationUnit(First, Last));
+           TBTM.sourceManager().isBeforeInTranslationUnit(First, Last));
     return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
   }
 
@@ -564,15 +568,16 @@
   ///
   /// Ensures that added nodes properly nest and cover the whole token stream.
   struct Forest {
-    Forest(syntax::Arena &A) {
-      assert(!A.getTokenBuffer().expandedTokens().empty());
-      assert(A.getTokenBuffer().expandedTokens().back().kind() == tok::eof);
+    Forest(syntax::Arena &A, const syntax::TokenBuffer &TB) {
+      assert(!TB.expandedTokens().empty());
+      assert(TB.expandedTokens().back().kind() == tok::eof);
       // Create all leaf nodes.
       // Note that we do not have 'eof' in the tree.
-      for (const auto &T : A.getTokenBuffer().expandedTokens().drop_back()) {
-        auto *L = new (A.getAllocator()) syntax::Leaf(&T);
+      for (const auto &T : TB.expandedTokens().drop_back()) {
+        auto *L = new (A.getAllocator())
+            syntax::Leaf(reinterpret_cast<TokenManager::Key>(&T));
         L->Original = true;
-        L->CanModify = A.getTokenBuffer().spelledForExpanded(T).has_value();
+        L->CanModify = TB.spelledForExpanded(T).has_value();
         Trees.insert(Trees.end(), {&T, L});
       }
     }
@@ -620,8 +625,8 @@
     }
 
     /// Add \p Node to the forest and attach child nodes based on \p Tokens.
-    void foldChildren(const syntax::Arena &A, ArrayRef<syntax::Token> Tokens,
-                      syntax::Tree *Node) {
+    void foldChildren(const syntax::TokenBuffer &TB,
+                      ArrayRef<syntax::Token> Tokens, syntax::Tree *Node) {
       // Attach children to `Node`.
       assert(Node->getFirstChild() == nullptr && "node already has children");
 
@@ -646,7 +651,7 @@
       // Mark that this node came from the AST and is backed by the source code.
       Node->Original = true;
       Node->CanModify =
-          A.getTokenBuffer().spelledForExpanded(Tokens).has_value();
+          TB.spelledForExpanded(Tokens).has_value();
 
       Trees.erase(BeginChildren, EndChildren);
       Trees.insert({FirstToken, Node});
@@ -660,18 +665,18 @@
       return Root;
     }
 
-    std::string str(const syntax::Arena &A) const {
+    std::string str(const syntax::TokenBufferTokenManager &STM) const {
       std::string R;
       for (auto It = Trees.begin(); It != Trees.end(); ++It) {
         unsigned CoveredTokens =
             It != Trees.end()
                 ? (std::next(It)->first - It->first)
-                : A.getTokenBuffer().expandedTokens().end() - It->first;
+                : STM.tokenBuffer().expandedTokens().end() - It->first;
 
         R += std::string(
             formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->getKind(),
-                    It->first->text(A.getSourceManager()), CoveredTokens));
-        R += It->second->dump(A.getSourceManager());
+                    It->first->text(STM.sourceManager()), CoveredTokens));
+        R += It->second->dump(STM);
       }
       return R;
     }
@@ -684,9 +689,10 @@
   };
 
   /// For debugging purposes.
-  std::string str() { return Pending.str(Arena); }
+  std::string str() { return Pending.str(TBTM); }
 
   syntax::Arena &Arena;
+  TokenBufferTokenManager& TBTM;
   /// To quickly find tokens by their start location.
   llvm::DenseMap<SourceLocation, const syntax::Token *> LocationToToken;
   Forest Pending;
@@ -1718,7 +1724,7 @@
     markExprChild(ChildExpr, NodeRole::Expression);
     ChildNode = new (allocator()) syntax::ExpressionStatement;
     // (!) 'getStmtRange()' ensures this covers a trailing semicolon.
-    Pending.foldChildren(Arena, getStmtRange(Child), ChildNode);
+    Pending.foldChildren(TBTM.tokenBuffer(), getStmtRange(Child), ChildNode);
   } else {
     ChildNode = Mapping.find(Child);
   }
Index: clang/include/clang/Tooling/Syntax/Tree.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tree.h
+++ clang/include/clang/Tooling/Syntax/Tree.h
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 // Defines the basic structure of the syntax tree. There are two kinds of nodes:
-//   - leaf nodes correspond to a token in the expanded token stream,
+//   - leaf nodes correspond to tokens,
 //   - tree nodes correspond to language grammar constructs.
 //
 // The tree is initially built from an AST. Each node of a newly built tree
@@ -21,11 +21,8 @@
 #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H
 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H
 
-#include "clang/Basic/LangOptions.h"
-#include "clang/Basic/SourceLocation.h"
-#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
-#include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/Tooling/Syntax/TokenManager.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/iterator.h"
@@ -36,33 +33,17 @@
 namespace clang {
 namespace syntax {
 
-/// A memory arena for syntax trees. Also tracks the underlying token buffers,
-/// source manager, etc.
+/// A memory arena for syntax trees.
 class Arena {
 public:
-  Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
-        const TokenBuffer &Tokens);
-
-  const SourceManager &getSourceManager() const { return SourceMgr; }
-  const LangOptions &getLangOptions() const { return LangOpts; }
-
-  const TokenBuffer &getTokenBuffer() const;
+  Arena(TokenManager& TokenMgr) : TokenMgr(TokenMgr) {}
   llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
 
-private:
-  /// Add \p Buffer to the underlying source manager, tokenize it and store the
-  /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens
-  /// that were not written in user code.
-  std::pair<FileID, ArrayRef<Token>>
-  lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
-  friend class FactoryImpl;
+  const TokenManager &getTokenManager() const { return TokenMgr; }
+  TokenManager &getTokenManager() { return TokenMgr; }
 
 private:
-  SourceManager &SourceMgr;
-  const LangOptions &LangOpts;
-  const TokenBuffer &Tokens;
-  /// IDs and storage for additional tokenized files.
-  llvm::DenseMap<FileID, std::vector<Token>> ExtraTokens;
+  TokenManager& TokenMgr;
   /// Keeps all the allocated nodes and their intermediate data structures.
   llvm::BumpPtrAllocator Allocator;
 };
@@ -122,9 +103,9 @@
   Node *getPreviousSibling() { return PreviousSibling; }
 
   /// Dumps the structure of a subtree. For debugging and testing purposes.
-  std::string dump(const SourceManager &SM) const;
+  std::string dump(const TokenManager &SM) const;
   /// Dumps the tokens forming this subtree.
-  std::string dumpTokens(const SourceManager &SM) const;
+  std::string dumpTokens(const TokenManager &SM) const;
 
   /// Asserts invariants on this node of the tree and its immediate children.
   /// Will not recurse into the subtree. No-op if NDEBUG is set.
@@ -153,16 +134,17 @@
   unsigned CanModify : 1;
 };
 
-/// A leaf node points to a single token inside the expanded token stream.
+/// A leaf node points to a single token.
+// FIXME: add TokenKind field (borrow some bits from the Node::kind).
 class Leaf final : public Node {
 public:
-  Leaf(const Token *T);
+  Leaf(TokenManager::Key K);
   static bool classof(const Node *N);
 
-  const Token *getToken() const { return Tok; }
+  TokenManager::Key getTokenKey() const { return K; }
 
 private:
-  const Token *Tok;
+  TokenManager::Key K;
 };
 
 /// A node that has children and represents a syntactic language construct.
Index: clang/include/clang/Tooling/Syntax/Tokens.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tokens.h
+++ clang/include/clang/Tooling/Syntax/Tokens.h
@@ -33,6 +33,7 @@
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/Token.h"
+#include "clang/Tooling/Syntax/TokenManager.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
Index: clang/include/clang/Tooling/Syntax/TokenManager.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/Syntax/TokenManager.h
@@ -0,0 +1,45 @@
+//===- TokenManager.h - Manage Tokens for syntax-tree ------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines Token interfaces for the clang syntax-tree. This is the level of
+// abstraction that the syntax-tree uses to operate on Token.
+//
+// TokenManager decouples the syntax-tree from a particular token
+// implementation. For example, a TokenBuffer captured from a clang parser may
+// track macro expansions and associate tokens with clang's SourceManager, while
+// a clang pseudoparser would use a flat array of raw-lexed tokens in memory.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H
+#define LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H
+
+#include "llvm/ADT/StringRef.h"
+#include <cstdint>
+
+namespace clang {
+namespace syntax {
+
+/// Defines interfaces for operating on "Token" in the clang syntax-tree.
+class TokenManager {
+public:
+  /// Describes what the exact class kind of the TokenManager is.
+  virtual llvm::StringLiteral kind() const = 0;
+
+  /// A key to identify a specific token. The token concept depends on the
+  /// underlying implementation -- it can be a spelled token from the original
+  /// source file or an expanded token.
+  /// The syntax-tree Leaf node holds a Key.
+  using Key = uintptr_t;
+  virtual llvm::StringRef getText(Key K) const = 0;
+};
+
+} // namespace syntax
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H
Index: clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h
@@ -0,0 +1,70 @@
+//===- TokenBufferTokenManager.h  -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H
+#define LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H
+
+#include "clang/Tooling/Syntax/TokenManager.h"
+#include "clang/Tooling/Syntax/Tokens.h"
+
+namespace clang {
+namespace syntax {
+
+/// A TokenBuffer-powered token manager.
+/// It tracks the underlying token buffers, source manager, etc.
+class TokenBufferTokenManager : public TokenManager {
+public:
+  TokenBufferTokenManager(const TokenBuffer &Tokens,
+                          const LangOptions &LangOpts, SourceManager &SourceMgr)
+      : Tokens(Tokens), LangOpts(LangOpts), SM(SourceMgr) {}
+
+  static bool classof(const TokenManager *N) { return N->kind() == Kind; }
+  llvm::StringLiteral kind() const override { return Kind; }
+
+  llvm::StringRef getText(Key I) const override {
+    const auto *Token = getToken(I);
+    assert(Token);
+    // Handle 'eof' separately, calling text() on it produces an empty string.
+    // FIXME: this special logic is for syntax::Leaf dump, move it when we
+    // have a direct way to retrieve token kind in the syntax::Leaf.
+    if (Token->kind() == tok::eof)
+      return "<eof>";
+    return Token->text(SM);
+  }
+
+  const syntax::Token *getToken(Key I) const {
+    return reinterpret_cast<const syntax::Token *>(I);
+  }
+  SourceManager &sourceManager() { return SM; }
+  const SourceManager &sourceManager() const { return SM; }
+  const TokenBuffer &tokenBuffer() const { return Tokens; }
+
+private:
+  // This manager is powered by the TokenBuffer.
+  static constexpr llvm::StringLiteral Kind = "TokenBuffer";
+
+  /// Add \p Buffer to the underlying source manager, tokenize it and store the
+  /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens
+  /// that were not written in user code.
+  std::pair<FileID, ArrayRef<Token>>
+  lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
+  friend class FactoryImpl;
+
+  const TokenBuffer &Tokens;
+  const LangOptions &LangOpts;
+
+  /// The underlying source manager for the ExtraTokens.
+  SourceManager &SM;
+  /// IDs and storage for additional tokenized files.
+  llvm::DenseMap<FileID, std::vector<Token>> ExtraTokens;
+};
+
+} // namespace syntax
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H
Index: clang/include/clang/Tooling/Syntax/Nodes.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Nodes.h
+++ clang/include/clang/Tooling/Syntax/Nodes.h
@@ -21,9 +21,7 @@
 #ifndef LLVM_CLANG_TOOLING_SYNTAX_NODES_H
 #define LLVM_CLANG_TOOLING_SYNTAX_NODES_H
 
-#include "clang/Basic/TokenKinds.h"
-#include "clang/Lex/Token.h"
-#include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Tooling/Syntax/Tree.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
Index: clang/include/clang/Tooling/Syntax/Mutations.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Mutations.h
+++ clang/include/clang/Tooling/Syntax/Mutations.h
@@ -13,6 +13,7 @@
 
 #include "clang/Tooling/Core/Replacement.h"
 #include "clang/Tooling/Syntax/Nodes.h"
+#include "clang/Tooling/Syntax/TokenBufferTokenManager.h"
 #include "clang/Tooling/Syntax/Tree.h"
 
 namespace clang {
@@ -20,7 +21,7 @@
 
 /// Computes textual replacements required to mimic the tree modifications made
 /// to the syntax tree.
-tooling::Replacements computeReplacements(const Arena &A,
+tooling::Replacements computeReplacements(const TokenBufferTokenManager &TBTM,
                                           const syntax::TranslationUnit &TU);
 
 /// Removes a statement or replaces it with an empty statement where one is
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to