ilya-biryukov updated this revision to Diff 198609.
ilya-biryukov added a comment.

- s/corpus/arena
- Remove an accidental cmake change


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D61637/new/

https://reviews.llvm.org/D61637

Files:
  clang/include/clang/Tooling/Syntax/Arena.h
  clang/include/clang/Tooling/Syntax/Tree.h
  clang/include/clang/Tooling/Syntax/Tree/Cascade.h
  clang/include/clang/Tooling/Syntax/Tree/NodeKind.h
  clang/include/clang/Tooling/Syntax/Tree/NodeList.h
  clang/include/clang/Tooling/Syntax/Tree/Nodes.h
  clang/lib/Tooling/Syntax/Arena.cpp
  clang/lib/Tooling/Syntax/BuildFromAST.cpp
  clang/lib/Tooling/Syntax/CMakeLists.txt
  clang/lib/Tooling/Syntax/Cascade.cpp
  clang/lib/Tooling/Syntax/Nodes.cpp
  clang/unittests/Tooling/Syntax/CMakeLists.txt
  clang/unittests/Tooling/Syntax/TreeTest.cpp

Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -0,0 +1,155 @@
+//===- TreeTest.cpp -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Syntax/Tree.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/Decl.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Tooling/Syntax/Arena.h"
+#include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/Tooling/Syntax/Tree/Nodes.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <cstdlib>
+
+using namespace clang;
+
+namespace {
+class SyntaxTreeTest : public ::testing::Test {
+protected:
+  // Build a syntax tree for the code.
+  syntax::TranslationUnit *buildTree(llvm::StringRef Code) {
+    // FIXME: this code is almost the identical to the one in TokensTest. Share
+    //        it.
+    class BuildSyntaxTree : public ASTConsumer {
+    public:
+      BuildSyntaxTree(syntax::TranslationUnit *&Root,
+                      std::unique_ptr<syntax::Arena> &Arena,
+                      std::unique_ptr<syntax::TokenCollector> Tokens)
+          : Root(Root), Arena(Arena), Tokens(std::move(Tokens)) {
+        assert(this->Tokens);
+      }
+
+      void HandleTranslationUnit(ASTContext &Ctx) override {
+        Arena = llvm::make_unique<syntax::Arena>(Ctx.getSourceManager(),
+                                                 Ctx.getLangOpts(),
+                                                 std::move(*Tokens).consume());
+        Tokens = nullptr; // make sure we fail if this gets called twice.
+        Root = syntax::buildSyntaxTree(*Arena, *Ctx.getTranslationUnitDecl());
+      }
+
+    private:
+      syntax::TranslationUnit *&Root;
+      std::unique_ptr<syntax::Arena> &Arena;
+      std::unique_ptr<syntax::TokenCollector> Tokens;
+    };
+
+    class BuildSyntaxTreeAction : public ASTFrontendAction {
+    public:
+      BuildSyntaxTreeAction(syntax::TranslationUnit *&Root,
+                            std::unique_ptr<syntax::Arena> &Arena)
+          : Root(Root), Arena(Arena) {}
+
+      std::unique_ptr<ASTConsumer>
+      CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
+        // We start recording the tokens, ast consumer will take on the result.
+        auto Tokens =
+            llvm::make_unique<syntax::TokenCollector>(CI.getPreprocessor());
+        return llvm::make_unique<BuildSyntaxTree>(Root, Arena,
+                                                  std::move(Tokens));
+      }
+
+    private:
+      syntax::TranslationUnit *&Root;
+      std::unique_ptr<syntax::Arena> &Arena;
+    };
+
+    constexpr const char *FileName = "./input.cpp";
+    FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
+    // Prepare to run a compiler.
+    std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
+                                      FileName};
+    auto CI = createInvocationFromCommandLine(Args, Diags, FS);
+    assert(CI);
+    CI->getFrontendOpts().DisableFree = false;
+    CI->getPreprocessorOpts().addRemappedFile(
+        FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
+    CompilerInstance Compiler;
+    Compiler.setInvocation(std::move(CI));
+    if (!Diags->getClient())
+      Diags->setClient(new IgnoringDiagConsumer);
+    Compiler.setDiagnostics(Diags.get());
+    Compiler.setFileManager(FileMgr.get());
+    Compiler.setSourceManager(SourceMgr.get());
+
+    syntax::TranslationUnit *Root = nullptr;
+    BuildSyntaxTreeAction Recorder(Root, this->Arena);
+    if (!Compiler.ExecuteAction(Recorder)) {
+      ADD_FAILURE() << "failed to run the frontend";
+      std::abort();
+    }
+    return Root;
+  }
+
+  // Adds a file to the test VFS.
+  void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
+    if (!FS->addFile(Path, time_t(),
+                     llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
+      ADD_FAILURE() << "could not add a file to VFS: " << Path;
+    }
+  }
+
+  // Data fields.
+  llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
+      new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
+  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
+      new llvm::vfs::InMemoryFileSystem;
+  llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
+      new FileManager(FileSystemOptions(), FS);
+  llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
+      new SourceManager(*Diags, *FileMgr);
+  // Set after calling buildTree().
+  std::unique_ptr<syntax::Arena> Arena;
+};
+
+TEST_F(SyntaxTreeTest, Basic) {
+  std::pair</*Input*/ std::string, /*Expected*/ std::string> Cases[] = {{
+      R"cpp(
+int main() {}
+void foo() {}
+    )cpp",
+      R"txt(
+translation-unit
+|-top-level-decl
+| |-recovery-node
+| | `-int main ( )
+| `-compound-statment
+|   |-{
+|   `-}
+|-top-level-decl
+| |-recovery-node
+| | `-void foo ( )
+| `-compound-statment
+|   |-{
+|   `-}
+`-<eof>
+)txt"}};
+
+  for (const auto &T : Cases) {
+    auto *Root = buildTree(T.first);
+    std::string Expected = llvm::StringRef(T.second).trim().str();
+    std::string Actual = llvm::StringRef(Root->dump(*Arena)).trim();
+    EXPECT_EQ(Expected, Actual) << "the resulting dump is:\n" << Actual;
+  }
+}
+} // namespace
Index: clang/unittests/Tooling/Syntax/CMakeLists.txt
===================================================================
--- clang/unittests/Tooling/Syntax/CMakeLists.txt
+++ clang/unittests/Tooling/Syntax/CMakeLists.txt
@@ -3,11 +3,12 @@
   Support
   )
 
-add_clang_unittest(TokensTest
+add_clang_unittest(SyntaxTest
+  TreeTest.cpp
   TokensTest.cpp
 )
 
-target_link_libraries(TokensTest
+target_link_libraries(SyntaxTest
   PRIVATE
   clangAST
   clangBasic
Index: clang/lib/Tooling/Syntax/Nodes.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/Syntax/Nodes.cpp
@@ -0,0 +1,36 @@
+//===- Nodes.cpp ----------------------------------------------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Tooling/Syntax/Tree/Nodes.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Tooling/Syntax/Tree/NodeKind.h"
+
+using namespace clang;
+
+llvm::StringRef syntax::toString(syntax::NodeKind K) {
+  switch (K) {
+  case NodeKind::TranslationUnit:
+    return "translation-unit";
+  case NodeKind::RecoveryNode:
+    return "recovery-node";
+  case NodeKind::Leaf:
+    return "tokens";
+  case NodeKind::CompoundStatement:
+    return "compound-statment";
+  case NodeKind::TopLevelDecl:
+    return "top-level-decl";
+  }
+  llvm_unreachable("invalid NodeKind");
+}
+
+syntax::Leaf *syntax::CompoundStatement::lbrace() {
+  return findLeaf(tok::l_brace);
+}
+
+syntax::Leaf *syntax::CompoundStatement::rbrace() {
+  return findLeaf(tok::r_brace);
+}
Index: clang/lib/Tooling/Syntax/Cascade.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/Syntax/Cascade.cpp
@@ -0,0 +1,108 @@
+//===- Cascade.cpp --------------------------------------------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Tooling/Syntax/Tree/Cascade.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Tooling/Syntax/Tree.h"
+#include "clang/Tooling/Syntax/Tree/NodeList.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Casting.h"
+
+using namespace clang;
+
+void syntax::TreeNode::appendChildLowLevel(Arena &A, Node *Child) {
+  assert(Child->Parent == nullptr);
+  Child->Parent = this;
+  Children.push_back(A.allocator(), Child);
+}
+
+namespace {
+static void traverse(const syntax::Node *N,
+                     llvm::function_ref<void(const syntax::Node *)> Visit) {
+  if (auto *T = dyn_cast<syntax::TreeNode>(N)) {
+    for (auto *C : T->children())
+      traverse(C, Visit);
+  }
+  Visit(N);
+}
+static void dumpTokens(llvm::raw_ostream &OS, ArrayRef<syntax::Token> Tokens,
+                       const SourceManager &SM) {
+  assert(!Tokens.empty());
+  bool First = true;
+  for (const auto &T : Tokens) {
+    if (!First)
+      OS << " ";
+    else
+      First = false;
+    // Handle 'eof' separately, calling text() on it produces an empty string.
+    if (T.kind() == tok::eof) {
+      OS << "<eof>";
+      continue;
+    }
+    OS << T.text(SM);
+  }
+}
+
+static void dumpTree(llvm::raw_ostream &OS, const syntax::Node *N,
+                     const syntax::Arena &A, std::vector<bool> IndentMask) {
+  if (auto *L = llvm::dyn_cast<syntax::Leaf>(N)) {
+    dumpTokens(OS, L->tokens(), A.sourceManager());
+    OS << "\n";
+    return;
+  }
+
+  auto *T = llvm::cast<syntax::TreeNode>(N);
+  OS << toString(T->kind()) << "\n";
+
+  for (auto It = T->children().begin(); It != T->children().end(); ++It) {
+    for (bool Filled : IndentMask) {
+      if (Filled)
+        OS << "| ";
+      else
+        OS << "  ";
+    }
+    if (std::next(It) == T->children().end()) {
+      OS << "`-";
+      IndentMask.push_back(false);
+    } else {
+      OS << "|-";
+      IndentMask.push_back(true);
+    }
+    dumpTree(OS, *It, A, IndentMask);
+    IndentMask.pop_back();
+  }
+}
+} // namespace
+
+std::string syntax::Node::dump(const Arena &A) const {
+  std::string Str;
+  llvm::raw_string_ostream OS(Str);
+  dumpTree(OS, this, A, /*IndentMask=*/{});
+  return std::move(OS.str());
+}
+
+std::string syntax::Node::dumpTokens(const Arena &A) const {
+  std::string Storage;
+  llvm::raw_string_ostream OS(Storage);
+  traverse(this, [&](const syntax::Node *N) {
+    auto *L = llvm::dyn_cast<syntax::Leaf>(N);
+    if (!L)
+      return;
+    ::dumpTokens(OS, L->tokens(), A.sourceManager());
+  });
+  return OS.str();
+}
+
+syntax::Leaf *syntax::TreeNode::findLeaf(tok::TokenKind K) {
+  auto It = llvm::find_if(Children, [K](syntax::Node *N) {
+    auto *L = dyn_cast<Leaf>(N);
+    return L && L->tokens().size() == 1 && L->tokens().front().kind() == K;
+  });
+  assert(It != Children.end());
+  return cast<Leaf>(*It);
+}
Index: clang/lib/Tooling/Syntax/CMakeLists.txt
===================================================================
--- clang/lib/Tooling/Syntax/CMakeLists.txt
+++ clang/lib/Tooling/Syntax/CMakeLists.txt
@@ -1,10 +1,16 @@
 set(LLVM_LINK_COMPONENTS Support)
 
 add_clang_library(clangToolingSyntax
+  BuildFromAST.cpp
+  Cascade.cpp
+  Arena.cpp
+  Nodes.cpp
   Tokens.cpp
 
   LINK_LIBS
+  clangAST
   clangBasic
   clangFrontend
   clangLex
+  clangToolingCore
   )
Index: clang/lib/Tooling/Syntax/BuildFromAST.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/Syntax/BuildFromAST.cpp
@@ -0,0 +1,221 @@
+//===- BuildFromAST.cpp ---------------------------------------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Tooling/Syntax/Arena.h"
+#include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/Tooling/Syntax/Tree.h"
+#include "clang/Tooling/Syntax/Tree/Cascade.h"
+#include "clang/Tooling/Syntax/Tree/Nodes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Casting.h"
+
+using namespace clang;
+
+/// A helper class for constructing the syntax tree while traversing a clang
+/// AST. The tree is built left-to-right and bottom-up. At each point of the
+/// traversal we maintain a list of currently processed nodes.
+class syntax::TreeBuilder {
+public:
+  TreeBuilder(syntax::Arena &Arena) : Arena(Arena) {}
+
+  syntax::Arena &arena() { return Arena; }
+
+  /// Populate children for \p New, assuming it covers the token range with
+  /// tokens covered located at \p First and \p Last (inclusive!).
+  /// All currently processed nodes which fall into the range between \p First
+  /// and \p Last are added as children of the new node.
+  void learnNode(SourceLocation Fist, SourceLocation Last,
+                 syntax::TreeNode *New);
+
+  /// Add a leaf node for a token starting at \p Loc.
+  void learnTokenNode(SourceLocation Loc, tok::TokenKind Kind);
+
+  /// Finish building the tree and create a root TranslationUnit node. No
+  /// further calls to learn* methods are allowed after this call.
+  void learnRoot();
+
+  /// Consume the root node.
+  syntax::TranslationUnit *root() && {
+    assert(Root);
+    assert(NodesInProgress.empty());
+    return Root;
+  }
+
+private:
+  struct RangedNode {
+    RangedNode(llvm::ArrayRef<syntax::Token> Tokens, syntax::Node *Node)
+        : Tokens(Tokens), Node(Node) {}
+
+    llvm::ArrayRef<syntax::Token> Tokens;
+    syntax::Node *Node;
+  };
+  const syntax::Token *findToken(SourceLocation TokLoc) const;
+
+  void learnNodeImpl(const syntax::Token *Begin, const syntax::Token *End,
+                     syntax::TreeNode *New);
+
+  syntax::Arena &Arena;
+  std::vector<RangedNode> NodesInProgress;
+  syntax::TranslationUnit *Root = nullptr;
+};
+
+namespace {
+class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
+public:
+  explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder)
+      : Builder(Builder), LangOpts(Ctx.getLangOpts()) {}
+
+  bool shouldTraversePostOrder() const { return true; }
+
+  bool TraverseDecl(Decl *D) {
+    if (!D || isa<TranslationUnitDecl>(D))
+      return RecursiveASTVisitor::TraverseDecl(D);
+    if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext()))
+      return true; // Only build top-level decls for now, do not recurse.
+    return RecursiveASTVisitor::TraverseDecl(D);
+  }
+
+  bool TraverseCompoundStmt(CompoundStmt *S) {
+    Builder.learnTokenNode(S->getLBracLoc(), tok::l_brace);
+    Builder.learnTokenNode(S->getRBracLoc(), tok::r_brace);
+
+    Builder.learnNode(S->getBeginLoc(), S->getEndLoc(),
+                      arena().construct<syntax::CompoundStatement>());
+    // (!) we do not recurse into compound statements for now.
+    return true;
+  }
+
+  bool VisitDecl(Decl *D) {
+    assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) &&
+           "expected a top-level decl");
+    assert(!D->isImplicit());
+    Builder.learnNode(D->getBeginLoc(), D->getEndLoc(),
+                      arena().construct<syntax::TopLevelDecl>());
+    return true;
+  }
+
+  bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) {
+    Builder.learnRoot();
+    // (!) we do not want to call VisitDecl() at this point.
+    return true;
+  }
+
+private:
+  /// A small helper to save some typing.
+  syntax::Arena &arena() { return Builder.arena(); }
+
+  syntax::TreeBuilder &Builder;
+  const LangOptions &LangOpts;
+};
+} // namespace
+
+void syntax::TreeBuilder::learnNode(SourceLocation First, SourceLocation Last,
+                                    syntax::TreeNode *New) {
+  assert(First.isValid());
+  assert(Last.isValid());
+  assert(First == Last ||
+         Arena.sourceManager().isBeforeInTranslationUnit(First, Last));
+
+  learnNodeImpl(findToken(First), std::next(findToken(Last)), New);
+}
+
+void syntax::TreeBuilder::learnNodeImpl(const syntax::Token *Begin,
+                                        const syntax::Token *End,
+                                        syntax::TreeNode *New) {
+  auto FirstChild =
+      std::find_if(NodesInProgress.rbegin(), NodesInProgress.rend(),
+                   [&](const RangedNode &L) {
+                     if (&L.Tokens.front() < Begin) {
+                       assert(&L.Tokens.back() <= End);
+                       return true;
+                     }
+                     return false;
+                   })
+          .base();
+
+  syntax::Arena &A = arena();
+
+  auto *NextTok = Begin;
+  auto CoverUnknownTokens = [&](const syntax::Token *UpTo) {
+    if (NextTok == UpTo)
+      return;
+    assert(NextTok < UpTo);
+    auto *Recovery = A.construct<RecoveryNode>();
+    Recovery->appendChildLowLevel(
+        A, A.construct<syntax::Leaf>(llvm::makeArrayRef(NextTok, UpTo)));
+
+    New->appendChildLowLevel(A, Recovery);
+    NextTok = UpTo;
+  };
+  for (auto It = FirstChild; It != NodesInProgress.end(); ++It) {
+    // Add non-coverred ranges as recovery nodes.
+    CoverUnknownTokens(&It->Tokens.front());
+
+    New->appendChildLowLevel(arena(), It->Node);
+    NextTok = It->Tokens.end();
+  }
+  CoverUnknownTokens(End);
+
+  NodesInProgress.erase(FirstChild, NodesInProgress.end());
+  NodesInProgress.push_back(RangedNode{llvm::makeArrayRef(Begin, End), New});
+}
+
+void syntax::TreeBuilder::learnTokenNode(SourceLocation Loc,
+                                         tok::TokenKind Kind) {
+  auto *T = findToken(Loc);
+  assert(T->kind() == Kind);
+
+  assert(NodesInProgress.empty() ||
+         NodesInProgress.back().Tokens.end() <= T &&
+             "only allowed to add a node to the end");
+
+  auto Tokens = llvm::makeArrayRef(T, 1);
+  syntax::Leaf *New = arena().construct<syntax::Leaf>(Tokens);
+  NodesInProgress.push_back(RangedNode{Tokens, New});
+}
+
+void syntax::TreeBuilder::learnRoot() {
+  auto Tokens = Arena.tokenBuffer().expandedTokens();
+  // Add 'eof' as a separate token node, it should not go into recovery node.
+  learnTokenNode(Tokens.back().location(), tok::eof);
+
+  learnNode(Tokens.front().location(), Tokens.back().location(),
+            arena().construct<syntax::TranslationUnit>());
+
+  assert(NodesInProgress.size() == 1);
+  assert(NodesInProgress.front().Node->kind() ==
+         syntax::NodeKind::TranslationUnit);
+
+  Root = cast<syntax::TranslationUnit>(NodesInProgress.front().Node);
+  NodesInProgress.clear();
+}
+
+const syntax::Token *
+syntax::TreeBuilder::findToken(SourceLocation TokLoc) const {
+  auto Tokens = Arena.tokenBuffer().expandedTokens();
+  auto &SM = Arena.sourceManager();
+  auto It =
+      std::lower_bound(Tokens.begin(), Tokens.end(), TokLoc,
+                       [&](const syntax::Token &L, SourceLocation R) {
+                         return SM.isBeforeInTranslationUnit(L.location(), R);
+                       });
+  assert(It != Tokens.end());
+  assert(SM.getFileOffset(It->location()) == SM.getFileOffset(TokLoc));
+  return &*It;
+}
+
+syntax::TranslationUnit *
+syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) {
+  TreeBuilder Builder(A);
+  BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext());
+  return std::move(Builder).root();
+}
Index: clang/lib/Tooling/Syntax/Arena.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/Syntax/Arena.cpp
@@ -0,0 +1,29 @@
+//===- Arena.cpp ---------------------------------------------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Tooling/Syntax/Arena.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/ArrayRef.h"
+
+using namespace clang;
+
+syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
+                     TokenBuffer Tokens)
+    : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(std::move(Tokens)) {}
+
+const clang::syntax::TokenBuffer &syntax::Arena::tokenBuffer() const {
+  return Tokens;
+}
+
+std::pair<FileID, llvm::ArrayRef<syntax::Token>>
+syntax::Arena::tokenizeBuffer(std::unique_ptr<llvm::MemoryBuffer> Input) {
+  auto FID = SourceMgr.createFileID(std::move(Input));
+  auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts));
+  assert(It.second && "duplicate FileID");
+  return {FID, It.first->second};
+}
Index: clang/include/clang/Tooling/Syntax/Tree/Nodes.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/Syntax/Tree/Nodes.h
@@ -0,0 +1,70 @@
+//===- Nodes.h - syntax nodes for C/C++ grammar constructs ----*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Syntax tree nodes for C, C++ and Objective-C grammar constructs.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_NODES_H
+#define LLVM_CLANG_TOOLING_SYNTAX_TREE_NODES_H
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/Token.h"
+#include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/Tooling/Syntax/Tree/Cascade.h"
+#include "clang/Tooling/Syntax/Tree/NodeList.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+namespace clang {
+namespace syntax {
+
+/// A root node for a translation unit. Parent is always null.
+class TranslationUnit final : public TreeNode {
+public:
+  TranslationUnit() : TreeNode(NodeKind::TranslationUnit) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::TranslationUnit;
+  }
+};
+
+/// A tree node of an unknown kind, i.e. a syntax error or a construct from the
+/// clang AST without a syntax counterpart.
+/// These nodes can appear at any place in the syntax tree.
+class RecoveryNode final : public TreeNode {
+public:
+  RecoveryNode() : TreeNode(NodeKind::RecoveryNode) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::RecoveryNode;
+  }
+};
+
+/// FIXME: this node is temporary and will be replaced with nodes for various
+///        'declarations' and 'declarators' from the C/C++ grammar
+///
+/// Represents any top-level declaration. Only there to give the syntax tree a
+/// bit of structure until we implement syntax nodes for declarations and
+/// declarators.
+class TopLevelDecl final : public TreeNode {
+public:
+  TopLevelDecl() : TreeNode(NodeKind::TopLevelDecl) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::TopLevelDecl;
+  }
+};
+
+/// Represents a compound statement.
+class CompoundStatement final : public TreeNode {
+public:
+  CompoundStatement() : TreeNode(NodeKind::CompoundStatement) {}
+  static bool classof(const Node *N) {
+    return N->kind() == NodeKind::CompoundStatement;
+  }
+
+  Leaf *lbrace();
+  Leaf *rbrace();
+};
+
+} // namespace syntax
+} // namespace clang
+#endif
Index: clang/include/clang/Tooling/Syntax/Tree/NodeList.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/Syntax/Tree/NodeList.h
@@ -0,0 +1,111 @@
+//===- NodeList.h ---------------------------------------------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// A helper for storing a mutable of nodes allocated in an arena.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_NODELIST_H
+#define LLVM_CLANG_TOOLING_SYNTAX_TREE_NODELIST_H
+
+#include "llvm/Support/Allocator.h"
+#include <algorithm>
+#include <cstddef>
+#include <type_traits>
+
+namespace clang {
+namespace syntax {
+namespace detail {
+template <class T> class BumpAllocVector;
+}
+class Node;
+/// Like vector<Node*>, but allocates all the memory in the BumpPtrAllocator.
+/// Can be dropped without running the destructor.
+using NodeList = detail::BumpAllocVector<Node *>;
+
+namespace detail {
+/// A vector that requests memory from BumpPtrAllocator and has a trivial
+/// destructor. Syntax tree nodes use it to store children.
+/// FIXME: make implementation more sanitizer-friendly to allow catching
+///        out-of-bounds accesses.
+template <class T> class BumpAllocVector {
+  // Make sure the elements do not require their destructors to run.
+  static_assert(std::is_trivially_destructible<T>::value,
+                "T must be trivially destructible");
+  // Assumption to simplify the implementation. We only store pointers, so this
+  // should hold.
+  static_assert(std::is_trivially_copyable<T>::value,
+                "T must be trivially copyable.");
+
+public:
+  T *begin() { return Begin; }
+  T *end() { return End; }
+
+  const T *begin() const { return Begin; }
+  const T *end() const { return End; }
+
+  bool empty() const { return begin() == end(); }
+  size_t size() const { return End - Begin; }
+  size_t capacity() const { return StorageEnd - Begin; }
+
+  void push_back(llvm::BumpPtrAllocator &A, T Element) {
+    if (StorageEnd == End)
+      Grow(A);
+    *End = Element;
+    ++End;
+  }
+
+  T *erase(T *Begin, T *End) {
+    std::ptrdiff_t ErasedSize = End - Begin;
+    for (auto *It = End; It != this->End; ++It) {
+      *Begin = *It;
+      ++Begin;
+    }
+    this->End -= ErasedSize;
+    return Begin;
+  }
+
+  T *insert(llvm::BumpPtrAllocator &A, T *Pos, T *Begin, T *End) {
+    unsigned ToAdd = End - Begin;
+    if (capacity() - size() < ToAdd)
+      Grow(A, size() + ToAdd);
+    std::rotate(Pos, Pos + ToAdd, End + ToAdd);
+    std::copy(Begin, End, Pos);
+    this->End += ToAdd;
+    return Pos;
+  }
+
+private:
+  void Grow(llvm::BumpPtrAllocator &A, unsigned MinSize = 0) {
+    size_t Size = End - Begin;
+
+    size_t NewCapacity = 2 * (StorageEnd - Begin);
+    if (NewCapacity == 0)
+      NewCapacity = 1;
+    while (NewCapacity < MinSize)
+      NewCapacity = 2 * NewCapacity;
+
+    T *NewStorage = A.Allocate<T>(NewCapacity);
+    std::copy(Begin, End, NewStorage);
+
+    A.Deallocate(Begin, StorageEnd - Begin);
+
+    Begin = NewStorage;
+    End = NewStorage + Size;
+    StorageEnd = NewStorage + NewCapacity;
+  }
+
+private:
+  T *Begin = nullptr;
+  T *End = nullptr;
+  T *StorageEnd = nullptr;
+};
+} // namespace detail
+
+} // namespace syntax
+} // namespace clang
+
+#endif
Index: clang/include/clang/Tooling/Syntax/Tree/NodeKind.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/Syntax/Tree/NodeKind.h
@@ -0,0 +1,26 @@
+//===- NodeKind.h - an enum listing nodes of a syntax tree ----*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_NODEKIND_H
+#define LLVM_CLANG_TOOLING_SYNTAX_TREE_NODEKIND_H
+#include "llvm/ADT/StringRef.h"
+namespace clang {
+namespace syntax {
+/// A kind of a syntax node, used for implementing casts.
+enum class NodeKind {
+  Leaf,
+  RecoveryNode,
+  TranslationUnit,
+  TopLevelDecl,
+  CompoundStatement,
+};
+/// For debugging purposes.
+llvm::StringRef toString(NodeKind K);
+
+} // namespace syntax
+} // namespace clang
+#endif
Index: clang/include/clang/Tooling/Syntax/Tree/Cascade.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/Syntax/Tree/Cascade.h
@@ -0,0 +1,104 @@
+//===- Cascade.h - structure of the syntax tree ---------------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Defines the basic structure of the syntax tree. There are two kinds of nodes
+// in a tree:
+//   - leaf nodes correspond to a continuous subrange of tokens in expanded
+//     token stream.
+//   - composite (or tree) nodes correspond to language grammar constructs.
+//
+// The tree is initially built from an AST. Each node of a newly built tree
+// covers a continous subrange of expanded tokens (i.e. tokens after
+// preprocessing), the specific tokens coverered are stored in the leaf nodes of
+// a tree. A post-order traversal of a tree will visit leaf nodes in an order
+// corresponding the original order of expanded tokens.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
+#define LLVM_CLANG_TOOLING_SYNTAX_TREE_CASCADE_H
+
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Tooling/Syntax/Arena.h"
+#include "clang/Tooling/Syntax/Tree/NodeKind.h"
+#include "clang/Tooling/Syntax/Tree/NodeList.h"
+#include "llvm/ADT/ArrayRef.h"
+
+namespace clang {
+namespace syntax {
+
+class TreeNode;
+class TreeBuilder;
+class Arena;
+
+/// A node in a syntax tree. Knows only about its parent and kind.
+class Node {
+public:
+  Node(NodeKind Kind) : Parent(nullptr), Kind(Kind) {}
+  NodeKind kind() const { return Kind; }
+
+  const TreeNode *parent() const { return Parent; }
+  TreeNode *parent() { return Parent; }
+
+  /// Dumps the structure of a subtree. For debugging and testing purposes.
+  std::string dump(const Arena &A) const;
+  /// Dumps the tokens forming this subtree.
+  std::string dumpTokens(const Arena &A) const;
+
+private:
+  // TreeNode is allowed to change the Parent link.
+  friend class TreeNode;
+  TreeNode *Parent;
+  NodeKind Kind;
+};
+
+/// A leaf node points to a consecutive range of tokens in the expanded token
+/// stream.
+class Leaf final : public Node {
+public:
+  Leaf(llvm::ArrayRef<syntax::Token> Tokens)
+      : Node(NodeKind::Leaf), Tokens(Tokens) {}
+
+  static bool classof(const Node *N) { return N->kind() == NodeKind::Leaf; }
+
+  llvm::ArrayRef<syntax::Token> tokens() const { return Tokens; }
+
+private:
+  llvm::ArrayRef<Token> Tokens;
+};
+
+/// A composite tree node that has children.
+class TreeNode : public Node {
+public:
+  using Node::Node;
+  static bool classof(const Node *N) { return N->kind() > NodeKind::Leaf; }
+
+  llvm::ArrayRef<Node *> children() {
+    return llvm::makeArrayRef(Children.begin(), Children.end());
+  }
+  llvm::ArrayRef<const Node *> children() const {
+    return llvm::makeArrayRef(Children.begin(), Children.end());
+  }
+
+protected:
+  /// Find a leaf with a single token of a corresponding kind.
+  /// EXPECTS: the leaf node of a corresponding kind is found.
+  /// ENSURES: the result is non-null.
+  syntax::Leaf *findLeaf(tok::TokenKind K);
+
+private:
+  /// Appends \p Child to the list of children and and sets the parent pointer.
+  /// A very low-level operation that does not check any invariants, only used
+  /// by TreeBuilder.
+  void appendChildLowLevel(Arena &A, Node *Child);
+  friend class TreeBuilder;
+
+  NodeList Children;
+};
+
+} // namespace syntax
+} // namespace clang
+
+#endif
Index: clang/include/clang/Tooling/Syntax/Tree.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/Syntax/Tree.h
@@ -0,0 +1,39 @@
+//===- Tree.h - syntax trees ----------------------------------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Syntax tree is a parse tree for the C++ source code designed to allow
+// mutations.
+//
+// The code is divided in the following way:
+//   - 'Tree/Cascade.h' defines the basic structure of the syntax tree,
+//   - 'Tree/Nodes.h' defines the concrete node classes, corresponding to the
+//   - 'Tree.h' (this file) defines an operation to constuct a tree from an AST.
+//
+// This is still work in progress and highly experimental, we leave room for
+// ourselves to completely change the design and/or implementation.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H
+#define LLVM_CLANG_TOOLING_SYNTAX_TREE_H
+
+#include "clang/AST/Decl.h"
+#include "clang/Tooling/Core/Replacement.h"
+#include "clang/Tooling/Syntax/Arena.h"
+#include "clang/Tooling/Syntax/Tree/Cascade.h"
+#include "clang/Tooling/Syntax/Tree/Nodes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace clang {
+namespace syntax {
+
+/// Build a syntax tree for the main file.
+TranslationUnit *buildSyntaxTree(Arena &A,
+                                 const clang::TranslationUnitDecl &TU);
+
+} // namespace syntax
+} // namespace clang
+#endif
Index: clang/include/clang/Tooling/Syntax/Arena.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/Syntax/Arena.h
@@ -0,0 +1,61 @@
+//===- Arena.h - memory arena and bookkeeping for syntax trees --- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_ARENA_H
+#define LLVM_CLANG_TOOLING_SYNTAX_ARENA_H
+
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Tooling/Syntax/Tokens.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Allocator.h"
+
+namespace clang {
+namespace syntax {
+/// A memory arena for syntax trees. In addition, it also tracks the underlying
+/// token buffers, source manager, etc.
+class Arena {
+public:
+  Arena(SourceManager &SourceMgr, const LangOptions &LangOpts,
+        TokenBuffer Tokens);
+
+  const SourceManager &sourceManager() const { return SourceMgr; }
+  const LangOptions &langOptions() const { return LangOpts; }
+
+  const TokenBuffer &tokenBuffer() const;
+  llvm::BumpPtrAllocator &allocator() { return Allocator; }
+
+  /// Add \p Buffer to the underlying source manager, tokenize it and store the
+  /// resulting tokens. Useful when there is a need to materialize tokens that
+  /// were not written in user code.
+  std::pair<FileID, llvm::ArrayRef<syntax::Token>>
+  tokenizeBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer);
+
+  /// Construct a new syntax node of a specified kind. The memory for a node is
+  /// owned by the arena and will be freed the arena is destroyed.
+  template <class TNode, class... Args> TNode *construct(Args &&... As) {
+    static_assert(std::is_trivially_destructible<TNode>::value,
+                  "nodes should be trivially destructible");
+    return new (Allocator) TNode(std::forward<Args>(As)...);
+  }
+
+private:
+  SourceManager &SourceMgr;
+  const LangOptions &LangOpts;
+  TokenBuffer Tokens;
+  /// IDs and storage for additional tokenized files.
+  llvm::DenseMap<FileID, std::vector<syntax::Token>> ExtraTokens;
+  /// Keeps all the allocated nodes and their intermediate data structures.
+  llvm::BumpPtrAllocator Allocator;
+};
+
+} // namespace syntax
+} // namespace clang
+
+#endif
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to