[PATCH] D128411: [syntax] Introduce a BaseToken class.
hokein updated this revision to Diff 443922. hokein added a comment. remove all TokenBufferTokenManager cast usage, make it as a contract in the APIs. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 Files: clang/include/clang/Tooling/Syntax/BuildTree.h clang/include/clang/Tooling/Syntax/Mutations.h clang/include/clang/Tooling/Syntax/Nodes.h clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h clang/include/clang/Tooling/Syntax/TokenManager.h clang/include/clang/Tooling/Syntax/Tokens.h clang/include/clang/Tooling/Syntax/Tree.h clang/lib/Tooling/Syntax/BuildTree.cpp clang/lib/Tooling/Syntax/CMakeLists.txt clang/lib/Tooling/Syntax/ComputeReplacements.cpp clang/lib/Tooling/Syntax/Mutations.cpp clang/lib/Tooling/Syntax/Synthesis.cpp clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp clang/lib/Tooling/Syntax/Tree.cpp clang/tools/clang-check/ClangCheck.cpp clang/unittests/Tooling/Syntax/BuildTreeTest.cpp clang/unittests/Tooling/Syntax/MutationsTest.cpp clang/unittests/Tooling/Syntax/SynthesisTest.cpp clang/unittests/Tooling/Syntax/TreeTest.cpp clang/unittests/Tooling/Syntax/TreeTestBase.cpp clang/unittests/Tooling/Syntax/TreeTestBase.h Index: clang/unittests/Tooling/Syntax/TreeTestBase.h === --- clang/unittests/Tooling/Syntax/TreeTestBase.h +++ clang/unittests/Tooling/Syntax/TreeTestBase.h @@ -17,6 +17,7 @@ #include "clang/Frontend/CompilerInvocation.h" #include "clang/Testing/TestClangConfig.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/StringRef.h" @@ -51,6 +52,7 @@ std::shared_ptr Invocation; // Set after calling buildTree(). std::unique_ptr TB; + std::unique_ptr TM; std::unique_ptr Arena; }; Index: clang/unittests/Tooling/Syntax/TreeTestBase.cpp === --- clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -35,13 +35,14 @@ using namespace clang::syntax; namespace { -ArrayRef tokens(syntax::Node *N) { +ArrayRef tokens(syntax::Node *N, + const TokenBufferTokenManager ) { assert(N->isOriginal() && "tokens of modified nodes are not well-defined"); if (auto *L = dyn_cast(N)) -return llvm::makeArrayRef(L->getToken(), 1); +return llvm::makeArrayRef(STM.getToken(L->getTokenKey()), 1); auto *T = cast(N); - return llvm::makeArrayRef(T->findFirstLeaf()->getToken(), -T->findLastLeaf()->getToken() + 1); + return llvm::makeArrayRef(STM.getToken(T->findFirstLeaf()->getTokenKey()), +STM.getToken(T->findLastLeaf()->getTokenKey()) + 1); } } // namespace @@ -70,23 +71,26 @@ public: BuildSyntaxTree(syntax::TranslationUnit *, std::unique_ptr , +std::unique_ptr , std::unique_ptr , std::unique_ptr Tokens) -: Root(Root), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) { +: Root(Root), TB(TB), TM(TM), Arena(Arena), Tokens(std::move(Tokens)) { assert(this->Tokens); } void HandleTranslationUnit(ASTContext ) override { TB = std::make_unique(std::move(*Tokens).consume()); Tokens = nullptr; // make sure we fail if this gets called twice. - Arena = std::make_unique(Ctx.getSourceManager(), - Ctx.getLangOpts(), *TB); - Root = syntax::buildSyntaxTree(*Arena, Ctx); + TM = std::make_unique( + *TB, Ctx.getLangOpts(), Ctx.getSourceManager()); + Arena = std::make_unique(); + Root = syntax::buildSyntaxTree(*Arena, *TM, Ctx); } private: syntax::TranslationUnit * std::unique_ptr +std::unique_ptr std::unique_ptr std::unique_ptr Tokens; }; @@ -94,21 +98,23 @@ class BuildSyntaxTreeAction : public ASTFrontendAction { public: BuildSyntaxTreeAction(syntax::TranslationUnit *, + std::unique_ptr , std::unique_ptr , std::unique_ptr ) -: Root(Root), TB(TB), Arena(Arena) {} +: Root(Root), TM(TM), TB(TB), Arena(Arena) {} std::unique_ptr CreateASTConsumer(CompilerInstance , StringRef InFile) override { // We start recording the tokens, ast consumer will take on the result. auto Tokens = std::make_unique(CI.getPreprocessor()); - return std::make_unique(Root, TB, Arena, + return std::make_unique(Root, TB, TM, Arena, std::move(Tokens)); } private:
[PATCH] D128411: [syntax] Introduce a BaseToken class.
sammccall added inline comments. Comment at: clang/lib/Tooling/Syntax/BuildTree.cpp:370 + TreeBuilder(syntax::Arena ) + : Arena(Arena), STM(cast(Arena.getTokenManager())), +Pending(Arena, STM.tokenBuffer()) { hokein wrote: > sammccall wrote: > > need changes to the public API to make this cast valid > Are you suggesting to change all public APIs where there is such a cast usage? > > If yes, this seems not a trivial change, I think at least we will change all > APIs (`buildSyntaxTree`, `createLeaf`, `createTree`, > `deepCopyExpandingMacros`) in `BuildTree.h` (by adding a new > `TokenBufferTokenManager` parameter). > And the `Arena` probably doesn't need to have a `TokenManager` field (it > could be simplified as a single `BumpPtrAllocator`), as the TokenManager is > passed in parallel with the Arena. > > I'm happy to do the change, but IMO, the current version doesn't seem too bad > for me (if we pass an non-SyntaxTokenManager, it will trigger an assertion in > debug mode). > Are you suggesting to change all public APIs where there is such a cast usage? Yes, at the very least they should document the requirement ("this arena must use a TokenBuffer"). An unchecked downcast with no indication on the public API that a specific subclass is required just looks like a bug. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D128411: [syntax] Introduce a BaseToken class.
hokein added inline comments. Comment at: clang/include/clang/Tooling/Syntax/SyntaxTokenManager.h:20 +/// It tracks the underlying token buffers, source manager, etc. +class SyntaxTokenManager : public TokenManager { +public: sammccall wrote: > I don't think "syntax" in "syntax token manager" is particularly > disambiguating here, both TokenBuffer and TokenManager are part of syntax, so > it's not clear what it refers to (and it doens't have any obvious > plain-english meaning). > > Maybe some combination like `TokenBufferTokenManager` or `BufferTokenManager`. > In fact I think best is `TokenBuffer::TokenManager`, still defined in a > separate header, though I'm not sure if you think that's too weird. Renamed to TokenBufferTokenManager. `BufferTokenManager` name is short, but it has `BufferToken` prefix, which seems confusing with `TokenBuffer`. `TokenBuffer::TokenManager` is weird to me, and doesn't reflect the layering IMO Comment at: clang/lib/Tooling/Syntax/BuildTree.cpp:370 + TreeBuilder(syntax::Arena ) + : Arena(Arena), STM(cast(Arena.getTokenManager())), +Pending(Arena, STM.tokenBuffer()) { sammccall wrote: > need changes to the public API to make this cast valid Are you suggesting to change all public APIs where there is such a cast usage? If yes, this seems not a trivial change, I think at least we will change all APIs (`buildSyntaxTree`, `createLeaf`, `createTree`, `deepCopyExpandingMacros`) in `BuildTree.h` (by adding a new `TokenBufferTokenManager` parameter). And the `Arena` probably doesn't need to have a `TokenManager` field (it could be simplified as a single `BumpPtrAllocator`), as the TokenManager is passed in parallel with the Arena. I'm happy to do the change, but IMO, the current version doesn't seem too bad for me (if we pass an non-SyntaxTokenManager, it will trigger an assertion in debug mode). Comment at: clang/lib/Tooling/Syntax/ComputeReplacements.cpp:94 const syntax::TranslationUnit ) { - const auto = A.getTokenBuffer(); - const auto = A.getSourceManager(); + const auto = llvm::cast(A.getTokenManager()); + const auto = STM.tokenBuffer(); sammccall wrote: > Need a change to the public interface to guarantee this cast will succeed. > The cleanest would be just to take the SyntaxTokenManager as a param (moving > the cast to the call site - I don't think Arena is otherwise used for > anything). > > Failing that we at least need to update the contract Done, this is a trivial change. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D128411: [syntax] Introduce a BaseToken class.
hokein updated this revision to Diff 443737. hokein added a comment. more update Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 Files: clang/include/clang/Tooling/Syntax/Mutations.h clang/include/clang/Tooling/Syntax/Nodes.h clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h clang/include/clang/Tooling/Syntax/TokenManager.h clang/include/clang/Tooling/Syntax/Tokens.h clang/include/clang/Tooling/Syntax/Tree.h clang/lib/Tooling/Syntax/BuildTree.cpp clang/lib/Tooling/Syntax/CMakeLists.txt clang/lib/Tooling/Syntax/ComputeReplacements.cpp clang/lib/Tooling/Syntax/Synthesis.cpp clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp clang/lib/Tooling/Syntax/Tree.cpp clang/tools/clang-check/ClangCheck.cpp clang/unittests/Tooling/Syntax/BuildTreeTest.cpp clang/unittests/Tooling/Syntax/MutationsTest.cpp clang/unittests/Tooling/Syntax/SynthesisTest.cpp clang/unittests/Tooling/Syntax/TreeTest.cpp clang/unittests/Tooling/Syntax/TreeTestBase.cpp clang/unittests/Tooling/Syntax/TreeTestBase.h Index: clang/unittests/Tooling/Syntax/TreeTestBase.h === --- clang/unittests/Tooling/Syntax/TreeTestBase.h +++ clang/unittests/Tooling/Syntax/TreeTestBase.h @@ -17,6 +17,7 @@ #include "clang/Frontend/CompilerInvocation.h" #include "clang/Testing/TestClangConfig.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/StringRef.h" @@ -51,6 +52,7 @@ std::shared_ptr Invocation; // Set after calling buildTree(). std::unique_ptr TB; + std::unique_ptr TM; std::unique_ptr Arena; }; Index: clang/unittests/Tooling/Syntax/TreeTestBase.cpp === --- clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -35,13 +35,14 @@ using namespace clang::syntax; namespace { -ArrayRef tokens(syntax::Node *N) { +ArrayRef tokens(syntax::Node *N, + const TokenBufferTokenManager ) { assert(N->isOriginal() && "tokens of modified nodes are not well-defined"); if (auto *L = dyn_cast(N)) -return llvm::makeArrayRef(L->getToken(), 1); +return llvm::makeArrayRef(STM.getToken(L->getTokenKey()), 1); auto *T = cast(N); - return llvm::makeArrayRef(T->findFirstLeaf()->getToken(), -T->findLastLeaf()->getToken() + 1); + return llvm::makeArrayRef(STM.getToken(T->findFirstLeaf()->getTokenKey()), +STM.getToken(T->findLastLeaf()->getTokenKey()) + 1); } } // namespace @@ -70,23 +71,26 @@ public: BuildSyntaxTree(syntax::TranslationUnit *, std::unique_ptr , +std::unique_ptr , std::unique_ptr , std::unique_ptr Tokens) -: Root(Root), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) { +: Root(Root), TB(TB), TM(TM), Arena(Arena), Tokens(std::move(Tokens)) { assert(this->Tokens); } void HandleTranslationUnit(ASTContext ) override { TB = std::make_unique(std::move(*Tokens).consume()); Tokens = nullptr; // make sure we fail if this gets called twice. - Arena = std::make_unique(Ctx.getSourceManager(), - Ctx.getLangOpts(), *TB); + TM = std::make_unique( + *TB, Ctx.getLangOpts(), Ctx.getSourceManager()); + Arena = std::make_unique(*TM); Root = syntax::buildSyntaxTree(*Arena, Ctx); } private: syntax::TranslationUnit * std::unique_ptr +std::unique_ptr std::unique_ptr std::unique_ptr Tokens; }; @@ -94,21 +98,23 @@ class BuildSyntaxTreeAction : public ASTFrontendAction { public: BuildSyntaxTreeAction(syntax::TranslationUnit *, + std::unique_ptr , std::unique_ptr , std::unique_ptr ) -: Root(Root), TB(TB), Arena(Arena) {} +: Root(Root), TM(TM), TB(TB), Arena(Arena) {} std::unique_ptr CreateASTConsumer(CompilerInstance , StringRef InFile) override { // We start recording the tokens, ast consumer will take on the result. auto Tokens = std::make_unique(CI.getPreprocessor()); - return std::make_unique(Root, TB, Arena, + return std::make_unique(Root, TB, TM, Arena, std::move(Tokens)); } private: syntax::TranslationUnit * +std::unique_ptr std::unique_ptr std::unique_ptr }; @@ -149,7 +155,7 @@ Compiler.setSourceManager(SourceMgr.get()); syntax::TranslationUnit *Root = nullptr; -
[PATCH] D128411: [syntax] Introduce a BaseToken class.
hokein updated this revision to Diff 443736. hokein marked 7 inline comments as done. hokein added a comment. address comments. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 Files: clang/include/clang/Tooling/Syntax/Mutations.h clang/include/clang/Tooling/Syntax/Nodes.h clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h clang/include/clang/Tooling/Syntax/TokenManager.h clang/include/clang/Tooling/Syntax/Tokens.h clang/include/clang/Tooling/Syntax/Tree.h clang/lib/Tooling/Syntax/BuildTree.cpp clang/lib/Tooling/Syntax/CMakeLists.txt clang/lib/Tooling/Syntax/ComputeReplacements.cpp clang/lib/Tooling/Syntax/Synthesis.cpp clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp clang/lib/Tooling/Syntax/Tree.cpp clang/tools/clang-check/ClangCheck.cpp clang/unittests/Tooling/Syntax/BuildTreeTest.cpp clang/unittests/Tooling/Syntax/MutationsTest.cpp clang/unittests/Tooling/Syntax/SynthesisTest.cpp clang/unittests/Tooling/Syntax/TreeTest.cpp clang/unittests/Tooling/Syntax/TreeTestBase.cpp clang/unittests/Tooling/Syntax/TreeTestBase.h Index: clang/unittests/Tooling/Syntax/TreeTestBase.h === --- clang/unittests/Tooling/Syntax/TreeTestBase.h +++ clang/unittests/Tooling/Syntax/TreeTestBase.h @@ -17,6 +17,7 @@ #include "clang/Frontend/CompilerInvocation.h" #include "clang/Testing/TestClangConfig.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/StringRef.h" @@ -51,6 +52,7 @@ std::shared_ptr Invocation; // Set after calling buildTree(). std::unique_ptr TB; + std::unique_ptr TM; std::unique_ptr Arena; }; Index: clang/unittests/Tooling/Syntax/TreeTestBase.cpp === --- clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -35,13 +35,14 @@ using namespace clang::syntax; namespace { -ArrayRef tokens(syntax::Node *N) { +ArrayRef tokens(syntax::Node *N, + const TokenBufferTokenManager ) { assert(N->isOriginal() && "tokens of modified nodes are not well-defined"); if (auto *L = dyn_cast(N)) -return llvm::makeArrayRef(L->getToken(), 1); +return llvm::makeArrayRef(STM.getToken(L->getTokenKey()), 1); auto *T = cast(N); - return llvm::makeArrayRef(T->findFirstLeaf()->getToken(), -T->findLastLeaf()->getToken() + 1); + return llvm::makeArrayRef(STM.getToken(T->findFirstLeaf()->getTokenKey()), +STM.getToken(T->findLastLeaf()->getTokenKey()) + 1); } } // namespace @@ -70,23 +71,26 @@ public: BuildSyntaxTree(syntax::TranslationUnit *, std::unique_ptr , +std::unique_ptr , std::unique_ptr , std::unique_ptr Tokens) -: Root(Root), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) { +: Root(Root), TB(TB), TM(TM), Arena(Arena), Tokens(std::move(Tokens)) { assert(this->Tokens); } void HandleTranslationUnit(ASTContext ) override { TB = std::make_unique(std::move(*Tokens).consume()); Tokens = nullptr; // make sure we fail if this gets called twice. - Arena = std::make_unique(Ctx.getSourceManager(), - Ctx.getLangOpts(), *TB); + TM = std::make_unique( + *TB, Ctx.getLangOpts(), Ctx.getSourceManager()); + Arena = std::make_unique(*TM); Root = syntax::buildSyntaxTree(*Arena, Ctx); } private: syntax::TranslationUnit * std::unique_ptr +std::unique_ptr std::unique_ptr std::unique_ptr Tokens; }; @@ -94,21 +98,23 @@ class BuildSyntaxTreeAction : public ASTFrontendAction { public: BuildSyntaxTreeAction(syntax::TranslationUnit *, + std::unique_ptr , std::unique_ptr , std::unique_ptr ) -: Root(Root), TB(TB), Arena(Arena) {} +: Root(Root), TM(TM), TB(TB), Arena(Arena) {} std::unique_ptr CreateASTConsumer(CompilerInstance , StringRef InFile) override { // We start recording the tokens, ast consumer will take on the result. auto Tokens = std::make_unique(CI.getPreprocessor()); - return std::make_unique(Root, TB, Arena, + return std::make_unique(Root, TB, TM, Arena, std::move(Tokens)); } private: syntax::TranslationUnit * +std::unique_ptr std::unique_ptr std::unique_ptr }; @@ -149,7 +155,7 @@ Compiler.setSourceManager(SourceMgr.get());
[PATCH] D128411: [syntax] Introduce a BaseToken class.
sammccall added inline comments. Comment at: clang/include/clang/Tooling/Syntax/Nodes.h:24 #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Token.h" I think Token and TokenKinds are also no longer needed? Comment at: clang/include/clang/Tooling/Syntax/SyntaxTokenManager.h:20 +/// It tracks the underlying token buffers, source manager, etc. +class SyntaxTokenManager : public TokenManager { +public: I don't think "syntax" in "syntax token manager" is particularly disambiguating here, both TokenBuffer and TokenManager are part of syntax, so it's not clear what it refers to (and it doens't have any obvious plain-english meaning). Maybe some combination like `TokenBufferTokenManager` or `BufferTokenManager`. In fact I think best is `TokenBuffer::TokenManager`, still defined in a separate header, though I'm not sure if you think that's too weird. Comment at: clang/include/clang/Tooling/Syntax/TokenManager.h:14 +// implementation. It enables producers (e.g. clang pseudoparser) to produce a +// syntax-tree with a different underlying token implementation. +// unclear: different from what? it's not clear what "enables" means if there's no default. Maybe replace the sentence with: "For example, a TokenBuffer captured from a clang parse may track macro expansions and associate tokens with clang's SourceManager, while a pseudo-parser would use a flat array of raw-lexed tokens in memory." Comment at: clang/include/clang/Tooling/Syntax/TokenManager.h:38 + using Key = uintptr_t; + /// Gets the text of token identified by the key. + virtual llvm::StringRef getText(Key K) const = 0; This is not a useful comment, either remove it or add more content to make it useful. In particular the guarantees (or lack thereof) of exactly what this text is would be helpful. (This is some source code that would produce this token, though it may differ from exactly what was spelled in the file when preprocessing is involved)? Comment at: clang/include/clang/Tooling/Syntax/Tree.h:9 // Defines the basic structure of the syntax tree. There are two kinds of nodes: -// - leaf nodes correspond to a token in the expanded token stream, +// - leaf nodes correspond to a token key in the token manager // - tree nodes correspond to language grammar constructs. this is an implementation detail. At a high level, leaf nodes correspond to tokens. I'd just delete "expanded" from the original comment Comment at: clang/include/clang/Tooling/Syntax/Tree.h:46 private: - SourceManager - const LangOptions - const TokenBuffer - /// IDs and storage for additional tokenized files. - llvm::DenseMap> ExtraTokens; + // Manage all token-related stuff. + TokenManager& TokenMgr; this is not a helpful comment, just remove it? Comment at: clang/lib/Tooling/Syntax/BuildTree.cpp:370 + TreeBuilder(syntax::Arena ) + : Arena(Arena), STM(cast(Arena.getTokenManager())), +Pending(Arena, STM.tokenBuffer()) { need changes to the public API to make this cast valid Comment at: clang/lib/Tooling/Syntax/ComputeReplacements.cpp:94 const syntax::TranslationUnit ) { - const auto = A.getTokenBuffer(); - const auto = A.getSourceManager(); + const auto = llvm::cast(A.getTokenManager()); + const auto = STM.tokenBuffer(); Need a change to the public interface to guarantee this cast will succeed. The cleanest would be just to take the SyntaxTokenManager as a param (moving the cast to the call site - I don't think Arena is otherwise used for anything). Failing that we at least need to update the contract Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D128411: [syntax] Introduce a BaseToken class.
hokein added inline comments. Comment at: clang/include/clang/Tooling/Syntax/TokenManager.h:37 + + // FIXME: add an interface for getting token kind. +}; sammccall wrote: > I wouldn't want to prejudge this: this is a very basic attribute similar to > kind/role, and we may want to store it in Leaf and avoid the virtual stuff. > > There's certainly enough space, e.g. of the current 16-bit `kind` use the top > bit to denote leaf-or-not and the bottom 15 bits to store kind-or-tokenkind. This sounds good. Adjust the FIXME. Comment at: clang/include/clang/Tooling/Syntax/Tokens.h:463 +/// It tracks the underlying token buffers, source manager, etc. +class SyntaxTokenManager : public TokenManager { +public: ilya-biryukov wrote: > sammccall wrote: > > conceptually this is just "TokenBuffer implements TokenManager" > > > > The main reason I can see not to actually write that is to avoid the > > dependency from TokenBuffer (tokens library) to TokenManager (syntax > > library). But here you've added that dependency anyway. > > > > So I think we'd be better either with `TokenBuffer : TokenManager` or > > moving this class to its own header. > I would argue they should be separate concepts. > - `TokenBuffer` is about storing tokens and mapping between expanded and > spelled token streams. > - `SyntaxTokenManager` is about implementing relevant certain operations on > `syntax::Token` and hiding the actual token type. > Conceptually those things are different and decoupling them allows for more > flexibility and allows reasoning about them independently. In particular, one > could imagine having a `SyntaxTokenManager` without a `TokenBuffer` if we do > not need to deal with two streams of tokens. > > I would suggest keeping `SyntaxTokenManager` as a separate class. > I would suggest keeping SyntaxTokenManager as a separate class. +1. Moved to a separate file. Comment at: clang/include/clang/Tooling/Syntax/Tokens.h:465 +public: + SyntaxTokenManager(SourceManager , const LangOptions , + const TokenBuffer ) sammccall wrote: > no need to take SourceManager, TokenBuffer already includes it. > LangOpts is unused here. > no need to take SourceManager, TokenBuffer already includes it. The SourceMgr can be mutated by the class (it stores the underlying tokens for `ExtraTokens`), while the SourceManager in TokenBuffer is immutable. > LangOpts is unused here. It is used in SyntaxTokenManager::lexBuffer. Comment at: clang/include/clang/Tooling/Syntax/Tokens.h:475 +assert(Token); +// Handle 'eof' separately, calling text() on it produces an empty string. +if (Token->kind() == tok::eof) sammccall wrote: > Empty string seems like the correct return value here to me. > If you want a special case for dump, I think that belongs in dump(). > > If this is because we currently provide no way to get the token kind, then > this should be a FIXME Yeah, this special case is for the Leaf node dump. Added a FIXME. (I even double whether we need this special case at all, do we really want to build a Leaf node for eof token?) Comment at: clang/include/clang/Tooling/Syntax/Tokens.h:504 + /// IDs and storage for additional tokenized files. + llvm::DenseMap> ExtraTokens; +}; sammccall wrote: > aren't we trying to store these on the syntax arena? > We never do lookups into this map, maybe lexbuffer just allocates storage on > the arena('s allocator) instead of using this map? > aren't we trying to store these on the syntax arena? We could do that, but I'd try to avoid that. Now the allocator of syntax::Arena is a storage for syntax-tree nodes only. Allocation for token-related stuff is on the `SyntaxTokenManager`. > We never do lookups into this map, maybe lexbuffer just allocates storage on > the arena('s allocator) instead of using this map? This map is moved from the syntax::Arena. You're right, there is no usage of the key at the moment, and the only use-case is to create a syntax leaf node that not backed by the source code (for refactoring usecase), it is unclear whether we will use it in the future. I will keep it as-is (this is not the scope of this patch). Comment at: clang/unittests/Tooling/Syntax/TreeTestBase.cpp:116 syntax::TranslationUnit * +std::unique_ptr std::unique_ptr ilya-biryukov wrote: > NIT: it´s not breaking anything now, but I suggest putting SyntaxTokenManager > after TokenBuffer. > The reason is that it´s the right destruction order, TokenManager has > references to TokenBuffer, so it could potentially access it in destructor > some time in the future (e.g. imagine asserting something on tokens). > > Not that it actually breaks today, but seems like a potential surprising bug > in the future if we happen to refactor code in a
[PATCH] D128411: [syntax] Introduce a BaseToken class.
hokein updated this revision to Diff 443173. hokein marked 9 inline comments as done. hokein added a comment. Herald added a subscriber: mgorny. address review comments. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 Files: clang/include/clang/Tooling/Syntax/Nodes.h clang/include/clang/Tooling/Syntax/SyntaxTokenManager.h clang/include/clang/Tooling/Syntax/TokenManager.h clang/include/clang/Tooling/Syntax/Tokens.h clang/include/clang/Tooling/Syntax/Tree.h clang/lib/Tooling/Syntax/BuildTree.cpp clang/lib/Tooling/Syntax/CMakeLists.txt clang/lib/Tooling/Syntax/ComputeReplacements.cpp clang/lib/Tooling/Syntax/SyntaxTokenManager.cpp clang/lib/Tooling/Syntax/Synthesis.cpp clang/lib/Tooling/Syntax/Tree.cpp clang/tools/clang-check/ClangCheck.cpp clang/unittests/Tooling/Syntax/BuildTreeTest.cpp clang/unittests/Tooling/Syntax/SynthesisTest.cpp clang/unittests/Tooling/Syntax/TreeTest.cpp clang/unittests/Tooling/Syntax/TreeTestBase.cpp clang/unittests/Tooling/Syntax/TreeTestBase.h Index: clang/unittests/Tooling/Syntax/TreeTestBase.h === --- clang/unittests/Tooling/Syntax/TreeTestBase.h +++ clang/unittests/Tooling/Syntax/TreeTestBase.h @@ -17,6 +17,7 @@ #include "clang/Frontend/CompilerInvocation.h" #include "clang/Testing/TestClangConfig.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/SyntaxTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/StringRef.h" @@ -51,6 +52,7 @@ std::shared_ptr Invocation; // Set after calling buildTree(). std::unique_ptr TB; + std::unique_ptr TM; std::unique_ptr Arena; }; Index: clang/unittests/Tooling/Syntax/TreeTestBase.cpp === --- clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -22,6 +22,7 @@ #include "clang/Testing/TestClangConfig.h" #include "clang/Tooling/Syntax/BuildTree.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/SyntaxTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/ArrayRef.h" @@ -35,13 +36,13 @@ using namespace clang::syntax; namespace { -ArrayRef tokens(syntax::Node *N) { +ArrayRef tokens(syntax::Node *N, const SyntaxTokenManager ) { assert(N->isOriginal() && "tokens of modified nodes are not well-defined"); if (auto *L = dyn_cast(N)) -return llvm::makeArrayRef(L->getToken(), 1); +return llvm::makeArrayRef(STM.getToken(L->getTokenKey()), 1); auto *T = cast(N); - return llvm::makeArrayRef(T->findFirstLeaf()->getToken(), -T->findLastLeaf()->getToken() + 1); + return llvm::makeArrayRef(STM.getToken(T->findFirstLeaf()->getTokenKey()), +STM.getToken(T->findLastLeaf()->getTokenKey()) + 1); } } // namespace @@ -70,23 +71,26 @@ public: BuildSyntaxTree(syntax::TranslationUnit *, std::unique_ptr , +std::unique_ptr , std::unique_ptr , std::unique_ptr Tokens) -: Root(Root), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) { +: Root(Root), TB(TB), TM(TM), Arena(Arena), Tokens(std::move(Tokens)) { assert(this->Tokens); } void HandleTranslationUnit(ASTContext ) override { TB = std::make_unique(std::move(*Tokens).consume()); Tokens = nullptr; // make sure we fail if this gets called twice. - Arena = std::make_unique(Ctx.getSourceManager(), - Ctx.getLangOpts(), *TB); + TM = std::make_unique(*TB, Ctx.getLangOpts(), +Ctx.getSourceManager()); + Arena = std::make_unique(*TM); Root = syntax::buildSyntaxTree(*Arena, Ctx); } private: syntax::TranslationUnit * std::unique_ptr +std::unique_ptr std::unique_ptr std::unique_ptr Tokens; }; @@ -94,21 +98,23 @@ class BuildSyntaxTreeAction : public ASTFrontendAction { public: BuildSyntaxTreeAction(syntax::TranslationUnit *, + std::unique_ptr , std::unique_ptr , std::unique_ptr ) -: Root(Root), TB(TB), Arena(Arena) {} +: Root(Root), TM(TM), TB(TB), Arena(Arena) {} std::unique_ptr CreateASTConsumer(CompilerInstance , StringRef InFile) override { // We start recording the tokens, ast consumer will take on the result. auto Tokens = std::make_unique(CI.getPreprocessor()); - return std::make_unique(Root, TB, Arena, + return std::make_unique(Root, TB, TM, Arena,
[PATCH] D128411: [syntax] Introduce a BaseToken class.
ilya-biryukov added inline comments. Comment at: clang/include/clang/Tooling/Syntax/Tokens.h:463 +/// It tracks the underlying token buffers, source manager, etc. +class SyntaxTokenManager : public TokenManager { +public: sammccall wrote: > conceptually this is just "TokenBuffer implements TokenManager" > > The main reason I can see not to actually write that is to avoid the > dependency from TokenBuffer (tokens library) to TokenManager (syntax > library). But here you've added that dependency anyway. > > So I think we'd be better either with `TokenBuffer : TokenManager` or moving > this class to its own header. I would argue they should be separate concepts. - `TokenBuffer` is about storing tokens and mapping between expanded and spelled token streams. - `SyntaxTokenManager` is about implementing relevant certain operations on `syntax::Token` and hiding the actual token type. Conceptually those things are different and decoupling them allows for more flexibility and allows reasoning about them independently. In particular, one could imagine having a `SyntaxTokenManager` without a `TokenBuffer` if we do not need to deal with two streams of tokens. I would suggest keeping `SyntaxTokenManager` as a separate class. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D128411: [syntax] Introduce a BaseToken class.
sammccall added inline comments. Comment at: clang/include/clang/Tooling/Syntax/TokenManager.h:9 +// +// Defines Token interfaces for the syntax-tree, decoupling the syntax-tree from +// the TokenBuffer. It enables producers (e.g. clang pseudoparser) to produce a It's important that we have comments explaining what the concept is *now*, rather than how it changes the code structure from the previous state (decouples tokenbuffer, enables pseudoparser). (I think this comment is actually fine, but be careful when writing the class comment for TokenManager) Comment at: clang/include/clang/Tooling/Syntax/TokenManager.h:33 + /// The syntax-tree Leaf node holds a Key. + using Key = const void *; + /// Gets the text of token identified by the key. hokein wrote: > ilya-biryukov wrote: > > I have just realized that we were discussing having opaque index as a key, > > but there may also be tokens in the syntax tree that are not backed by the > > `TokenBuffer`. > > Those can be synthesized, e.g. imagine someone wants to change `!(A && B)` > > to `!A || !B`. They will need to synthesize at least the `||` token as it's > > not in the source code. There is a way to do this now and it prohibits the > > use of an index to the `TokenBuffer`. > > > > So having the opaque pointer is probably ok for now, it should enable the > > pseudo-parser to build syntax trees. > > We might want to add an operation to synthesize tokens into the > > `TokenManager` at some point, but that's a discussion for another day. > > > Those can be synthesized, e.g. imagine someone wants to change !(A && B) to > > !A || !B. They will need to synthesize at least the || token as it's not in > > the source code. There is a way to do this now and it prohibits the use of > > an index to the TokenBuffer. > > Yes, this is the exact reason why the Key is an opaque pointer, my first > attempt was to use an index integer, but failed -- we already have some APIs > doing this stuff (see `createLeaf` in BuildTree.h), the token can be a > synthesized token backed up by the SourceManager... > > Personally, I don't like the Key to be an opaque pointer as well, but > considering the effort, it seems to be the best approach so far -- it enables > the pseudoparser to build syntax trees with a different Token implementation > while keeping the rest syntax stuff unchanged. > > > We might want to add an operation to synthesize tokens into the > > TokenManager at some point, but that's a discussion for another day. > > Agree, we will encounter this in the future, but we're still far away from > there (the layering mutation/syntax-tree is not perfect at the moment, > mutation still depends on the TokenBuffer). And our initial application of > syntax-tree in pseudoparser focuses on the read use-case, we should be fine > now. Consider `uintptr_t` instead, which more naturally supports both approaches. (Stashing an integer in a void* is incredibly weird) Comment at: clang/include/clang/Tooling/Syntax/TokenManager.h:37 + + // FIXME: add an interface for getting token kind. +}; I wouldn't want to prejudge this: this is a very basic attribute similar to kind/role, and we may want to store it in Leaf and avoid the virtual stuff. There's certainly enough space, e.g. of the current 16-bit `kind` use the top bit to denote leaf-or-not and the bottom 15 bits to store kind-or-tokenkind. Comment at: clang/include/clang/Tooling/Syntax/Tokens.h:463 +/// It tracks the underlying token buffers, source manager, etc. +class SyntaxTokenManager : public TokenManager { +public: conceptually this is just "TokenBuffer implements TokenManager" The main reason I can see not to actually write that is to avoid the dependency from TokenBuffer (tokens library) to TokenManager (syntax library). But here you've added that dependency anyway. So I think we'd be better either with `TokenBuffer : TokenManager` or moving this class to its own header. Comment at: clang/include/clang/Tooling/Syntax/Tokens.h:465 +public: + SyntaxTokenManager(SourceManager , const LangOptions , + const TokenBuffer ) no need to take SourceManager, TokenBuffer already includes it. LangOpts is unused here. Comment at: clang/include/clang/Tooling/Syntax/Tokens.h:475 +assert(Token); +// Handle 'eof' separately, calling text() on it produces an empty string. +if (Token->kind() == tok::eof) Empty string seems like the correct return value here to me. If you want a special case for dump, I think that belongs in dump(). If this is because we currently provide no way to get the token kind, then this should be a FIXME Comment at: clang/include/clang/Tooling/Syntax/Tokens.h:487 + const SourceManager () const { return SM; } + const
[PATCH] D128411: [syntax] Introduce a BaseToken class.
ilya-biryukov added inline comments. Comment at: clang/lib/Tooling/Syntax/BuildTree.cpp:569 struct Forest { -Forest(syntax::Arena ) { - assert(!A.getTokenBuffer().expandedTokens().empty()); - assert(A.getTokenBuffer().expandedTokens().back().kind() == tok::eof); +Forest(syntax::Arena , const syntax::SyntaxTokenManager ) { + assert(!STM.getTokenBuffer().expandedTokens().empty()); Could we accept a TokenBuffer here directly? If TokenManager is needed, we can use it directly in the arena. Comment at: clang/lib/Tooling/Syntax/BuildTree.cpp:625 /// Add \p Node to the forest and attach child nodes based on \p Tokens. -void foldChildren(const syntax::Arena , ArrayRef Tokens, +void foldChildren(const syntax::SyntaxTokenManager , ArrayRef Tokens, syntax::Tree *Node) { Same suggestion here: accept TokenBuffer instead of TokenManager Comment at: clang/lib/Tooling/Syntax/Tree.cpp:271 + // FIXME: can be fixed by adding an tok::Kind in the Leaf node. + // assert(cast(C).getToken()->kind() == L->getDelimiterTokenKind()); } hokein wrote: > ilya-biryukov wrote: > > Maybe add `TokenManager::getKind(Key)` right away and remove this FIXME. > > This should as simple as `cast(T)->Kind`, right? Or am I > > missing some complications? > Yeah, the main problem is that we don't have the `TokenManager` object in the > `syntax::Node`, we somehow need to pass it (e.g. a function parameter), which > seems a heavy change. I'm not sure it is worth for this small assert. That makes sense. WDYT about the alternative fix: to pass ̀TokenManager` to `assertInvariants`? Not necessary to do it now, just thinking about changing the FIXME Comment at: clang/unittests/Tooling/Syntax/TreeTestBase.cpp:116 syntax::TranslationUnit * +std::unique_ptr std::unique_ptr NIT: it´s not breaking anything now, but I suggest putting SyntaxTokenManager after TokenBuffer. The reason is that it´s the right destruction order, TokenManager has references to TokenBuffer, so it could potentially access it in destructor some time in the future (e.g. imagine asserting something on tokens). Not that it actually breaks today, but seems like a potential surprising bug in the future if we happen to refactor code in a certain way. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D128411: [syntax] Introduce a BaseToken class.
hokein added inline comments. Comment at: clang/include/clang/Tooling/Syntax/TokenManager.h:33 + /// The syntax-tree Leaf node holds a Key. + using Key = const void *; + /// Gets the text of token identified by the key. ilya-biryukov wrote: > I have just realized that we were discussing having opaque index as a key, > but there may also be tokens in the syntax tree that are not backed by the > `TokenBuffer`. > Those can be synthesized, e.g. imagine someone wants to change `!(A && B)` to > `!A || !B`. They will need to synthesize at least the `||` token as it's not > in the source code. There is a way to do this now and it prohibits the use of > an index to the `TokenBuffer`. > > So having the opaque pointer is probably ok for now, it should enable the > pseudo-parser to build syntax trees. > We might want to add an operation to synthesize tokens into the > `TokenManager` at some point, but that's a discussion for another day. > Those can be synthesized, e.g. imagine someone wants to change !(A && B) to > !A || !B. They will need to synthesize at least the || token as it's not in > the source code. There is a way to do this now and it prohibits the use of an > index to the TokenBuffer. Yes, this is the exact reason why the Key is an opaque pointer, my first attempt was to use an index integer, but failed -- we already have some APIs doing this stuff (see `createLeaf` in BuildTree.h), the token can be a synthesized token backed up by the SourceManager... Personally, I don't like the Key to be an opaque pointer as well, but considering the effort, it seems to be the best approach so far -- it enables the pseudoparser to build syntax trees with a different Token implementation while keeping the rest syntax stuff unchanged. > We might want to add an operation to synthesize tokens into the TokenManager > at some point, but that's a discussion for another day. Agree, we will encounter this in the future, but we're still far away from there (the layering mutation/syntax-tree is not perfect at the moment, mutation still depends on the TokenBuffer). And our initial application of syntax-tree in pseudoparser focuses on the read use-case, we should be fine now. Comment at: clang/lib/Tooling/Syntax/Tree.cpp:271 + // FIXME: can be fixed by adding an tok::Kind in the Leaf node. + // assert(cast(C).getToken()->kind() == L->getDelimiterTokenKind()); } ilya-biryukov wrote: > Maybe add `TokenManager::getKind(Key)` right away and remove this FIXME. > This should as simple as `cast(T)->Kind`, right? Or am I > missing some complications? Yeah, the main problem is that we don't have the `TokenManager` object in the `syntax::Node`, we somehow need to pass it (e.g. a function parameter), which seems a heavy change. I'm not sure it is worth for this small assert. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D128411: [syntax] Introduce a BaseToken class.
ilya-biryukov added inline comments. Comment at: clang/include/clang/Tooling/Syntax/TokenManager.h:33 + /// The syntax-tree Leaf node holds a Key. + using Key = const void *; + /// Gets the text of token identified by the key. I have just realized that we were discussing having opaque index as a key, but there may also be tokens in the syntax tree that are not backed by the `TokenBuffer`. Those can be synthesized, e.g. imagine someone wants to change `!(A && B)` to `!A || !B`. They will need to synthesize at least the `||` token as it's not in the source code. There is a way to do this now and it prohibits the use of an index to the `TokenBuffer`. So having the opaque pointer is probably ok for now, it should enable the pseudo-parser to build syntax trees. We might want to add an operation to synthesize tokens into the `TokenManager` at some point, but that's a discussion for another day. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D128411: [syntax] Introduce a BaseToken class.
ilya-biryukov added inline comments. Comment at: clang/include/clang/Tooling/Syntax/TokenManager.h:23 + +/// Base token interfaces for the syntax-tree. +class TokenManager { NIT: maybe explain the `TokenManager` concept here, the comment seems to be a leftover from the previous revision. Comment at: clang/lib/Tooling/Syntax/Tree.cpp:271 + // FIXME: can be fixed by adding an tok::Kind in the Leaf node. + // assert(cast(C).getToken()->kind() == L->getDelimiterTokenKind()); } Maybe add `TokenManager::getKind(Key)` right away and remove this FIXME. This should as simple as `cast(T)->Kind`, right? Or am I missing some complications? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D128411: [syntax] Introduce a BaseToken class.
hokein added a comment. I'm quite happy about the interfaces now, it should be in a good shape for the API review (would be nice to get some initial feedback before I do further cleanup). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D128411: [syntax] Introduce a BaseToken class.
hokein updated this revision to Diff 439705. hokein added a comment. Revised to the TokenManager approach: - Inroduce a Base Token class (TokenManager) for syntax-tree, the motivation is to allow using different underlying token implementation in syntax-tree - Decouple the syntax-tree from the TokenBuffer: - syntax-tree structure (Tree.h) doesn't depend on the TokenBuffer, SourceManager Source location etc, it communicates with TokenManager interfaces; - syntax-tree Arena is simpler, the token-managing responsiblity is transferred to TokenManager; - in SyntaxTree directory, we implement a TokenBuffer-based SyntaxTokenManager, which mangues all token-related stuff - For the mutation/replacement computation APIs, currently they only work on a TokenBuffer-based token manager. Asssertion will be raised if it is not satisfied. It is an NFC change Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 Files: clang/include/clang/Tooling/Syntax/Nodes.h clang/include/clang/Tooling/Syntax/TokenManager.h clang/include/clang/Tooling/Syntax/Tokens.h clang/include/clang/Tooling/Syntax/Tree.h clang/lib/Tooling/Syntax/BuildTree.cpp clang/lib/Tooling/Syntax/ComputeReplacements.cpp clang/lib/Tooling/Syntax/Synthesis.cpp clang/lib/Tooling/Syntax/Tokens.cpp clang/lib/Tooling/Syntax/Tree.cpp clang/tools/clang-check/ClangCheck.cpp clang/unittests/Tooling/Syntax/BuildTreeTest.cpp clang/unittests/Tooling/Syntax/SynthesisTest.cpp clang/unittests/Tooling/Syntax/TreeTest.cpp clang/unittests/Tooling/Syntax/TreeTestBase.cpp clang/unittests/Tooling/Syntax/TreeTestBase.h Index: clang/unittests/Tooling/Syntax/TreeTestBase.h === --- clang/unittests/Tooling/Syntax/TreeTestBase.h +++ clang/unittests/Tooling/Syntax/TreeTestBase.h @@ -50,6 +50,7 @@ new SourceManager(*Diags, *FileMgr); std::shared_ptr Invocation; // Set after calling buildTree(). + std::unique_ptr TM; std::unique_ptr TB; std::unique_ptr Arena; }; Index: clang/unittests/Tooling/Syntax/TreeTestBase.cpp === --- clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -35,13 +35,13 @@ using namespace clang::syntax; namespace { -ArrayRef tokens(syntax::Node *N) { +ArrayRef tokens(syntax::Node *N, const SyntaxTokenManager ) { assert(N->isOriginal() && "tokens of modified nodes are not well-defined"); if (auto *L = dyn_cast(N)) -return llvm::makeArrayRef(L->getToken(), 1); +return llvm::makeArrayRef(STM.getToken(L->getTokenKey()), 1); auto *T = cast(N); - return llvm::makeArrayRef(T->findFirstLeaf()->getToken(), -T->findLastLeaf()->getToken() + 1); + return llvm::makeArrayRef(STM.getToken(T->findFirstLeaf()->getTokenKey()), +STM.getToken(T->findLastLeaf()->getTokenKey()) + 1); } } // namespace @@ -69,23 +69,26 @@ class BuildSyntaxTree : public ASTConsumer { public: BuildSyntaxTree(syntax::TranslationUnit *, +std::unique_ptr , std::unique_ptr , std::unique_ptr , std::unique_ptr Tokens) -: Root(Root), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) { +: Root(Root), TM(TM), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) { assert(this->Tokens); } void HandleTranslationUnit(ASTContext ) override { TB = std::make_unique(std::move(*Tokens).consume()); Tokens = nullptr; // make sure we fail if this gets called twice. - Arena = std::make_unique(Ctx.getSourceManager(), - Ctx.getLangOpts(), *TB); + TM = std::make_unique(Ctx.getSourceManager(), +Ctx.getLangOpts(), *TB); + Arena = std::make_unique(*TM); Root = syntax::buildSyntaxTree(*Arena, Ctx); } private: syntax::TranslationUnit * +std::unique_ptr std::unique_ptr std::unique_ptr std::unique_ptr Tokens; @@ -94,21 +97,23 @@ class BuildSyntaxTreeAction : public ASTFrontendAction { public: BuildSyntaxTreeAction(syntax::TranslationUnit *, + std::unique_ptr , std::unique_ptr , std::unique_ptr ) -: Root(Root), TB(TB), Arena(Arena) {} +: Root(Root), TM(TM), TB(TB), Arena(Arena) {} std::unique_ptr CreateASTConsumer(CompilerInstance , StringRef InFile) override { // We start recording the tokens, ast consumer will take on the result. auto Tokens = std::make_unique(CI.getPreprocessor()); - return std::make_unique(Root, TB, Arena, + return
[PATCH] D128411: [syntax] Introduce a BaseToken class.
sammccall added a comment. As discussed offline this has some problems: - putting virtual methods on BaseToken gives it a vtable, which makes it (and syntax::Token) large - being able to use ArrayRef but not ArrayRef is a bit weird - unusual uses of inheritance can be hard to reason about We suggested rather having Leaf store an opaque ID. Callers who know what kind of tokens are in use can use this to associate with the original token. For generic use (e.g dump()), we can have a TokenManager interface which provides the common operations (like getText()). This generalizes where SourceManager is needed today. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D128411/new/ https://reviews.llvm.org/D128411 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D128411: [syntax] Introduce a BaseToken class.
hokein created this revision. hokein added reviewers: sammccall, ilya-biryukov. Herald added a project: All. hokein requested review of this revision. Herald added a project: clang. This enables us to use a different underlying token implementation for the syntax Leaf node -- in clang pseudoparser, we want to produce a syntax-tree with its own pseudo::Token rather than syntax::Token. Now Leaf node points to a BaseToken, which can be a different subclass depending on token implementation. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D128411 Files: clang/include/clang/Tooling/Syntax/BaseToken.h clang/include/clang/Tooling/Syntax/Tokens.h clang/include/clang/Tooling/Syntax/Tree.h clang/lib/Tooling/Syntax/BuildTree.cpp clang/lib/Tooling/Syntax/ComputeReplacements.cpp clang/lib/Tooling/Syntax/Synthesis.cpp clang/lib/Tooling/Syntax/Tokens.cpp clang/lib/Tooling/Syntax/Tree.cpp clang/tools/clang-check/ClangCheck.cpp clang/unittests/Tooling/Syntax/BuildTreeTest.cpp clang/unittests/Tooling/Syntax/SynthesisTest.cpp clang/unittests/Tooling/Syntax/TokensTest.cpp clang/unittests/Tooling/Syntax/TreeTest.cpp clang/unittests/Tooling/Syntax/TreeTestBase.cpp Index: clang/unittests/Tooling/Syntax/TreeTestBase.cpp === --- clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -38,10 +38,10 @@ ArrayRef tokens(syntax::Node *N) { assert(N->isOriginal() && "tokens of modified nodes are not well-defined"); if (auto *L = dyn_cast(N)) -return llvm::makeArrayRef(L->getToken(), 1); +return llvm::makeArrayRef(L->getTokenAs(), 1); auto *T = cast(N); - return llvm::makeArrayRef(T->findFirstLeaf()->getToken(), -T->findLastLeaf()->getToken() + 1); + return llvm::makeArrayRef(T->findFirstLeaf()->getTokenAs(), +T->findLastLeaf()->getTokenAs() + 1); } } // namespace Index: clang/unittests/Tooling/Syntax/TreeTest.cpp === --- clang/unittests/Tooling/Syntax/TreeTest.cpp +++ clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -180,7 +180,7 @@ private: std::string dumpQuotedTokensOrNull(const Node *N) { return N ? "'" + - StringRef(N->dumpTokens(Arena->getSourceManager())) + StringRef(N->dumpTokens(>getSourceManager())) .trim() .str() + "'" Index: clang/unittests/Tooling/Syntax/TokensTest.cpp === --- clang/unittests/Tooling/Syntax/TokensTest.cpp +++ clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -220,7 +220,7 @@ } // An equality test for search. auto TextMatches = [this](llvm::StringRef Q, const syntax::Token ) { - return Q == T.text(*SourceMgr); + return Q == T.text(&*SourceMgr); }; // Find a match. auto Found = @@ -906,7 +906,7 @@ SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(), Code.points()[Index]); const syntax::Token *Tok = spelledIdentifierTouching(Loc, Buffer); -return Tok ? Tok->text(*SourceMgr) : ""; +return Tok ? Tok->text(&*SourceMgr) : ""; }; EXPECT_THAT(Touching(0), SameRange(findSpelled("int"))); Index: clang/unittests/Tooling/Syntax/SynthesisTest.cpp === --- clang/unittests/Tooling/Syntax/SynthesisTest.cpp +++ clang/unittests/Tooling/Syntax/SynthesisTest.cpp @@ -27,7 +27,8 @@ return ::testing::AssertionFailure() << "Root was not built successfully."; -auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str(); +auto Actual = +StringRef(Root->dump(>getSourceManager())).trim().str(); auto Expected = Dump.trim().str(); // EXPECT_EQ shows the diff between the two strings if they are different. EXPECT_EQ(Expected, Actual); @@ -175,7 +176,7 @@ auto *Copy = deepCopyExpandingMacros(*Arena, StatementContinue); EXPECT_TRUE( - treeDumpEqual(Copy, StatementContinue->dump(Arena->getSourceManager(; + treeDumpEqual(Copy, StatementContinue->dump(>getSourceManager(; // FIXME: Test that copy is independent of original, once the Mutations API is // more developed. } Index: clang/unittests/Tooling/Syntax/BuildTreeTest.cpp === --- clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -26,7 +26,7 @@ auto ErrorOK = errorOK(Code); if (!ErrorOK) return ErrorOK; -auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str(); +auto Actual = StringRef(Root->dump(>getSourceManager())).trim().str(); // EXPECT_EQ shows the