ilya-biryukov created this revision. Herald added subscribers: kadircet, arphaman, jkorous, MaskRay, mgorny. Herald added a project: clang. ilya-biryukov added a parent revision: D59887: [Syntax] Introduce TokenBuffer, start clangToolingSyntax library.
Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D61681 Files: clang-tools-extra/clangd/CMakeLists.txt clang-tools-extra/clangd/ClangdUnit.cpp clang-tools-extra/clangd/ClangdUnit.h clang-tools-extra/clangd/refactor/Tweak.h clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp clang-tools-extra/clangd/tool/CMakeLists.txt clang-tools-extra/clangd/unittests/CMakeLists.txt clang-tools-extra/clangd/unittests/TweakTests.cpp clang/include/clang/Tooling/Syntax/Tokens.h clang/lib/Tooling/Syntax/Tokens.cpp
Index: clang/lib/Tooling/Syntax/Tokens.cpp =================================================================== --- clang/lib/Tooling/Syntax/Tokens.cpp +++ clang/lib/Tooling/Syntax/Tokens.cpp @@ -38,6 +38,27 @@ assert(!T.isAnnotation()); } +llvm::Optional<FileRange> syntax::Token::range(const SourceManager &SM) const { + if (!location().isFileID()) + return llvm::None; + + FileRange R; + std::tie(R.File, R.Begin) = SM.getDecomposedLoc(location()); + R.End = R.Begin + length(); + return R; +} + +llvm::Optional<FileRange> syntax::Token::range(const SourceManager &SM, + const syntax::Token &First, + const syntax::Token &Last) { + auto F = First.range(SM); + auto L = Last.range(SM); + if (!F || !L || F->File != L->File || L->Begin < F->Begin) + return llvm::None; + F->End = L->End; + return F; +} + llvm::StringRef syntax::Token::text(const SourceManager &SM) const { bool Invalid = false; const char *Start = SM.getCharacterData(location(), &Invalid); @@ -167,6 +188,32 @@ : LastSpelled + 1); } +llvm::Optional<TokenBuffer::Expansion> +TokenBuffer::findExpansion(const syntax::Token *Spelled) const { + assert(Spelled); + assert(Spelled->location().isFileID() && "not a spelled token"); + auto FileIt = Files.find(SourceMgr->getFileID(Spelled->location())); + assert(FileIt != Files.end()); + + auto &File = FileIt->second; + assert(File.SpelledTokens.data() <= Spelled && + Spelled < (File.SpelledTokens.data() + File.SpelledTokens.size())); + + unsigned SpelledI = Spelled - File.SpelledTokens.data(); + auto M = llvm::bsearch(File.Mappings, [&](const Mapping &M) { + return SpelledI <= M.BeginSpelled; + }); + if (M == File.Mappings.end() || M->BeginSpelled != SpelledI) + return llvm::None; + + Expansion E; + E.Spelled = llvm::makeArrayRef(File.SpelledTokens.data() + M->BeginSpelled, + File.SpelledTokens.data() + M->EndSpelled); + E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M->BeginExpanded, + ExpandedTokens.data() + M->EndExpanded); + return E; +} + std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM, const LangOptions &LO) { std::vector<syntax::Token> Tokens; Index: clang/include/clang/Tooling/Syntax/Tokens.h =================================================================== --- clang/include/clang/Tooling/Syntax/Tokens.h +++ clang/include/clang/Tooling/Syntax/Tokens.h @@ -46,6 +46,31 @@ namespace syntax { +/// A half-open range inside a particular file, the start offset is included and +/// the end offset is excluded from the range. +struct FileRange { + FileID File; + /// Start offset (inclusive) in a corresponding file. + unsigned Begin = 0; + /// End offset (exclusive) in a corresponding file. + unsigned End = 0; + + unsigned length() const { return End - Begin; } + bool contains(unsigned Offset) const { + return Begin <= Offset && Offset < End; + } + /// Gets the substring that this FileRange refers to. + llvm::StringRef text(const SourceManager &SM) const; +}; +inline bool operator==(const FileRange &L, const FileRange &R) { + return std::tie(L.File, L.Begin, L.End) == std::tie(R.File, R.Begin, R.End); +} +inline bool operator!=(const FileRange &L, const FileRange &R) { + return !(L == R); +} +/// For debugging purposes. +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const FileRange &R); + /// A token coming directly from a file or from a macro invocation. Has just /// enough information to locate the token in the source code. /// Can represent both expanded and spelled tokens. @@ -65,6 +90,18 @@ } unsigned length() const { return Length; } + /// Gets a range of this token. Returns llvm::None for tokens from a macro + /// expansion. + llvm::Optional<FileRange> range(const SourceManager &SM) const; + + /// Given two tokens inside the same file, returns a file range that starts at + /// \p First and ends at \p Last. + /// Returns llvm::None if any of the tokens is from a macro expansion, tokens + /// are from different files or \p Last is located before \p First. + static llvm::Optional<FileRange> range(const SourceManager &SM, + const syntax::Token &First, + const syntax::Token &Last); + /// Get the substring covered by the token. Note that will include all /// digraphs, newline continuations, etc. E.g. tokens for 'int' and /// in\ @@ -84,27 +121,6 @@ /// For debugging purposes. Equivalent to a call to Token::str(). llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T); -/// A half-open range inside a particular file, the start offset is included and -/// the end offset is excluded from the range. -struct FileRange { - FileID File; - /// Start offset (inclusive) in a corresponding file. - unsigned Begin = 0; - /// End offset (exclusive) in a corresponding file. - unsigned End = 0; - - unsigned length() const { return End - Begin; } - /// Gets the substring that this FileRange refers to. - llvm::StringRef text(const SourceManager &SM) const; -}; -inline bool operator==(const FileRange &L, const FileRange &R) { - return std::tie(L.File, L.Begin, L.End) == std::tie(R.File, R.Begin, R.End); -} -inline bool operator!=(const FileRange &L, const FileRange &R) { - return !(L == R); -} -/// For debugging purposes. -llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const FileRange &R); /// A list of tokens obtained by preprocessing a text buffer and operations to /// map between the expanded and spelled tokens, i.e. TokenBuffer has @@ -172,6 +188,14 @@ llvm::Optional<llvm::ArrayRef<syntax::Token>> spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const; + struct Expansion { + llvm::ArrayRef<syntax::Token> Spelled; + llvm::ArrayRef<syntax::Token> Expanded; + }; + /// If \p Spelled starts a mapping (e.g. if it's a macro name) return the + /// subrange of expanded tokens. + llvm::Optional<Expansion> findExpansion(const syntax::Token *Spelled) const; + /// Returns the text range, corresponding to a sequence of spelled tokens. /// EXPECTS: \p Spelled is not empty. /// EXPECTS: \p Spelled is a subrange of spelledTokens(F) for some file F. Index: clang-tools-extra/clangd/unittests/TweakTests.cpp =================================================================== --- clang-tools-extra/clangd/unittests/TweakTests.cpp +++ clang-tools-extra/clangd/unittests/TweakTests.cpp @@ -107,7 +107,7 @@ void checkTransform(llvm::StringRef ID, llvm::StringRef Input, llvm::StringRef Output) { EXPECT_THAT_EXPECTED(apply(ID, Input), HasValue(Output)) - << "action id is" << ID; + << "action id is " << ID; } TEST(TweakTest, SwapIfBranches) { @@ -185,6 +185,36 @@ )cpp"); } +TEST(TweakTest, ExpandMacro) { + llvm::StringLiteral ID = "ExpandMacro"; + + checkTransform(ID, R"cpp( +#define FOO 1 2 3 +^FOO BAR FOO +)cpp", + R"cpp( +#define FOO 1 2 3 +1 2 3 BAR FOO +)cpp"); + checkTransform(ID, R"cpp( +#define FOO 1 2 3 +FOO BAR ^FOO +)cpp", + R"cpp( +#define FOO 1 2 3 +FOO BAR 1 2 3 +)cpp"); + + checkTransform(ID, R"cpp( +#define FOO 1 2 3 +FOO BAR ^FOO +)cpp", + R"cpp( +#define FOO 1 2 3 +FOO BAR 1 2 3 +)cpp"); +} + } // namespace } // namespace clangd } // namespace clang Index: clang-tools-extra/clangd/unittests/CMakeLists.txt =================================================================== --- clang-tools-extra/clangd/unittests/CMakeLists.txt +++ clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -86,6 +86,7 @@ clangTooling clangToolingCore clangToolingInclusions + clangToolingSyntax LLVMSupport LLVMTestingSupport ) Index: clang-tools-extra/clangd/tool/CMakeLists.txt =================================================================== --- clang-tools-extra/clangd/tool/CMakeLists.txt +++ clang-tools-extra/clangd/tool/CMakeLists.txt @@ -26,5 +26,6 @@ clangSema clangTooling clangToolingCore + clangToolingSyntax ${CLANGD_XPC_LIBS} ) Index: clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp =================================================================== --- /dev/null +++ clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp @@ -0,0 +1,84 @@ +//===--- ExpandMacro.cpp -----------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "refactor/Tweak.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Error.h" +#include <string> +namespace clang { +namespace clangd { +namespace { +/// Replaces a reference to a macro under cursor to its expansion. +/// Before: +/// #define FOO(X) X+X +/// FOO(10*a) +/// ^^^ +/// After: +/// #define FOO(X) X+X +/// 10*a+10*a +class ExpandMacro : public Tweak { +public: + const char *id() const override final; + + bool prepare(const Selection &Inputs) override; + Expected<tooling::Replacements> apply(const Selection &Inputs) override; + std::string title() const override; + +private: + syntax::TokenBuffer::Expansion Expansion; +}; + +REGISTER_TWEAK(ExpandMacro) + +bool ExpandMacro::prepare(const Selection &Inputs) { + auto &SM = Inputs.AST.getASTContext().getSourceManager(); + + auto SpelledTokens = Inputs.AST.tokens().spelledTokens(SM.getMainFileID()); + unsigned CursorOffset = SM.getFileOffset(Inputs.Cursor); + auto It = llvm::bsearch(SpelledTokens, [&](const syntax::Token &T) { + assert(SM.getFileID(T.location()) == SM.getFileID(Inputs.Cursor)); + return CursorOffset <= SM.getFileOffset(T.location()); + }); + if (It == SpelledTokens.end() || !It->range(SM)->contains(CursorOffset)) + return false; + auto Expansion = Inputs.AST.tokens().findExpansion(It); + if (!Expansion) + return false; + this->Expansion = *Expansion; + return true; +} + +Expected<tooling::Replacements> ExpandMacro::apply(const Selection &Inputs) { + auto &SM = Inputs.AST.getASTContext().getSourceManager(); + + std::string Replacement; + for (const syntax::Token &T : Expansion.Expanded) { + Replacement += T.text(SM); + Replacement += " "; + } + if (!Replacement.empty() && Replacement.back() == ' ') + Replacement.pop_back(); + + CharSourceRange MacroRange = + CharSourceRange::getCharRange(Expansion.Spelled.front().location(), + Expansion.Spelled.back().endLocation()); + + tooling::Replacements R; + llvm::cantFail(R.add(tooling::Replacement(SM, MacroRange, Replacement))); + return R; +} + +std::string ExpandMacro::title() const { return "Expand macro"; } + +} // namespace +} // namespace clangd +} // namespace clang Index: clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt =================================================================== --- clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt +++ clang-tools-extra/clangd/refactor/tweaks/CMakeLists.txt @@ -12,10 +12,12 @@ # $<TARGET_OBJECTS:obj.clangDaemonTweaks> to a list of sources, see # clangd/tool/CMakeLists.txt for an example. add_clang_library(clangDaemonTweaks OBJECT + ExpandMacro.cpp SwapIfBranches.cpp LINK_LIBS clangAST clangDaemon clangToolingCore + clangToolingSyntax ) Index: clang-tools-extra/clangd/refactor/Tweak.h =================================================================== --- clang-tools-extra/clangd/refactor/Tweak.h +++ clang-tools-extra/clangd/refactor/Tweak.h @@ -23,6 +23,7 @@ #include "Protocol.h" #include "Selection.h" #include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/Tokens.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" namespace clang { @@ -43,7 +44,7 @@ Selection(ParsedAST &AST, unsigned RangeBegin, unsigned RangeEnd); /// The text of the active document. llvm::StringRef Code; - /// Parsed AST of the active file. + /// Parsed AST of the active file. ParsedAST &AST; /// A location of the cursor in the editor. SourceLocation Cursor; Index: clang-tools-extra/clangd/ClangdUnit.h =================================================================== --- clang-tools-extra/clangd/ClangdUnit.h +++ clang-tools-extra/clangd/ClangdUnit.h @@ -24,6 +24,7 @@ #include "clang/Serialization/ASTBitCodes.h" #include "clang/Tooling/CompilationDatabase.h" #include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/Tokens.h" #include <memory> #include <string> #include <vector> @@ -108,10 +109,14 @@ const IncludeStructure &getIncludeStructure() const; const CanonicalIncludes &getCanonicalIncludes() const; + /// Tokens recorded while parsing the main file. Does not record tokens from + /// the preamble. + const syntax::TokenBuffer& tokens() const { return Tokens; } + private: ParsedAST(std::shared_ptr<const PreambleData> Preamble, std::unique_ptr<CompilerInstance> Clang, - std::unique_ptr<FrontendAction> Action, + std::unique_ptr<FrontendAction> Action, syntax::TokenBuffer Tokens, std::vector<Decl *> LocalTopLevelDecls, std::vector<Diag> Diags, IncludeStructure Includes, CanonicalIncludes CanonIncludes); @@ -125,6 +130,9 @@ // FrontendAction.EndSourceFile). std::unique_ptr<CompilerInstance> Clang; std::unique_ptr<FrontendAction> Action; + /// Expanded tokens for the main file. Does not contain tokens for the file + /// preamble. + syntax::TokenBuffer Tokens; // Data, stored after parsing. std::vector<Diag> Diags; Index: clang-tools-extra/clangd/ClangdUnit.cpp =================================================================== --- clang-tools-extra/clangd/ClangdUnit.cpp +++ clang-tools-extra/clangd/ClangdUnit.cpp @@ -36,6 +36,7 @@ #include "clang/Serialization/ASTWriter.h" #include "clang/Serialization/PCHContainerOperations.h" #include "clang/Tooling/CompilationDatabase.h" +#include "clang/Tooling/Syntax/Tokens.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" @@ -386,6 +387,9 @@ collectIWYUHeaderMaps(&CanonIncludes); Clang->getPreprocessor().addCommentHandler(IWYUHandler.get()); + // Collect tokens of the main file. + syntax::TokenCollector Tokens(Clang->getPreprocessor()); + if (!Action->Execute()) log("Execute() failed when building AST for {0}", MainInput.getFile()); @@ -414,8 +418,9 @@ if (Preamble) Diags.insert(Diags.begin(), Preamble->Diags.begin(), Preamble->Diags.end()); return ParsedAST(std::move(Preamble), std::move(Clang), std::move(Action), - std::move(ParsedDecls), std::move(Diags), - std::move(Includes), std::move(CanonIncludes)); + std::move(Tokens).consume(), std::move(ParsedDecls), + std::move(Diags), std::move(Includes), + std::move(CanonIncludes)); } ParsedAST::ParsedAST(ParsedAST &&Other) = default; @@ -508,11 +513,13 @@ ParsedAST::ParsedAST(std::shared_ptr<const PreambleData> Preamble, std::unique_ptr<CompilerInstance> Clang, std::unique_ptr<FrontendAction> Action, + syntax::TokenBuffer Tokens, std::vector<Decl *> LocalTopLevelDecls, std::vector<Diag> Diags, IncludeStructure Includes, CanonicalIncludes CanonIncludes) : Preamble(std::move(Preamble)), Clang(std::move(Clang)), - Action(std::move(Action)), Diags(std::move(Diags)), + Action(std::move(Action)), Tokens(std::move(Tokens)), + Diags(std::move(Diags)), LocalTopLevelDecls(std::move(LocalTopLevelDecls)), Includes(std::move(Includes)), CanonIncludes(std::move(CanonIncludes)) { assert(this->Clang); Index: clang-tools-extra/clangd/CMakeLists.txt =================================================================== --- clang-tools-extra/clangd/CMakeLists.txt +++ clang-tools-extra/clangd/CMakeLists.txt @@ -126,6 +126,7 @@ clangToolingCore clangToolingInclusions clangToolingRefactor + clangToolingSyntax ${LLVM_PTHREAD_LIB} ${CLANGD_ATOMIC_LIB} )
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits