llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-tools-extra Author: Victor Chernyakin (localspook) <details> <summary>Changes</summary> We have several checks that want to relex source code, but right now, doing so is annoying; the `Lexer` API is difficult to use. This PR introduces a ranges-style wrapper for it and converts some checks to use the new API. --- Full diff: https://github.com/llvm/llvm-project/pull/172508.diff 7 Files Affected: - (modified) clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp (+2-13) - (modified) clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp (+13-22) - (modified) clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.cpp (+4-7) - (modified) clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp (+2-11) - (modified) clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp (+22-33) - (modified) clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp (+7-16) - (modified) clang-tools-extra/clang-tidy/utils/LexerUtils.h (+74) ``````````diff diff --git a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp index ed30d01e645d1..235c68eea08b4 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp @@ -94,19 +94,8 @@ getCommentsInRange(ASTContext *Ctx, CharSourceRange Range) { if (Invalid) return Comments; - const char *StrData = Buffer.data() + BeginLoc.second; - - Lexer TheLexer(SM.getLocForStartOfFile(BeginLoc.first), Ctx->getLangOpts(), - Buffer.begin(), StrData, Buffer.end()); - TheLexer.SetCommentRetentionState(true); - - while (true) { - Token Tok; - if (TheLexer.LexFromRawLexer(Tok)) - break; - if (Tok.getLocation() == Range.getEnd() || Tok.is(tok::eof)) - break; - + for (const Token Tok : + utils::lexer::tokensIncludingComments(Range, SM, Ctx->getLangOpts())) { if (Tok.is(tok::comment)) { const 
std::pair<FileID, unsigned> CommentLoc = SM.getDecomposedLoc(Tok.getLocation()); diff --git a/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp b/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp index 098d46cae5df4..8f88daf1ea7cc 100644 --- a/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "MacroToEnumCheck.h" +#include "../utils/LexerUtils.h" #include "IntegralLiteralExpressionMatcher.h" - #include "clang/AST/ASTContext.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/Lex/Preprocessor.h" @@ -19,17 +19,14 @@ namespace clang::tidy::modernize { -static bool hasOnlyComments(SourceLocation Loc, const LangOptions &Options, - StringRef Text) { +static bool hasOnlyComments(SourceLocation Loc, const SourceManager &SM, + const LangOptions &Options, + CharSourceRange CharRange) { // Use a lexer to look for tokens; if we find something other than a single // hash, then there were intervening tokens between macro definitions. 
- const std::string Buffer{Text}; - Lexer Lex(Loc, Options, Buffer.c_str(), Buffer.c_str(), - Buffer.c_str() + Buffer.size()); - Token Tok; bool SeenHash = false; - while (!Lex.LexFromRawLexer(Tok)) { - if (Tok.getKind() == tok::hash && !SeenHash) { + for (const Token Tok : utils::lexer::tokens(CharRange, SM, Options)) { + if (Tok.is(tok::hash) && !SeenHash) { SeenHash = true; continue; } @@ -46,6 +43,7 @@ static bool hasOnlyComments(SourceLocation Loc, const LangOptions &Options, CRLFCR, }; + const StringRef Text = Lexer::getSourceText(CharRange, SM, Options); WhiteSpace State = WhiteSpace::Nothing; for (const char C : Text) { switch (C) { @@ -237,8 +235,7 @@ bool MacroToEnumCallbacks::isConsecutiveMacro(const MacroDirective *MD) const { SourceRange{CurrentFile->LastMacroLocation, Define}, true}; const CharSourceRange CharRange = Lexer::makeFileCharRange(BetweenMacros, SM, LangOpts); - const StringRef BetweenText = Lexer::getSourceText(CharRange, SM, LangOpts); - return hasOnlyComments(Define, LangOpts, BetweenText); + return hasOnlyComments(Define, SM, LangOpts, CharRange); } void MacroToEnumCallbacks::clearCurrentEnum(SourceLocation Loc) { @@ -258,17 +255,11 @@ void MacroToEnumCallbacks::conditionStart(const SourceLocation &Loc) { } void MacroToEnumCallbacks::checkCondition(SourceRange Range) { - const CharSourceRange CharRange = Lexer::makeFileCharRange( - CharSourceRange::getTokenRange(Range), SM, LangOpts); - std::string Text = Lexer::getSourceText(CharRange, SM, LangOpts).str(); - Lexer Lex(CharRange.getBegin(), LangOpts, Text.data(), Text.data(), - Text.data() + Text.size()); - Token Tok; - bool End = false; - while (!End) { - End = Lex.LexFromRawLexer(Tok); - if (Tok.is(tok::raw_identifier) && - Tok.getRawIdentifier().str() != "defined") + for (const Token Tok : utils::lexer::tokens( + Lexer::makeFileCharRange(CharSourceRange::getTokenRange(Range), SM, + LangOpts), + SM, LangOpts)) { + if (Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() != "defined") 
checkName(Tok); } } diff --git a/clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.cpp b/clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.cpp index aa2db2146475b..d3125711b89c3 100644 --- a/clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "RedundantVoidArgCheck.h" +#include "../utils/LexerUtils.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Lex/Lexer.h" @@ -127,12 +128,6 @@ void RedundantVoidArgCheck::removeVoidArgumentTokens( const CharSourceRange CharRange = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(Range), *Result.SourceManager, getLangOpts()); - - std::string DeclText = - Lexer::getSourceText(CharRange, *Result.SourceManager, getLangOpts()) - .str(); - Lexer PrototypeLexer(CharRange.getBegin(), getLangOpts(), DeclText.data(), - DeclText.data(), DeclText.data() + DeclText.size()); enum class TokenState { Start, MacroId, @@ -149,7 +144,9 @@ void RedundantVoidArgCheck::removeVoidArgumentTokens( const std::string Diagnostic = ("redundant void argument list in " + GrammarLocation).str(); - while (!PrototypeLexer.LexFromRawLexer(ProtoToken)) { + for (const Token Tok : + utils::lexer::tokens(CharRange, *Result.SourceManager, getLangOpts())) { + ProtoToken = Tok; switch (State) { case TokenState::Start: if (ProtoToken.is(tok::TokenKind::l_paren)) diff --git a/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp index dd516f8e51264..6de465afcca84 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp @@ -54,21 +54,12 @@ void UseOverrideCheck::registerMatchers(MatchFinder *Finder) { static SmallVector<Token, 16> parseTokens(CharSourceRange Range, const MatchFinder::MatchResult 
&Result) { const SourceManager &Sources = *Result.SourceManager; - const std::pair<FileID, unsigned> LocInfo = - Sources.getDecomposedLoc(Range.getBegin()); - const StringRef File = Sources.getBufferData(LocInfo.first); - const char *TokenBegin = File.data() + LocInfo.second; - Lexer RawLexer(Sources.getLocForStartOfFile(LocInfo.first), - Result.Context->getLangOpts(), File.begin(), TokenBegin, - File.end()); SmallVector<Token, 16> Tokens; - Token Tok; int NestedParens = 0; - while (!RawLexer.LexFromRawLexer(Tok)) { + for (Token Tok : + utils::lexer::tokens(Range, Sources, Result.Context->getLangOpts())) { if ((Tok.is(tok::semi) || Tok.is(tok::l_brace)) && NestedParens == 0) break; - if (Sources.isBeforeInTranslationUnit(Range.getEnd(), Tok.getLocation())) - break; if (Tok.is(tok::l_paren)) ++NestedParens; else if (Tok.is(tok::r_paren)) diff --git a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp index 02865b65a9ec2..054213ea542b0 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "UseTrailingReturnTypeCheck.h" +#include "../utils/LexerUtils.h" #include "clang/AST/ASTContext.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/ASTMatchers/ASTMatchFinder.h" @@ -14,7 +15,6 @@ #include "clang/Tooling/FixIt.h" #include "llvm/ADT/StringExtras.h" -#include <cctype> #include <optional> namespace clang::tidy { @@ -173,13 +173,11 @@ static SourceLocation findTrailingReturnTypeSourceLocation( Lexer::getLocForEndOfToken(ClosingParen, 0, SM, LangOpts); // Skip subsequent CV and ref qualifiers. 
- const std::pair<FileID, unsigned> Loc = SM.getDecomposedLoc(Result); - const StringRef File = SM.getBufferData(Loc.first); - const char *TokenBegin = File.data() + Loc.second; - Lexer Lexer(SM.getLocForStartOfFile(Loc.first), LangOpts, File.begin(), - TokenBegin, File.end()); - Token T; - while (!Lexer.LexFromRawLexer(T)) { + for (Token T : utils::lexer::tokens( + Lexer::makeFileCharRange( + CharSourceRange::getTokenRange(Result, F.getEndLoc()), SM, + LangOpts), + SM, LangOpts)) { if (T.is(tok::raw_identifier)) { IdentifierInfo &Info = Ctx.Idents.get( StringRef(SM.getCharacterData(T.getLocation()), T.getLength())); @@ -255,15 +253,11 @@ classifyTokensBeforeFunctionName(const FunctionDecl &F, const ASTContext &Ctx, const SourceLocation BeginNameF = expandIfMacroId(F.getLocation(), SM); // Create tokens for everything before the name of the function. - const std::pair<FileID, unsigned> Loc = SM.getDecomposedLoc(BeginF); - const StringRef File = SM.getBufferData(Loc.first); - const char *TokenBegin = File.data() + Loc.second; - Lexer Lexer(SM.getLocForStartOfFile(Loc.first), LangOpts, File.begin(), - TokenBegin, File.end()); - Token T; SmallVector<ClassifiedToken, 8> ClassifiedTokens; - while (!Lexer.LexFromRawLexer(T) && - SM.isBeforeInTranslationUnit(T.getLocation(), BeginNameF)) { + for (Token T : utils::lexer::tokens( + Lexer::makeFileCharRange( + CharSourceRange::getCharRange(BeginF, BeginNameF), SM, LangOpts), + SM, LangOpts)) { if (T.is(tok::raw_identifier)) { IdentifierInfo &Info = Ctx.Idents.get( StringRef(SM.getCharacterData(T.getLocation()), T.getLength())); @@ -367,25 +361,20 @@ static SourceLocation findLambdaTrailingReturnInsertLoc( else ParamEndLoc = Method->getParametersSourceRange().getEnd(); - const std::pair<FileID, unsigned> ParamEndLocInfo = - SM.getDecomposedLoc(ParamEndLoc); - const StringRef Buffer = SM.getBufferData(ParamEndLocInfo.first); - - Lexer Lexer(SM.getLocForStartOfFile(ParamEndLocInfo.first), LangOpts, - Buffer.begin(), 
Buffer.data() + ParamEndLocInfo.second, - Buffer.end()); - - Token Token; - while (!Lexer.LexFromRawLexer(Token)) { - if (Token.is(tok::raw_identifier)) { - IdentifierInfo &Info = Ctx.Idents.get(StringRef( - SM.getCharacterData(Token.getLocation()), Token.getLength())); - Token.setIdentifierInfo(&Info); - Token.setKind(Info.getTokenID()); + for (Token T : utils::lexer::tokens( + Lexer::makeFileCharRange(CharSourceRange::getTokenRange( + ParamEndLoc, Method->getEndLoc()), + SM, LangOpts), + SM, LangOpts)) { + if (T.is(tok::raw_identifier)) { + IdentifierInfo &Info = Ctx.Idents.get( + StringRef(SM.getCharacterData(T.getLocation()), T.getLength())); + T.setIdentifierInfo(&Info); + T.setKind(Info.getTokenID()); } - if (Token.is(tok::kw_requires)) - return Token.getLocation().getLocWithOffset(-1); + if (T.is(tok::kw_requires)) + return T.getLocation().getLocWithOffset(-1); } return {}; diff --git a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp index 1a9c161068030..baf77e6774061 100644 --- a/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "SimplifyBooleanExprCheck.h" +#include "../utils/LexerUtils.h" #include "clang/AST/Expr.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/DiagnosticIDs.h" @@ -237,22 +238,12 @@ static std::string replacementExpression(const ASTContext &Context, static bool containsDiscardedTokens(const ASTContext &Context, CharSourceRange CharRange) { - std::string ReplacementText = - Lexer::getSourceText(CharRange, Context.getSourceManager(), - Context.getLangOpts()) - .str(); - Lexer Lex(CharRange.getBegin(), Context.getLangOpts(), ReplacementText.data(), - ReplacementText.data(), - ReplacementText.data() + ReplacementText.size()); - 
Lex.SetCommentRetentionState(true); - - Token Tok; - while (!Lex.LexFromRawLexer(Tok)) { - if (Tok.is(tok::TokenKind::comment) || Tok.is(tok::TokenKind::hash)) - return true; - } - - return false; + return llvm::any_of( + utils::lexer::tokensIncludingComments( + CharRange, Context.getSourceManager(), Context.getLangOpts()), + [](Token Tok) { + return Tok.isOneOf(tok::TokenKind::comment, tok::TokenKind::hash); + }); } class SimplifyBooleanExprCheck::Visitor : public RecursiveASTVisitor<Visitor> { diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.h b/clang-tools-extra/clang-tidy/utils/LexerUtils.h index c5fb646c0efd9..9daf005a6cb00 100644 --- a/clang-tools-extra/clang-tidy/utils/LexerUtils.h +++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.h @@ -12,6 +12,8 @@ #include "clang/AST/ASTContext.h" #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Lexer.h" +#include "clang/basic/SourceManager.h" +#include <iterator> #include <optional> #include <utility> @@ -127,6 +129,78 @@ SourceLocation getUnifiedEndLoc(const Stmt &S, const SourceManager &SM, SourceLocation getLocationForNoexceptSpecifier(const FunctionDecl *FuncDecl, const SourceManager &SM); +class TokenView { +public: + class iterator { // NOLINT(readability-identifier-naming) + public: + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using iterator_category = std::input_iterator_tag; + + iterator &operator++() { + if (View->RawLexer.getBufferLocation() < View->EndOfLexedRange) + View->RawLexer.LexFromRawLexer(View->Tok); + else + View = nullptr; // No more tokens. 
+ return *this; + } + + void operator++(int) { operator++(); } + + friend bool operator==(iterator LHS, iterator RHS) { + return LHS.View == RHS.View; + } + + friend bool operator!=(iterator LHS, iterator RHS) { return !(LHS == RHS); } + + const Token &operator*() const { return View->Tok; } + const Token *operator->() const { return &View->Tok; } + + private: + friend class TokenView; + iterator(TokenView *V) : View(V) {} + TokenView *View; + }; + + iterator begin() { + iterator It(this); + ++It; + return It; + } + iterator end() { return {nullptr}; } + + TokenView(CharSourceRange Range, const SourceManager &SM, + const LangOptions &LangOpts, bool RetainComments) + : RawLexer([&]() -> Lexer { + const auto [FID, BeginOffset] = SM.getDecomposedLoc(Range.getBegin()); + const auto [_, EndOffset] = SM.getDecomposedLoc(Range.getEnd()); + const StringRef FileContents = SM.getBufferData(FID); + const StringRef LexedRange = {FileContents.begin() + BeginOffset, + EndOffset - BeginOffset}; + EndOfLexedRange = LexedRange.end(); + return {Range.getBegin(), LangOpts, LexedRange.begin(), + LexedRange.begin(), FileContents.end()}; + }()) { + RawLexer.SetCommentRetentionState(RetainComments); + } + +private: + Lexer RawLexer; + const char *EndOfLexedRange; + Token Tok; +}; + +inline TokenView tokens(CharSourceRange Range, const SourceManager &SM, + const LangOptions &LangOpts) { + return {Range, SM, LangOpts, false}; +} + +inline TokenView tokensIncludingComments(CharSourceRange Range, + const SourceManager &SM, + const LangOptions &LangOpts) { + return {Range, SM, LangOpts, true}; +} + } // namespace tidy::utils::lexer } // namespace clang `````````` </details> https://github.com/llvm/llvm-project/pull/172508 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
