llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-tidy Author: Daniil Dudkin (unterumarmung) <details> <summary>Changes</summary> --- Full diff: https://github.com/llvm/llvm-project/pull/180371.diff 5 Files Affected: - (modified) clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp (+16-58) - (modified) clang-tools-extra/clang-tidy/utils/LexerUtils.cpp (+64-3) - (modified) clang-tools-extra/clang-tidy/utils/LexerUtils.h (+13) - (modified) clang-tools-extra/unittests/clang-tidy/CMakeLists.txt (+1) - (added) clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp (+130) ``````````diff diff --git a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp index d46896808bd09..5128a232f0112 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp @@ -17,6 +17,8 @@ using namespace clang::ast_matchers; namespace clang::tidy::bugprone { + +using CommentToken = clang::tidy::utils::lexer::CommentToken; namespace { AST_MATCHER(Decl, isFromStdNamespaceOrSystemHeader) { if (const auto *D = Node.getDeclContext()->getEnclosingNamespaceContext()) @@ -77,55 +79,9 @@ void ArgumentCommentCheck::registerMatchers(MatchFinder *Finder) { this); } -static std::vector<std::pair<SourceLocation, StringRef>> -getCommentsInRange(ASTContext *Ctx, CharSourceRange Range) { - std::vector<std::pair<SourceLocation, StringRef>> Comments; - auto &SM = Ctx->getSourceManager(); - const std::pair<FileID, unsigned> BeginLoc = - SM.getDecomposedLoc(Range.getBegin()), - EndLoc = - SM.getDecomposedLoc(Range.getEnd()); - - if (BeginLoc.first != EndLoc.first) - return Comments; - - bool Invalid = false; - const StringRef Buffer = SM.getBufferData(BeginLoc.first, &Invalid); - if (Invalid) - return Comments; - - const char *StrData = Buffer.data() + BeginLoc.second; - - Lexer TheLexer(SM.getLocForStartOfFile(BeginLoc.first), Ctx->getLangOpts(), - Buffer.begin(), StrData, Buffer.end()); - TheLexer.SetCommentRetentionState(true); - - while (true) { - Token Tok; - if (TheLexer.LexFromRawLexer(Tok)) - break; - if (Tok.getLocation() == Range.getEnd() || Tok.is(tok::eof)) - break; - - if (Tok.is(tok::comment)) { - const std::pair<FileID, unsigned> CommentLoc = - SM.getDecomposedLoc(Tok.getLocation()); - assert(CommentLoc.first == BeginLoc.first); - Comments.emplace_back( - Tok.getLocation(), - StringRef(Buffer.begin() + CommentLoc.second, Tok.getLength())); - } else { - // Clear comments found before the different token, e.g. comma. - Comments.clear(); - } - } - - return Comments; -} - -static std::vector<std::pair<SourceLocation, StringRef>> -getCommentsBeforeLoc(ASTContext *Ctx, SourceLocation Loc) { - std::vector<std::pair<SourceLocation, StringRef>> Comments; +static std::vector<CommentToken> getCommentsBeforeLoc(ASTContext *Ctx, + SourceLocation Loc) { + std::vector<CommentToken> Comments; while (Loc.isValid()) { const std::optional<Token> Tok = utils::lexer::getPreviousToken( Loc, Ctx->getSourceManager(), Ctx->getLangOpts(), @@ -133,11 +89,12 @@ getCommentsBeforeLoc(ASTContext *Ctx, SourceLocation Loc) { if (!Tok || Tok->isNot(tok::comment)) break; Loc = Tok->getLocation(); - Comments.emplace_back( + Comments.emplace_back(CommentToken{ Loc, Lexer::getSourceText(CharSourceRange::getCharRange( Loc, Loc.getLocWithOffset(Tok->getLength())), - Ctx->getSourceManager(), Ctx->getLangOpts())); + Ctx->getSourceManager(), Ctx->getLangOpts()), + }); } return Comments; } @@ -304,9 +261,10 @@ void ArgumentCommentCheck::checkCallArgs(ASTContext *Ctx, MakeFileCharRange(ArgBeginLoc, Args[I]->getBeginLoc()); ArgBeginLoc = Args[I]->getEndLoc(); - std::vector<std::pair<SourceLocation, StringRef>> Comments; + std::vector<CommentToken> Comments; if (BeforeArgument.isValid()) { - Comments = getCommentsInRange(Ctx, BeforeArgument); + Comments = utils::lexer::getTrailingCommentsInRange( + BeforeArgument, Ctx->getSourceManager(), Ctx->getLangOpts()); } else { // Fall back to parsing back from the start of the argument. const CharSourceRange ArgsRange = @@ -314,18 +272,18 @@ void ArgumentCommentCheck::checkCallArgs(ASTContext *Ctx, Comments = getCommentsBeforeLoc(Ctx, ArgsRange.getBegin()); } - for (auto Comment : Comments) { + for (const auto &Comment : Comments) { llvm::SmallVector<StringRef, 2> Matches; - if (IdentRE.match(Comment.second, &Matches) && + if (IdentRE.match(Comment.Text, &Matches) && !sameName(Matches[2], II->getName(), StrictMode)) { { const DiagnosticBuilder Diag = - diag(Comment.first, "argument name '%0' in comment does not " - "match parameter name %1") + diag(Comment.Loc, "argument name '%0' in comment does not " + "match parameter name %1") << Matches[2] << II; if (isLikelyTypo(Callee->parameters(), Matches[2], II->getName())) { Diag << FixItHint::CreateReplacement( - Comment.first, (Matches[1] + II->getName() + Matches[3]).str()); + Comment.Loc, (Matches[1] + II->getName() + Matches[3]).str()); } } diag(PVD->getLocation(), "%0 declared here", DiagnosticIDs::Note) << II; diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp index b77d985b76d77..7acdc2009b576 100644 --- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp @@ -10,6 +10,7 @@ #include "clang/Basic/SourceManager.h" #include <optional> #include <utility> +#include <vector> namespace clang::tidy::utils::lexer { @@ -99,6 +100,66 @@ bool rangeContainsExpansionsOrDirectives(SourceRange Range, return false; } +std::vector<CommentToken> +getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM, + const LangOptions &LangOpts) { + std::vector<CommentToken> Comments; + if (!Range.isValid()) + return Comments; + + const CharSourceRange FileRange = + Lexer::makeFileCharRange(Range, SM, LangOpts); + if (!FileRange.isValid()) + return Comments; + + const std::pair<FileID, unsigned> BeginLoc = + SM.getDecomposedLoc(FileRange.getBegin()); + const std::pair<FileID, unsigned> EndLoc = + SM.getDecomposedLoc(FileRange.getEnd()); + + if (BeginLoc.first != EndLoc.first) + return Comments; + + bool Invalid = false; + const StringRef Buffer = SM.getBufferData(BeginLoc.first, &Invalid); + if (Invalid) + return Comments; + + const char *StrData = Buffer.data() + BeginLoc.second; + + Lexer TheLexer(SM.getLocForStartOfFile(BeginLoc.first), LangOpts, + Buffer.begin(), StrData, Buffer.end()); + // Use raw lexing with comment retention so we can see comment tokens without + // preprocessing or macro expansion effects. + TheLexer.SetCommentRetentionState(true); + + while (true) { + Token Tok; + if (TheLexer.LexFromRawLexer(Tok)) + break; + if (Tok.is(tok::eof) || Tok.getLocation() == FileRange.getEnd() || + SM.isBeforeInTranslationUnit(FileRange.getEnd(), Tok.getLocation())) + break; + + if (Tok.is(tok::comment)) { + const std::pair<FileID, unsigned> CommentLoc = + SM.getDecomposedLoc(Tok.getLocation()); + assert(CommentLoc.first == BeginLoc.first); + Comments.emplace_back(CommentToken{ + Tok.getLocation(), + StringRef(Buffer.begin() + CommentLoc.second, Tok.getLength()), + }); + } else { + // Clear comments found before the different token, e.g. comma. Callers + // use this to retrieve only the contiguous comment block that directly + // precedes a token of interest. + Comments.clear(); + } + } + + return Comments; +} + std::optional<Token> getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, @@ -124,11 +185,11 @@ std::optional<Token> getQualifyingToken(tok::TokenKind TK, Tok.setIdentifierInfo(&Info); Tok.setKind(Info.getTokenID()); } - if (Tok.is(tok::less)) + if (Tok.is(tok::less)) { SawTemplate = true; - else if (Tok.isOneOf(tok::greater, tok::greatergreater)) + } else if (Tok.isOneOf(tok::greater, tok::greatergreater)) { LastMatchAfterTemplate = std::nullopt; - else if (Tok.is(TK)) { + } else if (Tok.is(TK)) { if (SawTemplate) LastMatchAfterTemplate = Tok; else diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.h b/clang-tools-extra/clang-tidy/utils/LexerUtils.h index 61389d86f22f4..f586fa9154f6c 100644 --- a/clang-tools-extra/clang-tidy/utils/LexerUtils.h +++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.h @@ -14,6 +14,7 @@ #include "clang/Lex/Lexer.h" #include <optional> #include <utility> +#include <vector> namespace clang { @@ -21,6 +22,12 @@ class Stmt; namespace tidy::utils::lexer { +// Represents a comment token and its source location in the original file. +struct CommentToken { + SourceLocation Loc; + StringRef Text; +}; + /// Returns previous token or ``std::nullopt`` if not found. std::optional<Token> getPreviousToken(SourceLocation Location, const SourceManager &SM, @@ -113,6 +120,12 @@ bool rangeContainsExpansionsOrDirectives(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts); +/// Returns comment tokens found in the given range. If a non-comment token is +/// encountered, clears previously collected comments and continues. +std::vector<CommentToken> +getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM, + const LangOptions &LangOpts); + /// Assuming that ``Range`` spans a CVR-qualified type, returns the /// token in ``Range`` that is responsible for the qualification. ``Range`` /// must be valid with respect to ``SM``. Returns ``std::nullopt`` if no diff --git a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt index 64bf47e61736c..167f5d3def06b 100644 --- a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt @@ -24,6 +24,7 @@ add_extra_unittest(ClangTidyTests DeclRefExprUtilsTest.cpp IncludeCleanerTest.cpp IncludeInserterTest.cpp + LexerUtilsTest.cpp GlobListTest.cpp GoogleModuleTest.cpp LLVMModuleTest.cpp diff --git a/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp new file mode 100644 index 0000000000000..0a738a8f422eb --- /dev/null +++ b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp @@ -0,0 +1,130 @@ +//===--- LexerUtilsTest.cpp - clang-tidy ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../clang-tidy/utils/LexerUtils.h" + +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Frontend/ASTUnit.h" +#include "clang/Serialization/PCHContainerOperations.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/Error.h" +#include "llvm/Testing/Annotations/Annotations.h" +#include "gtest/gtest.h" + +namespace clang { +namespace tidy { +namespace test { + +using clang::tooling::FileContentMappings; + +static std::unique_ptr<ASTUnit> +buildAST(StringRef Code, const FileContentMappings &Mappings = {}) { + std::vector<std::string> Args = {"-std=c++20"}; + return clang::tooling::buildASTFromCodeWithArgs( + Code, Args, "input.cc", "clang-tool", + std::make_shared<PCHContainerOperations>(), + clang::tooling::getClangStripDependencyFileAdjuster(), Mappings); +} + +static CharSourceRange rangeFromAnnotations(const llvm::Annotations &A, + const SourceManager &SM, FileID FID, + llvm::StringRef Name = "") { + const auto R = A.range(Name); + const SourceLocation Begin = + SM.getLocForStartOfFile(FID).getLocWithOffset(R.Begin); + const SourceLocation End = + SM.getLocForStartOfFile(FID).getLocWithOffset(R.End); + return CharSourceRange::getCharRange(Begin, End); +} + +namespace { + +TEST(LexerUtilsTest, GetTrailingCommentsInRangeAdjacentComments) { + llvm::Annotations Code(R"cpp( +void f() { + $range[[/*first*/ /*second*/]] + int x = 0; +} +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange Range = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range"); + const std::vector<utils::lexer::CommentToken> Comments = + utils::lexer::getTrailingCommentsInRange(Range, SM, LangOpts); + ASSERT_EQ(2u, Comments.size()); + EXPECT_EQ("/*first*/", Comments[0].Text); + EXPECT_EQ("/*second*/", Comments[1].Text); +} + +TEST(LexerUtilsTest, GetTrailingCommentsInRangeClearsOnToken) { + llvm::Annotations Code(R"cpp( +void f() { + int x = ($range[[/*first*/ 0, /*second*/]] 1); +} +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange Range = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range"); + const std::vector<utils::lexer::CommentToken> Comments = + utils::lexer::getTrailingCommentsInRange(Range, SM, LangOpts); + ASSERT_EQ(1u, Comments.size()); + EXPECT_EQ("/*second*/", Comments.front().Text); +} + +TEST(LexerUtilsTest, GetTrailingCommentsInRangeLineComments) { + llvm::Annotations Code(R"cpp( +void f() { + $range[[// first + // second + ]] + int x = 0; +} +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange Range = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range"); + const std::vector<utils::lexer::CommentToken> Comments = + utils::lexer::getTrailingCommentsInRange(Range, SM, LangOpts); + ASSERT_EQ(2u, Comments.size()); + EXPECT_EQ("// first", Comments[0].Text); + EXPECT_EQ("// second", Comments[1].Text); +} + +TEST(LexerUtilsTest, GetTrailingCommentsInRangeInvalidRange) { + std::unique_ptr<ASTUnit> AST = buildAST("int value = 0;"); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const std::vector<utils::lexer::CommentToken> Comments = + utils::lexer::getTrailingCommentsInRange(CharSourceRange(), SM, LangOpts); + EXPECT_TRUE(Comments.empty()); +} + +} // namespace + +} // namespace test +} // namespace tidy +} // namespace clang `````````` </details> https://github.com/llvm/llvm-project/pull/180371 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
