llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-tidy
@llvm/pr-subscribers-clang-tools-extra
Author: Daniil Dudkin (unterumarmung)
<details>
<summary>Changes</summary>
Keep comment blocks between the typedef type and name by capturing the raw
lexer range and avoid injecting unrelated tokens into the replacement.
Add selected `LexerUtils` helpers with unit tests and extend
`modernize-use-using` regression coverage.
Fixes https://github.com/llvm/llvm-project/issues/159518.
---
Patch is 41.91 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/180372.diff
8 Files Affected:
- (modified) clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp
(+16-58)
- (modified) clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp (+191-48)
- (modified) clang-tools-extra/clang-tidy/utils/LexerUtils.cpp (+144-3)
- (modified) clang-tools-extra/clang-tidy/utils/LexerUtils.h (+28)
- (modified) clang-tools-extra/docs/ReleaseNotes.rst (+2-1)
- (modified) clang-tools-extra/test/clang-tidy/checkers/modernize/use-using.cpp
(+68-1)
- (modified) clang-tools-extra/unittests/clang-tidy/CMakeLists.txt (+1)
- (added) clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp (+311)
``````````diff
diff --git a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp
b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp
index d46896808bd09..5128a232f0112 100644
--- a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp
@@ -17,6 +17,8 @@
using namespace clang::ast_matchers;
namespace clang::tidy::bugprone {
+
+using CommentToken = clang::tidy::utils::lexer::CommentToken;
namespace {
AST_MATCHER(Decl, isFromStdNamespaceOrSystemHeader) {
if (const auto *D = Node.getDeclContext()->getEnclosingNamespaceContext())
@@ -77,55 +79,9 @@ void ArgumentCommentCheck::registerMatchers(MatchFinder
*Finder) {
this);
}
-static std::vector<std::pair<SourceLocation, StringRef>>
-getCommentsInRange(ASTContext *Ctx, CharSourceRange Range) {
- std::vector<std::pair<SourceLocation, StringRef>> Comments;
- auto &SM = Ctx->getSourceManager();
- const std::pair<FileID, unsigned> BeginLoc =
- SM.getDecomposedLoc(Range.getBegin()),
- EndLoc =
- SM.getDecomposedLoc(Range.getEnd());
-
- if (BeginLoc.first != EndLoc.first)
- return Comments;
-
- bool Invalid = false;
- const StringRef Buffer = SM.getBufferData(BeginLoc.first, &Invalid);
- if (Invalid)
- return Comments;
-
- const char *StrData = Buffer.data() + BeginLoc.second;
-
- Lexer TheLexer(SM.getLocForStartOfFile(BeginLoc.first), Ctx->getLangOpts(),
- Buffer.begin(), StrData, Buffer.end());
- TheLexer.SetCommentRetentionState(true);
-
- while (true) {
- Token Tok;
- if (TheLexer.LexFromRawLexer(Tok))
- break;
- if (Tok.getLocation() == Range.getEnd() || Tok.is(tok::eof))
- break;
-
- if (Tok.is(tok::comment)) {
- const std::pair<FileID, unsigned> CommentLoc =
- SM.getDecomposedLoc(Tok.getLocation());
- assert(CommentLoc.first == BeginLoc.first);
- Comments.emplace_back(
- Tok.getLocation(),
- StringRef(Buffer.begin() + CommentLoc.second, Tok.getLength()));
- } else {
- // Clear comments found before the different token, e.g. comma.
- Comments.clear();
- }
- }
-
- return Comments;
-}
-
-static std::vector<std::pair<SourceLocation, StringRef>>
-getCommentsBeforeLoc(ASTContext *Ctx, SourceLocation Loc) {
- std::vector<std::pair<SourceLocation, StringRef>> Comments;
+static std::vector<CommentToken> getCommentsBeforeLoc(ASTContext *Ctx,
+ SourceLocation Loc) {
+ std::vector<CommentToken> Comments;
while (Loc.isValid()) {
const std::optional<Token> Tok = utils::lexer::getPreviousToken(
Loc, Ctx->getSourceManager(), Ctx->getLangOpts(),
@@ -133,11 +89,12 @@ getCommentsBeforeLoc(ASTContext *Ctx, SourceLocation Loc) {
if (!Tok || Tok->isNot(tok::comment))
break;
Loc = Tok->getLocation();
- Comments.emplace_back(
+ Comments.emplace_back(CommentToken{
Loc,
Lexer::getSourceText(CharSourceRange::getCharRange(
Loc, Loc.getLocWithOffset(Tok->getLength())),
- Ctx->getSourceManager(), Ctx->getLangOpts()));
+ Ctx->getSourceManager(), Ctx->getLangOpts()),
+ });
}
return Comments;
}
@@ -304,9 +261,10 @@ void ArgumentCommentCheck::checkCallArgs(ASTContext *Ctx,
MakeFileCharRange(ArgBeginLoc, Args[I]->getBeginLoc());
ArgBeginLoc = Args[I]->getEndLoc();
- std::vector<std::pair<SourceLocation, StringRef>> Comments;
+ std::vector<CommentToken> Comments;
if (BeforeArgument.isValid()) {
- Comments = getCommentsInRange(Ctx, BeforeArgument);
+ Comments = utils::lexer::getTrailingCommentsInRange(
+ BeforeArgument, Ctx->getSourceManager(), Ctx->getLangOpts());
} else {
// Fall back to parsing back from the start of the argument.
const CharSourceRange ArgsRange =
@@ -314,18 +272,18 @@ void ArgumentCommentCheck::checkCallArgs(ASTContext *Ctx,
Comments = getCommentsBeforeLoc(Ctx, ArgsRange.getBegin());
}
- for (auto Comment : Comments) {
+ for (const auto &Comment : Comments) {
llvm::SmallVector<StringRef, 2> Matches;
- if (IdentRE.match(Comment.second, &Matches) &&
+ if (IdentRE.match(Comment.Text, &Matches) &&
!sameName(Matches[2], II->getName(), StrictMode)) {
{
const DiagnosticBuilder Diag =
- diag(Comment.first, "argument name '%0' in comment does not "
- "match parameter name %1")
+ diag(Comment.Loc, "argument name '%0' in comment does not "
+ "match parameter name %1")
<< Matches[2] << II;
if (isLikelyTypo(Callee->parameters(), Matches[2], II->getName())) {
Diag << FixItHint::CreateReplacement(
- Comment.first, (Matches[1] + II->getName() +
Matches[3]).str());
+ Comment.Loc, (Matches[1] + II->getName() + Matches[3]).str());
}
}
diag(PVD->getLocation(), "%0 declared here", DiagnosticIDs::Note) <<
II;
diff --git a/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp
b/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp
index 4d9b5ee568d80..cc467407649b2 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp
@@ -26,6 +26,129 @@ AST_MATCHER(clang::LinkageSpecDecl, isExternCLinkage) {
namespace clang::tidy::modernize {
+namespace lexer = clang::tidy::utils::lexer;
+
+namespace {
+struct RangeTextInfo {
+ std::string Text;
+ lexer::TokenRangeInfo Tokens;
+};
+} // namespace
+
+static bool hasNonWhitespace(llvm::StringRef Text) {
+ return Text.find_first_not_of(" \t\n\r\f\v") != llvm::StringRef::npos;
+}
+
+static RangeTextInfo getRangeTextInfo(clang::SourceLocation Begin,
+ clang::SourceLocation End,
+ const clang::SourceManager &SM,
+ const clang::LangOptions &LangOpts) {
+ RangeTextInfo Info;
+ if (!Begin.isValid() || !End.isValid() || Begin.isMacroID() ||
+ End.isMacroID())
+ return Info;
+
+ const clang::CharSourceRange Range =
+ clang::CharSourceRange::getCharRange(Begin, End);
+ Info.Text = lexer::getSourceText(Range, SM, LangOpts);
+ Info.Tokens = lexer::analyzeTokenRange(Range, SM, LangOpts);
+ return Info;
+}
+
+static std::string getFunctionPointerTypeText(clang::SourceRange TypeRange,
+ clang::SourceLocation NameLoc,
+ const clang::SourceManager &SM,
+ const clang::LangOptions &LO) {
+ clang::SourceLocation StartLoc = NameLoc;
+ clang::SourceLocation EndLoc = NameLoc;
+
+ while (true) {
+ const std::optional<clang::Token> Prev =
+ lexer::getPreviousToken(StartLoc, SM, LO);
+ const std::optional<clang::Token> Next =
+ lexer::findNextTokenSkippingComments(EndLoc, SM, LO);
+ if (!Prev || Prev->isNot(clang::tok::l_paren) || !Next ||
+ Next->isNot(clang::tok::r_paren))
+ break;
+
+ StartLoc = Prev->getLocation();
+ EndLoc = Next->getLocation();
+ }
+
+ const clang::CharSourceRange RangeLeftOfIdentifier =
+ clang::CharSourceRange::getCharRange(TypeRange.getBegin(), StartLoc);
+ const clang::CharSourceRange RangeRightOfIdentifier =
+ clang::CharSourceRange::getCharRange(
+ clang::Lexer::getLocForEndOfToken(EndLoc, 0, SM, LO),
+ clang::Lexer::getLocForEndOfToken(TypeRange.getEnd(), 0, SM, LO));
+ return lexer::getSourceText(RangeLeftOfIdentifier, SM, LO) +
+ lexer::getSourceText(RangeRightOfIdentifier, SM, LO);
+}
+
+static RangeTextInfo getLeadingTextInfo(bool IsFirstTypedefInGroup,
+ clang::SourceRange ReplaceRange,
+ clang::SourceRange TypeRange,
+ const clang::SourceManager &SM,
+ const clang::LangOptions &LO) {
+ RangeTextInfo Info;
+ if (!IsFirstTypedefInGroup)
+ return Info;
+
+ const clang::SourceLocation TypedefEnd =
+ clang::Lexer::getLocForEndOfToken(ReplaceRange.getBegin(), 0, SM, LO);
+ Info = getRangeTextInfo(TypedefEnd, TypeRange.getBegin(), SM, LO);
+ // Keep leading trivia only when it actually contains comments. This avoids
+ // shifting plain whitespace from between 'typedef' and the type into the
+ // replacement, preserving formatting for un-commented declarations.
+ if (!Info.Tokens.HasComment)
+ Info.Text.clear();
+ return Info;
+}
+
+static RangeTextInfo getSuffixTextInfo(bool FunctionPointerCase,
+ bool IsFirstTypedefInGroup,
+ clang::SourceLocation
PrevReplacementEnd,
+ clang::SourceRange TypeRange,
+ clang::SourceLocation NameLoc,
+ const clang::SourceManager &SM,
+ const clang::LangOptions &LO) {
+ RangeTextInfo Info;
+ if (FunctionPointerCase)
+ return Info;
+
+ // Capture the raw text between type and name to preserve trailing comments,
+ // including multi-line // blocks, without re-lexing individual comment
+ // tokens.
+ if (IsFirstTypedefInGroup) {
+ const clang::SourceLocation AfterType =
+ clang::Lexer::getLocForEndOfToken(TypeRange.getEnd(), 0, SM, LO);
+ return getRangeTextInfo(AfterType, NameLoc, SM, LO);
+ }
+
+ if (!PrevReplacementEnd.isValid() || PrevReplacementEnd.isMacroID())
+ return Info;
+
+ clang::SourceLocation AfterComma = PrevReplacementEnd;
+ if (const std::optional<clang::Token> NextTok =
+ lexer::findNextTokenSkippingComments(AfterComma, SM, LO)) {
+ if (NextTok->is(clang::tok::comma)) {
+ AfterComma =
+ clang::Lexer::getLocForEndOfToken(NextTok->getLocation(), 0, SM, LO);
+ }
+ }
+ return getRangeTextInfo(AfterComma, NameLoc, SM, LO);
+}
+
+static void stripLeadingComma(RangeTextInfo &Info) {
+ if (Info.Text.empty())
+ return;
+ // Overlapping ranges in multi-declarator typedefs can leave a leading comma
+ // in the captured suffix. Drop it so the replacement doesn't reintroduce it.
+ const size_t NonWs = Info.Text.find_first_not_of(" \t\n\r\f\v");
+ if (NonWs != std::string::npos && Info.Text[NonWs] == ',')
+ Info.Text.erase(0, NonWs + 1);
+}
+
static constexpr StringRef ExternCDeclName = "extern-c-decl";
static constexpr StringRef ParentDeclName = "parent-decl";
static constexpr StringRef TagDeclName = "tag-decl";
@@ -130,69 +253,59 @@ void UseUsingCheck::check(const MatchFinder::MatchResult
&Result) {
const TypeLoc TL = MatchedDecl->getTypeSourceInfo()->getTypeLoc();
- bool FunctionPointerCase = false;
- auto [Type, QualifierStr] = [MatchedDecl, this, &TL, &FunctionPointerCase,
- &SM,
- &LO]() -> std::pair<std::string, std::string> {
- SourceRange TypeRange = TL.getSourceRange();
+ struct TypeInfo {
+ SourceRange Range;
+ bool FunctionPointerCase = false;
+ std::string Type;
+ std::string Qualifier;
+ };
+
+ const TypeInfo TI = [&] {
+ TypeInfo Info;
+ Info.Range = TL.getSourceRange();
// Function pointer case, get the left and right side of the identifier
// without the identifier.
- if (TypeRange.fullyContains(MatchedDecl->getLocation())) {
- FunctionPointerCase = true;
- SourceLocation StartLoc = MatchedDecl->getLocation();
- SourceLocation EndLoc = MatchedDecl->getLocation();
-
- while (true) {
- const std::optional<Token> Prev =
- utils::lexer::getPreviousToken(StartLoc, SM, LO);
- const std::optional<Token> Next =
- utils::lexer::findNextTokenSkippingComments(EndLoc, SM, LO);
- if (!Prev || Prev->isNot(tok::l_paren) || !Next ||
- Next->isNot(tok::r_paren))
- break;
-
- StartLoc = Prev->getLocation();
- EndLoc = Next->getLocation();
- }
-
- const auto RangeLeftOfIdentifier =
- CharSourceRange::getCharRange(TypeRange.getBegin(), StartLoc);
- const auto RangeRightOfIdentifier = CharSourceRange::getCharRange(
- Lexer::getLocForEndOfToken(EndLoc, 0, SM, LO),
- Lexer::getLocForEndOfToken(TypeRange.getEnd(), 0, SM, LO));
- const std::string VerbatimType =
- (Lexer::getSourceText(RangeLeftOfIdentifier, SM, LO) +
- Lexer::getSourceText(RangeRightOfIdentifier, SM, LO))
- .str();
- return {VerbatimType, ""};
+ if (Info.Range.fullyContains(MatchedDecl->getLocation())) {
+ Info.FunctionPointerCase = true;
+ Info.Type = getFunctionPointerTypeText(
+ Info.Range, MatchedDecl->getLocation(), SM, LO);
+ return Info;
}
- StringRef ExtraReference = "";
- if (MainTypeEndLoc.isValid() && TypeRange.fullyContains(MainTypeEndLoc)) {
+ std::string ExtraReference;
+ if (MainTypeEndLoc.isValid() && Info.Range.fullyContains(MainTypeEndLoc)) {
// Each type introduced in a typedef can specify being a reference or
// pointer type separately, so we need to figure out if the new
using-decl
// needs to be to a reference or pointer as well.
- const SourceLocation Tok = utils::lexer::findPreviousAnyTokenKind(
+ const SourceLocation Tok = lexer::findPreviousAnyTokenKind(
MatchedDecl->getLocation(), SM, LO, tok::TokenKind::star,
tok::TokenKind::amp, tok::TokenKind::comma,
tok::TokenKind::kw_typedef);
- ExtraReference = Lexer::getSourceText(
+ ExtraReference = lexer::getSourceText(
CharSourceRange::getCharRange(Tok, Tok.getLocWithOffset(1)), SM, LO);
if (ExtraReference != "*" && ExtraReference != "&")
ExtraReference = "";
- TypeRange.setEnd(MainTypeEndLoc);
+ Info.Range.setEnd(MainTypeEndLoc);
}
- return {
- Lexer::getSourceText(CharSourceRange::getTokenRange(TypeRange), SM, LO)
- .str(),
- ExtraReference.str()};
+
+ Info.Type =
lexer::getSourceText(CharSourceRange::getTokenRange(Info.Range),
+ SM, LO);
+ Info.Qualifier = ExtraReference;
+ return Info;
}();
+
+ const SourceRange TypeRange = TI.Range;
+ const bool FunctionPointerCase = TI.FunctionPointerCase;
+ std::string Type = TI.Type;
+ const std::string QualifierStr = TI.Qualifier;
const StringRef Name = MatchedDecl->getName();
+ const SourceLocation NameLoc = MatchedDecl->getLocation();
SourceRange ReplaceRange = MatchedDecl->getSourceRange();
+ const SourceLocation PrevReplacementEnd = LastReplacementEnd;
// typedefs with multiple comma-separated definitions produce multiple
// consecutive TypedefDecl nodes whose SourceRanges overlap. Each range
starts
@@ -201,10 +314,13 @@ void UseUsingCheck::check(const MatchFinder::MatchResult
&Result) {
// But also we need to check that the ranges belong to the same file because
// different files may contain overlapping ranges.
std::string Using = "using ";
- if (ReplaceRange.getBegin().isMacroID() ||
+ const bool IsFirstTypedefInGroup =
+ ReplaceRange.getBegin().isMacroID() ||
(Result.SourceManager->getFileID(ReplaceRange.getBegin()) !=
Result.SourceManager->getFileID(LastReplacementEnd)) ||
- (ReplaceRange.getBegin() >= LastReplacementEnd)) {
+ (ReplaceRange.getBegin() >= LastReplacementEnd);
+
+ if (IsFirstTypedefInGroup) {
// This is the first (and possibly the only) TypedefDecl in a typedef. Save
// Type and Name in case we find subsequent TypedefDecl's in this typedef.
FirstTypedefType = Type;
@@ -223,6 +339,27 @@ void UseUsingCheck::check(const MatchFinder::MatchResult
&Result) {
Type = FirstTypedefName;
}
+ const RangeTextInfo LeadingTextInfo = getLeadingTextInfo(
+ IsFirstTypedefInGroup, ReplaceRange, TypeRange, SM, LO);
+ RangeTextInfo SuffixTextInfo =
+ getSuffixTextInfo(FunctionPointerCase, IsFirstTypedefInGroup,
+ PrevReplacementEnd, TypeRange, NameLoc, SM, LO);
+ if (!IsFirstTypedefInGroup)
+ stripLeadingComma(SuffixTextInfo);
+
+ const bool SuffixHasComment = SuffixTextInfo.Tokens.HasComment;
+ std::string SuffixText;
+ if (SuffixHasComment) {
+ SuffixText = SuffixTextInfo.Text;
+ } else if (QualifierStr.empty() && hasNonWhitespace(SuffixTextInfo.Text) &&
+ SuffixTextInfo.Tokens.HasPointerOrRef &&
+ !SuffixTextInfo.Tokens.HasIdentifier) {
+ // Only keep non-comment suffix text when it's purely pointer/ref trivia.
+ // This avoids accidentally pulling in keywords like 'typedef'.
+ SuffixText = SuffixTextInfo.Text;
+ }
+ const std::string QualifierText = SuffixHasComment ? "" : QualifierStr;
+
if (!ReplaceRange.getEnd().isMacroID()) {
const SourceLocation::IntTy Offset = FunctionPointerCase ? 0 : Name.size();
LastReplacementEnd = ReplaceRange.getEnd().getLocWithOffset(Offset);
@@ -235,14 +372,20 @@ void UseUsingCheck::check(const MatchFinder::MatchResult
&Result) {
if (LastTagDeclRange != LastTagDeclRanges.end() &&
LastTagDeclRange->second.isValid() &&
ReplaceRange.fullyContains(LastTagDeclRange->second)) {
- Type = std::string(Lexer::getSourceText(
- CharSourceRange::getTokenRange(LastTagDeclRange->second), SM, LO));
+ Type = lexer::getSourceText(
+ CharSourceRange::getTokenRange(LastTagDeclRange->second), SM, LO);
if (Type.empty())
return;
}
- const std::string Replacement =
- (Using + Name + " = " + Type + QualifierStr).str();
+ std::string TypeExpr =
+ LeadingTextInfo.Text + Type + QualifierText + SuffixText;
+ TypeExpr = StringRef(TypeExpr).rtrim(" \t").str();
+ StringRef Assign = " = ";
+ if (!TypeExpr.empty() &&
+ (TypeExpr.front() == ' ' || TypeExpr.front() == '\t'))
+ Assign = " =";
+ const std::string Replacement = (Using + Name + Assign + TypeExpr).str();
Diag << FixItHint::CreateReplacement(ReplaceRange, Replacement);
}
} // namespace clang::tidy::modernize
diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
index b77d985b76d77..6d374c6e17b63 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
@@ -10,6 +10,7 @@
#include "clang/Basic/SourceManager.h"
#include <optional>
#include <utility>
+#include <vector>
namespace clang::tidy::utils::lexer {
@@ -99,6 +100,146 @@ bool rangeContainsExpansionsOrDirectives(SourceRange Range,
return false;
}
+std::vector<CommentToken>
+getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
+ const LangOptions &LangOpts) {
+ std::vector<CommentToken> Comments;
+ if (!Range.isValid())
+ return Comments;
+
+ const CharSourceRange FileRange =
+ Lexer::makeFileCharRange(Range, SM, LangOpts);
+ if (!FileRange.isValid())
+ return Comments;
+
+ const std::pair<FileID, unsigned> BeginLoc =
+ SM.getDecomposedLoc(FileRange.getBegin());
+ const std::pair<FileID, unsigned> EndLoc =
+ SM.getDecomposedLoc(FileRange.getEnd());
+
+ if (BeginLoc.first != EndLoc.first)
+ return Comments;
+
+ bool Invalid = false;
+ const StringRef Buffer = SM.getBufferData(BeginLoc.first, &Invalid);
+ if (Invalid)
+ return Comments;
+
+ const char *StrData = Buffer.data() + BeginLoc.second;
+
+ Lexer TheLexer(SM.getLocForStartOfFile(BeginLoc.first), LangOpts,
+ Buffer.begin(), StrData, Buffer.end());
+ // Use raw lexing with comment retention so we can see comment tokens without
+ // preprocessing or macro expansion effects.
+ TheLexer.SetCommentRetentionState(true);
+
+ while (true) {
+ Token Tok;
+ if (TheLexer.LexFromRawLexer(Tok))
+ break;
+ if (Tok.is(tok::eof) || Tok.getLocation() == FileRange.getEnd() ||
+ SM.isBeforeInTranslationUnit(FileRange.getEnd(), Tok.getLocation()))
+ break;
+
+ if (Tok.is(tok::...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/180372
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits