https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/149886
>From c613019535f3251d3201b0f1408988366f5732c2 Mon Sep 17 00:00:00 2001 From: Devon Loehr <[email protected]> Date: Mon, 21 Jul 2025 19:07:23 +0000 Subject: [PATCH 01/14] Make special case matcher slash-agnostic --- clang/docs/SanitizerSpecialCaseList.rst | 1 + clang/unittests/Basic/DiagnosticTest.cpp | 23 +++++++++++++++++++++++ llvm/docs/ReleaseNotes.md | 4 ++++ llvm/include/llvm/Support/GlobPattern.h | 1 + llvm/lib/Support/GlobPattern.cpp | 4 ++++ 5 files changed, 33 insertions(+) diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst index 307c001664fba..f2a04dc9adcf1 100644 --- a/clang/docs/SanitizerSpecialCaseList.rst +++ b/clang/docs/SanitizerSpecialCaseList.rst @@ -174,6 +174,7 @@ tool-specific docs. # Lines starting with # are ignored. # Turn off checks for the source file # Entries without sections are placed into [*] and apply to all sanitizers + # "/" matches both windows and unix path separators ("/" and "\") src:path/to/source/file.c src:*/source/file.c # Turn off checks for this main file, including files included by it. diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp index 4b3af00c3b0ce..a6557b1e35c4b 100644 --- a/clang/unittests/Basic/DiagnosticTest.cpp +++ b/clang/unittests/Basic/DiagnosticTest.cpp @@ -360,4 +360,27 @@ TEST_F(SuppressionMappingTest, ParsingRespectsOtherWarningOpts) { clang::ProcessWarningOptions(Diags, Diags.getDiagnosticOptions(), *FS); EXPECT_THAT(diags(), IsEmpty()); } + +TEST_F(SuppressionMappingTest, ForwardSlashMatchesBothDirections) { + llvm::StringLiteral SuppressionMappingFile = R"( + [unused] + src:*clang/* + src:*clang/lib/Sema/*=emit + src:*clang/lib\\Sema/foo*)"; + Diags.getDiagnosticOptions().DiagnosticSuppressionMappingsFile = "foo.txt"; + FS->addFile("foo.txt", /*ModificationTime=*/{}, + llvm::MemoryBuffer::getMemBuffer(SuppressionMappingFile)); + clang::ProcessWarningOptions(Diags, Diags.getDiagnosticOptions(), *FS); + EXPECT_THAT(diags(), IsEmpty()); + + EXPECT_TRUE(Diags.isSuppressedViaMapping( + diag::warn_unused_function, locForFile(R"(clang/lib/Basic/foo.h)"))); + EXPECT_FALSE(Diags.isSuppressedViaMapping( + diag::warn_unused_function, locForFile(R"(clang/lib/Sema\bar.h)"))); + EXPECT_TRUE(Diags.isSuppressedViaMapping( + diag::warn_unused_function, locForFile(R"(clang\lib\Sema/foo.h)"))); + // The third pattern requires a literal backslash before Sema + EXPECT_FALSE(Diags.isSuppressedViaMapping( + diag::warn_unused_function, locForFile(R"(clang/lib/Sema/foo.h)"))); +} } // namespace diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 85c16b9c33f10..6ff8d18b07e84 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -174,6 +174,10 @@ Changes to BOLT Changes to Sanitizers --------------------- +* The [sanitizer special case list format](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html#format) + now treats forward slashes as either a forward or a backslash, to handle + paths with mixed unix and window styles. + Other Changes ------------- diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index 62ed4a0f23fd9..af92c63331282 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -35,6 +35,7 @@ namespace llvm { /// expansions are not supported. If \p MaxSubPatterns is empty then /// brace expansions are not supported and characters `{,}` are treated as /// literals. +/// * `/` matches both unix and windows path separators: `/` and `\`. /// * `\` escapes the next character so it is treated as a literal. /// /// Some known edge cases are: diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index 7004adf461a0c..26b3724863ee8 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -231,6 +231,10 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str) const { ++S; continue; } + } else if (*P == '/' && (*S == '/' || *S == '\\')) { + ++P; + ++S; + continue; } else if (*P == *S || *P == '?') { ++P; ++S; >From a81b5509f3d9633eee2276c2242c595378d1cfdc Mon Sep 17 00:00:00 2001 From: Devon Loehr <[email protected]> Date: Tue, 22 Jul 2025 14:57:34 +0000 Subject: [PATCH 02/14] Enable only for special case list --- llvm/include/llvm/Support/GlobPattern.h | 9 +++++++-- llvm/lib/Support/GlobPattern.cpp | 12 +++++++----- llvm/lib/Support/SpecialCaseList.cpp | 3 ++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index af92c63331282..2729ba9a56649 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -56,8 +56,10 @@ class GlobPattern { /// \param MaxSubPatterns if provided limit the number of allowed subpatterns /// created from expanding braces otherwise disable /// brace expansion + /// \param IsSlashAgnostic whether to treat '/' as matching '\\' as well LLVM_ABI static Expected<GlobPattern> - create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {}); + create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {}, + bool IsSlashAgnostic = false); /// \returns \p true if \p S matches this glob pattern LLVM_ABI bool match(StringRef S) const; @@ -76,7 +78,9 @@ class GlobPattern { struct SubGlobPattern { /// \param Pat the pattern to match against - LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat); + /// \param SlashAgnostic whether to treat '/' as matching '\\' as well + LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat, + bool SlashAgnostic); /// \returns \p true if \p S matches this glob pattern LLVM_ABI bool match(StringRef S) const; StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); } @@ -88,6 +92,7 @@ class GlobPattern { }; SmallVector<Bracket, 0> Brackets; SmallVector<char, 0> Pat; + bool IsSlashAgnostic; }; SmallVector<SubGlobPattern, 1> SubGlobs; }; diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index 26b3724863ee8..4aa30a81c3fbf 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -132,8 +132,9 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) { return std::move(SubPatterns); } -Expected<GlobPattern> -GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) { +Expected<GlobPattern> GlobPattern::create(StringRef S, + std::optional<size_t> MaxSubPatterns, + bool IsSlashAgnostic) { GlobPattern Pat; // Store the prefix that does not contain any metacharacter. @@ -147,7 +148,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) { if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats)) return std::move(Err); for (StringRef SubPat : SubPats) { - auto SubGlobOrErr = SubGlobPattern::create(SubPat); + auto SubGlobOrErr = SubGlobPattern::create(SubPat, IsSlashAgnostic); if (!SubGlobOrErr) return SubGlobOrErr.takeError(); Pat.SubGlobs.push_back(*SubGlobOrErr); @@ -157,8 +158,9 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) { } Expected<GlobPattern::SubGlobPattern> -GlobPattern::SubGlobPattern::create(StringRef S) { +GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) { SubGlobPattern Pat; + Pat.IsSlashAgnostic = SlashAgnostic; // Parse brackets. Pat.Pat.assign(S.begin(), S.end()); @@ -231,7 +233,7 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str) const { ++S; continue; } - } else if (*P == '/' && (*S == '/' || *S == '\\')) { + } else if (IsSlashAgnostic && *P == '/' && (*S == '/' || *S == '\\')) { ++P; ++S; continue; diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 8d4e043bc1c9f..c597f03188507 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -59,7 +59,8 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber, Glob->LineNo = LineNumber; // We must be sure to use the string in `Glob` rather than the provided // reference which could be destroyed before match() is called - if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024) + if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024, + /*IsSlashAgnostic=*/true) .moveInto(Glob->Pattern)) return Err; Globs.push_back(std::move(Glob)); >From 7bfc6ad6afbcafe9f230d3854b28b4406f83537b Mon Sep 17 00:00:00 2001 From: Devon Loehr <[email protected]> Date: Fri, 5 Sep 2025 16:48:17 +0000 Subject: [PATCH 03/14] Enable only on windows --- clang/docs/SanitizerSpecialCaseList.rst | 2 +- clang/unittests/Basic/DiagnosticTest.cpp | 3 +++ llvm/docs/ReleaseNotes.md | 4 ++-- llvm/include/llvm/Support/GlobPattern.h | 3 ++- llvm/lib/Support/SpecialCaseList.cpp | 6 +++++- 5 files changed, 13 insertions(+), 5 deletions(-) diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst index f2a04dc9adcf1..e14b654536b8a 100644 --- a/clang/docs/SanitizerSpecialCaseList.rst +++ b/clang/docs/SanitizerSpecialCaseList.rst @@ -174,7 +174,7 @@ tool-specific docs. # Lines starting with # are ignored. # Turn off checks for the source file # Entries without sections are placed into [*] and apply to all sanitizers - # "/" matches both windows and unix path separators ("/" and "\") + # On windows, "/" matches both styles of path separator ("/" and "\") src:path/to/source/file.c src:*/source/file.c # Turn off checks for this main file, including files included by it. diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp index a6557b1e35c4b..7e9653bdd3c7e 100644 --- a/clang/unittests/Basic/DiagnosticTest.cpp +++ b/clang/unittests/Basic/DiagnosticTest.cpp @@ -361,6 +361,8 @@ TEST_F(SuppressionMappingTest, ParsingRespectsOtherWarningOpts) { EXPECT_THAT(diags(), IsEmpty()); } +#ifdef _WIN32 +// We're only slash-agnostic on windows hosts TEST_F(SuppressionMappingTest, ForwardSlashMatchesBothDirections) { llvm::StringLiteral SuppressionMappingFile = R"( [unused] @@ -383,4 +385,5 @@ TEST_F(SuppressionMappingTest, ForwardSlashMatchesBothDirections) { EXPECT_FALSE(Diags.isSuppressedViaMapping( diag::warn_unused_function, locForFile(R"(clang/lib/Sema/foo.h)"))); } +#endif } // namespace diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 6ff8d18b07e84..ae6222d2fa145 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -174,9 +174,9 @@ Changes to BOLT Changes to Sanitizers --------------------- -* The [sanitizer special case list format](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html#format) +* On windows hosts, the [sanitizer special case list format](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html#format) now treats forward slashes as either a forward or a backslash, to handle - paths with mixed unix and window styles. + paths with mixed unix and windows styles. Other Changes ------------- diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index 2729ba9a56649..4abd6b1874593 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -35,7 +35,8 @@ namespace llvm { /// expansions are not supported. If \p MaxSubPatterns is empty then /// brace expansions are not supported and characters `{,}` are treated as /// literals. -/// * `/` matches both unix and windows path separators: `/` and `\`. +/// * If IsSlashAgnostic is passed, `/` matches both unix and windows path +/// separators: `/` and `\`. /// * `\` escapes the next character so it is treated as a literal. /// /// Some known edge cases are: diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index c597f03188507..89ec193e1991c 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -18,6 +18,8 @@ #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/VirtualFileSystem.h" +#include "llvm/TargetParser/Host.h" +#include "llvm/TargetParser/Triple.h" #include <stdio.h> #include <string> #include <system_error> @@ -57,10 +59,12 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber, auto Glob = std::make_unique<Matcher::Glob>(); Glob->Name = Pattern.str(); Glob->LineNo = LineNumber; + // Backslashes are valid in posix-style filenames. + bool IsSlashAgnostic = Triple(sys::getDefaultTargetTriple()).isOSWindows(); // We must be sure to use the string in `Glob` rather than the provided // reference which could be destroyed before match() is called if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024, - /*IsSlashAgnostic=*/true) + /*IsSlashAgnostic=*/IsSlashAgnostic) .moveInto(Glob->Pattern)) return Err; Globs.push_back(std::move(Glob)); >From a5eaf5880c295a6a50f0be504d3c421641f28932 Mon Sep 17 00:00:00 2001 From: Devon Loehr <[email protected]> Date: Mon, 8 Sep 2025 15:00:04 +0000 Subject: [PATCH 04/14] Incorporate review feedback --- clang/docs/SanitizerSpecialCaseList.rst | 2 +- clang/lib/Basic/Diagnostic.cpp | 5 ++++- clang/lib/Basic/SanitizerSpecialCaseList.cpp | 2 +- clang/unittests/Basic/DiagnosticTest.cpp | 12 ++++++----- llvm/docs/ReleaseNotes.md | 4 ++-- llvm/include/llvm/Support/GlobPattern.h | 19 +++++++---------- llvm/include/llvm/Support/SpecialCaseList.h | 5 +++-- llvm/lib/Support/GlobPattern.cpp | 21 +++++++++---------- llvm/lib/Support/SpecialCaseList.cpp | 22 +++++++++++--------- 9 files changed, 48 insertions(+), 44 deletions(-) diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst index e14b654536b8a..752602c1b3093 100644 --- a/clang/docs/SanitizerSpecialCaseList.rst +++ b/clang/docs/SanitizerSpecialCaseList.rst @@ -174,7 +174,7 @@ tool-specific docs. # Lines starting with # are ignored. # Turn off checks for the source file # Entries without sections are placed into [*] and apply to all sanitizers - # On windows, "/" matches both styles of path separator ("/" and "\") + # On windows, "/" also matches "\" in filenames src:path/to/source/file.c src:*/source/file.c # Turn off checks for this main file, including files included by it. diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp index dc3778bbf339c..9dd133cb4c03e 100644 --- a/clang/lib/Basic/Diagnostic.cpp +++ b/clang/lib/Basic/Diagnostic.cpp @@ -622,6 +622,8 @@ bool WarningsSpecialCaseList::isDiagSuppressed(diag::kind DiagId, bool WarningsSpecialCaseList::globsMatches( const llvm::StringMap<Matcher> &CategoriesToMatchers, StringRef FilePath) const { + static bool HaveWindowsPathStyle = + llvm::sys::path::is_style_windows(llvm::sys::path::Style::native); StringRef LongestMatch; bool LongestIsPositive = false; for (const auto &Entry : CategoriesToMatchers) { @@ -631,7 +633,8 @@ bool WarningsSpecialCaseList::globsMatches( for (const auto &Glob : Matcher.Globs) { if (Glob->Name.size() < LongestMatch.size()) continue; - if (!Glob->Pattern.match(FilePath)) + if (!Glob->Pattern.match(FilePath, + /*IsSlashAgnostic=*/HaveWindowsPathStyle)) continue; LongestMatch = Glob->Name; LongestIsPositive = IsPositive; diff --git a/clang/lib/Basic/SanitizerSpecialCaseList.cpp b/clang/lib/Basic/SanitizerSpecialCaseList.cpp index f7bc1d5545d75..4ad35d4d73fdd 100644 --- a/clang/lib/Basic/SanitizerSpecialCaseList.cpp +++ b/clang/lib/Basic/SanitizerSpecialCaseList.cpp @@ -42,7 +42,7 @@ void SanitizerSpecialCaseList::createSanitizerSections() { SanitizerMask Mask; #define SANITIZER(NAME, ID) \ - if (S.SectionMatcher->match(NAME)) \ + if (S.SectionMatcher->match(NAME, /*IsFilename=*/false)) \ Mask |= SanitizerKind::ID; #define SANITIZER_GROUP(NAME, ID, ALIAS) SANITIZER(NAME, ID) diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp index 7e9653bdd3c7e..2af86b6a5ef38 100644 --- a/clang/unittests/Basic/DiagnosticTest.cpp +++ b/clang/unittests/Basic/DiagnosticTest.cpp @@ -363,12 +363,13 @@ TEST_F(SuppressionMappingTest, ParsingRespectsOtherWarningOpts) { #ifdef _WIN32 // We're only slash-agnostic on windows hosts -TEST_F(SuppressionMappingTest, ForwardSlashMatchesBothDirections) { +TEST_F(SuppressionMappingTest, TreatsFilesAsSlashAgnosticOnWindows) { llvm::StringLiteral SuppressionMappingFile = R"( [unused] src:*clang/* src:*clang/lib/Sema/*=emit - src:*clang/lib\\Sema/foo*)"; + src:*clang/lib\\Sema/foo* + fun:suppress/me)"; Diags.getDiagnosticOptions().DiagnosticSuppressionMappingsFile = "foo.txt"; FS->addFile("foo.txt", /*ModificationTime=*/{}, llvm::MemoryBuffer::getMemBuffer(SuppressionMappingFile)); @@ -376,12 +377,13 @@ TEST_F(SuppressionMappingTest, ForwardSlashMatchesBothDirections) { EXPECT_THAT(diags(), IsEmpty()); EXPECT_TRUE(Diags.isSuppressedViaMapping( - diag::warn_unused_function, locForFile(R"(clang/lib/Basic/foo.h)"))); + diag::warn_unused_function, locForFile(R"(clang/lib/Basic/bar.h)"))); EXPECT_FALSE(Diags.isSuppressedViaMapping( - diag::warn_unused_function, locForFile(R"(clang/lib/Sema\bar.h)"))); + diag::warn_unused_function, locForFile(R"(clang/lib/Sema\baz.h)"))); + + // We require a literal backslash before "Sema" EXPECT_TRUE(Diags.isSuppressedViaMapping( diag::warn_unused_function, locForFile(R"(clang\lib\Sema/foo.h)"))); - // The third pattern requires a literal backslash before Sema EXPECT_FALSE(Diags.isSuppressedViaMapping( diag::warn_unused_function, locForFile(R"(clang/lib/Sema/foo.h)"))); } diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index ae6222d2fa145..39c2a82239fe0 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -175,8 +175,8 @@ Changes to Sanitizers --------------------- * On windows hosts, the [sanitizer special case list format](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html#format) - now treats forward slashes as either a forward or a backslash, to handle - paths with mixed unix and windows styles. + now treats forward slashes in filenames as matching either a forward or a + backslash, to accommodate paths with mixed unix and windows styles. Other Changes ------------- diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index 4abd6b1874593..5fd1e0764cc7a 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -35,9 +35,9 @@ namespace llvm { /// expansions are not supported. If \p MaxSubPatterns is empty then /// brace expansions are not supported and characters `{,}` are treated as /// literals. -/// * If IsSlashAgnostic is passed, `/` matches both unix and windows path -/// separators: `/` and `\`. /// * `\` escapes the next character so it is treated as a literal. +/// * If \p IsSlashAgnostic is passed to the match function, then forward +/// slashes `/` also match backslashes `\`. /// /// Some known edge cases are: /// * The literal `]` is allowed as the first character in a character class, @@ -57,12 +57,11 @@ class GlobPattern { /// \param MaxSubPatterns if provided limit the number of allowed subpatterns /// created from expanding braces otherwise disable /// brace expansion - /// \param IsSlashAgnostic whether to treat '/' as matching '\\' as well LLVM_ABI static Expected<GlobPattern> - create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {}, - bool IsSlashAgnostic = false); + create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {}); + /// \param IsSlashAgnostic whether to treat '/' as also matching '\' /// \returns \p true if \p S matches this glob pattern - LLVM_ABI bool match(StringRef S) const; + LLVM_ABI bool match(StringRef S, bool IsSlashAgnostic = false) const; // Returns true for glob pattern "*". Can be used to avoid expensive // preparation/acquisition of the input for match(). @@ -79,11 +78,10 @@ class GlobPattern { struct SubGlobPattern { /// \param Pat the pattern to match against - /// \param SlashAgnostic whether to treat '/' as matching '\\' as well - LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat, - bool SlashAgnostic); + LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat); + /// \param IsSlashAgnostic whether to treat '/' as also matching '\' /// \returns \p true if \p S matches this glob pattern - LLVM_ABI bool match(StringRef S) const; + LLVM_ABI bool match(StringRef S, bool IsSlashAgnostic) const; StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); } // Brackets with their end position and matched bytes. @@ -93,7 +91,6 @@ class GlobPattern { }; SmallVector<Bracket, 0> Brackets; SmallVector<char, 0> Pat; - bool IsSlashAgnostic; }; SmallVector<SubGlobPattern, 1> SubGlobs; }; diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h index 22a62eac9e01a..951f27eed8ee8 100644 --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -124,8 +124,9 @@ class SpecialCaseList { LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber, bool UseRegex); // Returns the line number in the source file that this query matches to. - // Returns zero if no match is found. - LLVM_ABI unsigned match(StringRef Query) const; + // On windows, treat '/' as also matching '\' in filenames when using globs. + // Returns zero if no match is found + LLVM_ABI unsigned match(StringRef Query, bool IsFilename) const; struct Glob { std::string Name; diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index 4aa30a81c3fbf..578c0dd0760d2 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -132,9 +132,8 @@ parseBraceExpansions(StringRef S, std::optional<size_t> MaxSubPatterns) { return std::move(SubPatterns); } -Expected<GlobPattern> GlobPattern::create(StringRef S, - std::optional<size_t> MaxSubPatterns, - bool IsSlashAgnostic) { +Expected<GlobPattern> +GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) { GlobPattern Pat; // Store the prefix that does not contain any metacharacter. @@ -148,7 +147,7 @@ Expected<GlobPattern> GlobPattern::create(StringRef S, if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats)) return std::move(Err); for (StringRef SubPat : SubPats) { - auto SubGlobOrErr = SubGlobPattern::create(SubPat, IsSlashAgnostic); + auto SubGlobOrErr = SubGlobPattern::create(SubPat); if (!SubGlobOrErr) return SubGlobOrErr.takeError(); Pat.SubGlobs.push_back(*SubGlobOrErr); @@ -158,9 +157,8 @@ Expected<GlobPattern> GlobPattern::create(StringRef S, } Expected<GlobPattern::SubGlobPattern> -GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) { +GlobPattern::SubGlobPattern::create(StringRef S) { SubGlobPattern Pat; - Pat.IsSlashAgnostic = SlashAgnostic; // Parse brackets. Pat.Pat.assign(S.begin(), S.end()); @@ -192,21 +190,22 @@ GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) { return Pat; } -bool GlobPattern::match(StringRef S) const { +bool GlobPattern::match(StringRef S, bool IsSlashAgnostic) const { if (!S.consume_front(Prefix)) return false; if (SubGlobs.empty() && S.empty()) return true; for (auto &Glob : SubGlobs) - if (Glob.match(S)) + if (Glob.match(S, IsSlashAgnostic)) return true; return false; } // Factor the pattern into segments split by '*'. The segment is matched -// sequentianlly by finding the first occurrence past the end of the previous +// sequentially by finding the first occurrence past the end of the previous // match. -bool GlobPattern::SubGlobPattern::match(StringRef Str) const { +bool GlobPattern::SubGlobPattern::match(StringRef Str, + bool IsSlashAgnostic) const { const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(), *SavedS = S; const char *const PEnd = P + Pat.size(), *const End = S + Str.size(); @@ -233,7 +232,7 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str) const { ++S; continue; } - } else if (IsSlashAgnostic && *P == '/' && (*S == '/' || *S == '\\')) { + } else if (IsSlashAgnostic && *P == '/' && *S == '\\') { ++P; ++S; continue; diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 89ec193e1991c..c65cb977c005c 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -17,9 +17,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" -#include "llvm/TargetParser/Host.h" -#include "llvm/TargetParser/Triple.h" #include <stdio.h> #include <string> #include <system_error> @@ -59,21 +58,22 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber, auto Glob = std::make_unique<Matcher::Glob>(); Glob->Name = Pattern.str(); Glob->LineNo = LineNumber; - // Backslashes are valid in posix-style filenames. - bool IsSlashAgnostic = Triple(sys::getDefaultTargetTriple()).isOSWindows(); // We must be sure to use the string in `Glob` rather than the provided // reference which could be destroyed before match() is called - if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024, - /*IsSlashAgnostic=*/IsSlashAgnostic) + if (auto Err = GlobPattern::create(Glob->Name, /*MaxSubPatterns=*/1024) .moveInto(Glob->Pattern)) return Err; Globs.push_back(std::move(Glob)); return Error::success(); } -unsigned SpecialCaseList::Matcher::match(StringRef Query) const { +unsigned SpecialCaseList::Matcher::match(StringRef Query, + bool IsFilename) const { + static bool HaveWindowsPathStyle = + llvm::sys::path::is_style_windows(llvm::sys::path::Style::native); for (const auto &Glob : reverse(Globs)) - if (Glob->Pattern.match(Query)) + if (Glob->Pattern.match( + Query, /*IsSlashAgnostic=*/(HaveWindowsPathStyle && IsFilename))) return Glob->LineNo; for (const auto &[Regex, LineNumber] : reverse(RegExes)) if (Regex->match(Query)) @@ -223,7 +223,8 @@ std::pair<unsigned, unsigned> SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category) const { for (const auto &S : reverse(Sections)) { - if (S.SectionMatcher->match(Section)) { + bool IsFilename = Prefix == "src" || Prefix == "mainfile"; + if (S.SectionMatcher->match(Section, IsFilename)) { unsigned Blame = inSectionBlame(S.Entries, Prefix, Query, Category); if (Blame) return {S.FileIdx, Blame}; @@ -242,7 +243,8 @@ unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries, if (II == I->second.end()) return 0; - return II->getValue().match(Query); + bool IsFilename = Prefix == "src" || Prefix == "mainfile"; + return II->getValue().match(Query, IsFilename); } } // namespace llvm >From 47236f1866050ac23126a64a3d340b755fa89918 Mon Sep 17 00:00:00 2001 From: Devon Loehr <[email protected]> Date: Thu, 2 Oct 2025 14:56:49 +0000 Subject: [PATCH 05/14] Add glob pattern unittest --- llvm/lib/Support/GlobPattern.cpp | 2 +- llvm/unittests/Support/GlobPatternTest.cpp | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index 578c0dd0760d2..e19c8c84d19a9 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -137,7 +137,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) { GlobPattern Pat; // Store the prefix that does not contain any metacharacter. - size_t PrefixSize = S.find_first_of("?*[{\\"); + size_t PrefixSize = S.find_first_of("?*[{\\/"); Pat.Prefix = S.substr(0, PrefixSize); if (PrefixSize == std::string::npos) return Pat; diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp index e4f1025b00956..ba33e233c70bc 100644 --- a/llvm/unittests/Support/GlobPatternTest.cpp +++ b/llvm/unittests/Support/GlobPatternTest.cpp @@ -271,4 +271,13 @@ TEST_F(GlobPatternTest, Pathological) { EXPECT_FALSE(Pat->match(S)); EXPECT_TRUE(Pat->match(S + 'b')); } + +TEST_F(GlobPatternTest, SlashAgnostic) { + auto Pat = GlobPattern::create("clang/*"); + ASSERT_TRUE((bool)Pat); + EXPECT_TRUE(Pat->match("clang/foo")); + EXPECT_FALSE(Pat->match(R"(clang\foo)")); + EXPECT_TRUE(Pat->match("clang/foo", /*isSlashAgnostic=*/true)); + EXPECT_TRUE(Pat->match(R"(clang\foo)", /*isSlashAgnostic=*/true)); +} } >From 8535a1144ebad85b6282cac531d6048c7f45b4d7 Mon Sep 17 00:00:00 2001 From: Devon Loehr <[email protected]> Date: Mon, 6 Oct 2025 17:48:19 +0000 Subject: [PATCH 06/14] Canonicalize special case list filenames when loading --- clang/docs/SanitizerSpecialCaseList.rst | 18 ++++++++++- clang/lib/Basic/Diagnostic.cpp | 21 ++++++++----- clang/lib/Basic/SanitizerSpecialCaseList.cpp | 2 +- clang/unittests/Basic/DiagnosticTest.cpp | 16 +++++++--- llvm/docs/ReleaseNotes.md | 7 +++-- llvm/include/llvm/Support/GlobPattern.h | 8 ++--- llvm/include/llvm/Support/SpecialCaseList.h | 8 ++--- llvm/lib/Support/GlobPattern.cpp | 15 +++------ llvm/lib/Support/SpecialCaseList.cpp | 31 ++++++++++--------- llvm/unittests/Support/GlobPatternTest.cpp | 9 ------ .../unittests/Support/SpecialCaseListTest.cpp | 18 +++++++++++ 11 files changed, 92 insertions(+), 61 deletions(-) diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst index 752602c1b3093..f5c45c1b81df0 100644 --- a/clang/docs/SanitizerSpecialCaseList.rst +++ b/clang/docs/SanitizerSpecialCaseList.rst @@ -174,7 +174,6 @@ tool-specific docs. # Lines starting with # are ignored. # Turn off checks for the source file # Entries without sections are placed into [*] and apply to all sanitizers - # On windows, "/" also matches "\" in filenames src:path/to/source/file.c src:*/source/file.c # Turn off checks for this main file, including files included by it. @@ -197,6 +196,23 @@ tool-specific docs. [{cfi-vcall,cfi-icall}] fun:*BadCfiCall + +.. note:: + + By default, ``src`` and ``mainfile`` are matched against the filename as seen + by LLVM. On Windows, this might involve a mix of forward and backslashes as + file separators, and writing patterns to match both variants can be + inconvenient. + + If the special case list file begins with ``#!canonical-paths``, then paths + will be canonicalized before patterns are matched against them. This involves + stripping any leading dots and slashes, and (on Windows only) converting all + backslashes to forward slashes. + + If the file uses both ``#!special-case-list-v1`` and ``#!canonical-paths``, + then they should occupy the first two lines, and ``#!canonical-paths`` must + appear on the second line. + ``mainfile`` is similar to applying ``-fno-sanitize=`` to a set of files but does not need plumbing into the build system. This works well for internal linkage functions but has a caveat for C++ vague linkage functions. diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp index 9dd133cb4c03e..71762d10aefa6 100644 --- a/clang/lib/Basic/Diagnostic.cpp +++ b/clang/lib/Basic/Diagnostic.cpp @@ -612,18 +612,24 @@ bool WarningsSpecialCaseList::isDiagSuppressed(diag::kind DiagId, SrcEntriesIt->getValue(); // We also use presumed locations here to improve reproducibility for // preprocessed inputs. - if (PresumedLoc PLoc = SM.getPresumedLoc(DiagLoc); PLoc.isValid()) - return globsMatches( - CategoriesToMatchers, - llvm::sys::path::remove_leading_dotslash(PLoc.getFilename())); + if (PresumedLoc PLoc = SM.getPresumedLoc(DiagLoc); PLoc.isValid()) { + if (CanonicalizePaths) { + return globsMatches( + CategoriesToMatchers, + llvm::sys::path::convert_to_slash( + llvm::sys::path::remove_leading_dotslash(PLoc.getFilename()))); + } else { + return globsMatches( + CategoriesToMatchers, + llvm::sys::path::remove_leading_dotslash(PLoc.getFilename())); + } + } return false; } bool WarningsSpecialCaseList::globsMatches( const llvm::StringMap<Matcher> &CategoriesToMatchers, StringRef FilePath) const { - static bool HaveWindowsPathStyle = - llvm::sys::path::is_style_windows(llvm::sys::path::Style::native); StringRef LongestMatch; bool LongestIsPositive = false; for (const auto &Entry : CategoriesToMatchers) { @@ -633,8 +639,7 @@ bool WarningsSpecialCaseList::globsMatches( for (const auto &Glob : Matcher.Globs) { if (Glob->Name.size() < LongestMatch.size()) continue; - if (!Glob->Pattern.match(FilePath, - /*IsSlashAgnostic=*/HaveWindowsPathStyle)) + if (!Glob->Pattern.match(FilePath)) continue; LongestMatch = Glob->Name; LongestIsPositive = IsPositive; diff --git a/clang/lib/Basic/SanitizerSpecialCaseList.cpp b/clang/lib/Basic/SanitizerSpecialCaseList.cpp index 4ad35d4d73fdd..f7bc1d5545d75 100644 --- a/clang/lib/Basic/SanitizerSpecialCaseList.cpp +++ b/clang/lib/Basic/SanitizerSpecialCaseList.cpp @@ -42,7 +42,7 @@ void SanitizerSpecialCaseList::createSanitizerSections() { SanitizerMask Mask; #define SANITIZER(NAME, ID) \ - if (S.SectionMatcher->match(NAME, /*IsFilename=*/false)) \ + if (S.SectionMatcher->match(NAME)) \ Mask |= SanitizerKind::ID; #define SANITIZER_GROUP(NAME, ID, ALIAS) SANITIZER(NAME, ID) diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp index 2af86b6a5ef38..2e052b9e5eaf3 100644 --- a/clang/unittests/Basic/DiagnosticTest.cpp +++ b/clang/unittests/Basic/DiagnosticTest.cpp @@ -362,9 +362,8 @@ TEST_F(SuppressionMappingTest, ParsingRespectsOtherWarningOpts) { } #ifdef _WIN32 -// We're only slash-agnostic on windows hosts -TEST_F(SuppressionMappingTest, TreatsFilesAsSlashAgnosticOnWindows) { - llvm::StringLiteral SuppressionMappingFile = R"( +TEST_F(SuppressionMappingTest, CanonicalizesSlashesOnWindows) { + llvm::StringLiteral SuppressionMappingFile = R"(#!canonical-paths [unused] src:*clang/* src:*clang/lib/Sema/*=emit @@ -378,14 +377,21 @@ TEST_F(SuppressionMappingTest, TreatsFilesAsSlashAgnosticOnWindows) { EXPECT_TRUE(Diags.isSuppressedViaMapping( diag::warn_unused_function, locForFile(R"(clang/lib/Basic/bar.h)"))); + EXPECT_TRUE(Diags.isSuppressedViaMapping( + diag::warn_unused_function, locForFile(R"(clang/lib/Basic\bar.h)"))); + EXPECT_TRUE(Diags.isSuppressedViaMapping( + diag::warn_unused_function, locForFile(R"(clang\lib/Basic/bar.h)"))); + EXPECT_FALSE(Diags.isSuppressedViaMapping( + diag::warn_unused_function, locForFile(R"(clang/lib/Sema/baz.h)"))); EXPECT_FALSE(Diags.isSuppressedViaMapping( diag::warn_unused_function, locForFile(R"(clang/lib/Sema\baz.h)"))); - // We require a literal backslash before "Sema" - EXPECT_TRUE(Diags.isSuppressedViaMapping( + // The backslash gets canonicalized so we never match the third pattern + EXPECT_FALSE(Diags.isSuppressedViaMapping( diag::warn_unused_function, locForFile(R"(clang\lib\Sema/foo.h)"))); EXPECT_FALSE(Diags.isSuppressedViaMapping( diag::warn_unused_function, locForFile(R"(clang/lib/Sema/foo.h)"))); } #endif + } // namespace diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 39c2a82239fe0..5a7f41ef3f0dd 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -174,9 +174,10 @@ Changes to BOLT Changes to Sanitizers --------------------- -* On windows hosts, the [sanitizer special case list format](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html#format) - now treats forward slashes in filenames as matching either a forward or a - backslash, to accommodate paths with mixed unix and windows styles. +* (Sanitizer Special Case Lists)[https://clang.llvm.org/docs/SanitizerSpecialCaseList.html] + may now be prefixed with ``#!canonical-paths`` to specify that filename patterns + should be matched against canonicalized paths, without leading dots or slashes + and (on Windows only) without any backslashes. Other Changes ------------- diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index 5fd1e0764cc7a..62ed4a0f23fd9 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -36,8 +36,6 @@ namespace llvm { /// brace expansions are not supported and characters `{,}` are treated as /// literals. /// * `\` escapes the next character so it is treated as a literal. -/// * If \p IsSlashAgnostic is passed to the match function, then forward -/// slashes `/` also match backslashes `\`. /// /// Some known edge cases are: /// * The literal `]` is allowed as the first character in a character class, @@ -59,9 +57,8 @@ class GlobPattern { /// brace expansion LLVM_ABI static Expected<GlobPattern> create(StringRef Pat, std::optional<size_t> MaxSubPatterns = {}); - /// \param IsSlashAgnostic whether to treat '/' as also matching '\' /// \returns \p true if \p S matches this glob pattern - LLVM_ABI bool match(StringRef S, bool IsSlashAgnostic = false) const; + LLVM_ABI bool match(StringRef S) const; // Returns true for glob pattern "*". Can be used to avoid expensive // preparation/acquisition of the input for match(). @@ -79,9 +76,8 @@ class GlobPattern { struct SubGlobPattern { /// \param Pat the pattern to match against LLVM_ABI static Expected<SubGlobPattern> create(StringRef Pat); - /// \param IsSlashAgnostic whether to treat '/' as also matching '\' /// \returns \p true if \p S matches this glob pattern - LLVM_ABI bool match(StringRef S, bool IsSlashAgnostic) const; + LLVM_ABI bool match(StringRef S) const; StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); } // Brackets with their end position and matched bytes. diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h index 951f27eed8ee8..5c5df23a8623d 100644 --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -122,11 +122,10 @@ class SpecialCaseList { class Matcher { public: LLVM_ABI Error insert(StringRef Pattern, unsigned LineNumber, - bool UseRegex); + bool UseGlobs); // Returns the line number in the source file that this query matches to. - // On windows, treat '/' as also matching '\' in filenames when using globs. - // Returns zero if no match is found - LLVM_ABI unsigned match(StringRef Query, bool IsFilename) const; + // Returns zero if no match is found. + LLVM_ABI unsigned match(StringRef Query) const; struct Glob { std::string Name; @@ -155,6 +154,7 @@ class SpecialCaseList { }; std::vector<Section> Sections; + bool CanonicalizePaths = false; LLVM_ABI Expected<Section *> addSection(StringRef SectionStr, unsigned FileIdx, unsigned LineNo, diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index e19c8c84d19a9..7004adf461a0c 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -137,7 +137,7 @@ GlobPattern::create(StringRef S, std::optional<size_t> MaxSubPatterns) { GlobPattern Pat; // Store the prefix that does not contain any metacharacter. - size_t PrefixSize = S.find_first_of("?*[{\\/"); + size_t PrefixSize = S.find_first_of("?*[{\\"); Pat.Prefix = S.substr(0, PrefixSize); if (PrefixSize == std::string::npos) return Pat; @@ -190,22 +190,21 @@ GlobPattern::SubGlobPattern::create(StringRef S) { return Pat; } -bool GlobPattern::match(StringRef S, bool IsSlashAgnostic) const { +bool GlobPattern::match(StringRef S) const { if (!S.consume_front(Prefix)) return false; if (SubGlobs.empty() && S.empty()) return true; for (auto &Glob : SubGlobs) - if (Glob.match(S, IsSlashAgnostic)) + if (Glob.match(S)) return true; return false; } // Factor the pattern into segments split by '*'. The segment is matched -// sequentially by finding the first occurrence past the end of the previous +// sequentianlly by finding the first occurrence past the end of the previous // match. -bool GlobPattern::SubGlobPattern::match(StringRef Str, - bool IsSlashAgnostic) const { +bool GlobPattern::SubGlobPattern::match(StringRef Str) const { const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(), *SavedS = S; const char *const PEnd = P + Pat.size(), *const End = S + Str.size(); @@ -232,10 +231,6 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str, ++S; continue; } - } else if (IsSlashAgnostic && *P == '/' && *S == '\\') { - ++P; - ++S; - continue; } else if (*P == *S || *P == '?') { ++P; ++S; diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index c65cb977c005c..4de7478aaf53b 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -17,7 +17,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" #include <stdio.h> #include <string> @@ -67,13 +66,9 @@ Error SpecialCaseList::Matcher::insert(StringRef Pattern, unsigned LineNumber, return Error::success(); } -unsigned SpecialCaseList::Matcher::match(StringRef Query, - bool IsFilename) const { - static bool HaveWindowsPathStyle = - llvm::sys::path::is_style_windows(llvm::sys::path::Style::native); +unsigned SpecialCaseList::Matcher::match(StringRef Query) const { for (const auto &Glob : reverse(Globs)) - if (Glob->Pattern.match( - Query, /*IsSlashAgnostic=*/(HaveWindowsPathStyle && IsFilename))) + if (Glob->Pattern.match(Query)) return Glob->LineNo; for (const auto &[Regex, LineNumber] : reverse(RegExes)) if (Regex->match(Query)) @@ -158,12 +153,17 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, return false; } + // Scan the start of the file for special comments. These don't appear when + // iterating below because comment lines are automatically skipped. + StringRef Buffer = MB->getBuffer(); // In https://reviews.llvm.org/D154014 we added glob support and planned to // remove regex support in patterns. We temporarily support the original - // behavior using regexes if "#!special-case-list-v1" is the first line of the - // file. For more details, see + // behavior using regexes if "#!special-case-list-v1" is the first line of + // the file. For more details, see // https://discourse.llvm.org/t/use-glob-instead-of-regex-for-specialcaselists/71666 - bool UseGlobs = !MB->getBuffer().starts_with("#!special-case-list-v1\n"); + bool UseGlobs = !Buffer.consume_front("#!special-case-list-v1\n"); + // Specifies that patterns should be matched against canonicalized filepaths. + CanonicalizePaths = Buffer.consume_front("#!canonical-paths\n"); for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); !LineIt.is_at_eof(); LineIt++) { @@ -223,8 +223,7 @@ std::pair<unsigned, unsigned> SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category) const { for (const auto &S : reverse(Sections)) { - bool IsFilename = Prefix == "src" || Prefix == "mainfile"; - if (S.SectionMatcher->match(Section, IsFilename)) { + if (S.SectionMatcher->match(Section)) { unsigned Blame = inSectionBlame(S.Entries, Prefix, Query, Category); if (Blame) return {S.FileIdx, Blame}; @@ -243,8 +242,12 @@ unsigned SpecialCaseList::inSectionBlame(const SectionEntries &Entries, if (II == I->second.end()) return 0; - bool IsFilename = Prefix == "src" || Prefix == "mainfile"; - return II->getValue().match(Query, IsFilename); + if (CanonicalizePaths && (Prefix == "src" || Prefix == "mainfile")) { + return II->getValue().match(llvm::sys::path::convert_to_slash( + llvm::sys::path::remove_leading_dotslash(Query))); + } else { + return II->getValue().match(Query); + } } } // namespace llvm diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp index ba33e233c70bc..e4f1025b00956 100644 --- a/llvm/unittests/Support/GlobPatternTest.cpp +++ b/llvm/unittests/Support/GlobPatternTest.cpp @@ -271,13 +271,4 @@ TEST_F(GlobPatternTest, Pathological) { EXPECT_FALSE(Pat->match(S)); EXPECT_TRUE(Pat->match(S + 'b')); } - -TEST_F(GlobPatternTest, SlashAgnostic) { - auto Pat = GlobPattern::create("clang/*"); - ASSERT_TRUE((bool)Pat); - EXPECT_TRUE(Pat->match("clang/foo")); - EXPECT_FALSE(Pat->match(R"(clang\foo)")); - EXPECT_TRUE(Pat->match("clang/foo", /*isSlashAgnostic=*/true)); - EXPECT_TRUE(Pat->match(R"(clang\foo)", /*isSlashAgnostic=*/true)); -} } diff --git a/llvm/unittests/Support/SpecialCaseListTest.cpp b/llvm/unittests/Support/SpecialCaseListTest.cpp index 5be2b9e3a7a5d..5fc077f3d94ac 100644 --- a/llvm/unittests/Support/SpecialCaseListTest.cpp +++ b/llvm/unittests/Support/SpecialCaseListTest.cpp @@ -372,4 +372,22 @@ TEST_F(SpecialCaseListTest, FileIdx) { sys::fs::remove(Path); } +#ifdef _WIN32 +TEST_F(SpecialCaseListTest, CanonicalizePathsOnWindows) { + std::unique_ptr<SpecialCaseList> SCL = + makeSpecialCaseList("#!canonical-paths\n" + "\n" + "src:*foo/bar*\n" + "src:*foo\\\\baz\n" + "fun:hi\\\\bye=category\n"); + EXPECT_TRUE(SCL->inSection("", "src", "foo/bar")); + EXPECT_TRUE(SCL->inSection("", "src", "foo\\bar")); + // The baz pattern doesn't match because paths are canonicalized first + EXPECT_FALSE(SCL->inSection("", "src", "foo/baz")); + EXPECT_FALSE(SCL->inSection("", "src", "foo\\baz")); + // The canonicalization only applies to files + EXPECT_TRUE(SCL->inSection("", "fun", "hi\\bye", "category")); +} +#endif + } // namespace >From 85cdb803db0b6091a647ca340cadb269e366d02c Mon Sep 17 00:00:00 2001 From: Vitaly Buka <[email protected]> Date: Tue, 9 Jun 2026 10:22:39 -0700 Subject: [PATCH 07/14] Resolve merge conflicts --- llvm/include/llvm/Support/SpecialCaseList.h | 2 +- llvm/lib/Support/SpecialCaseList.cpp | 35 +++++++++++---------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h index 97c08f84cc800..deda39d93199c 100644 --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -119,7 +119,7 @@ class SpecialCaseList { class Section { public: LLVM_ABI Section(StringRef Name, unsigned FileIdx, bool UseGlobs); - bool UseGlobs); + LLVM_ABI Section(Section &&); LLVM_ABI ~Section(); // Returns name of the section, its entire string in []. diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 1afa609784d22..66f0efc57029a 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/GlobPattern.h" #include "llvm/Support/LineIterator.h" +#include "llvm/Support/Path.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" #include "llvm/Support/VirtualFileSystem.h" @@ -322,11 +323,14 @@ SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo, bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, std::string &Error) { + StringRef Buffer = MB->getBuffer(); unsigned long long Version = 2; + if (Buffer.consume_front("#!special-case-list-v")) { + consumeUnsignedInteger(Buffer, 10, Version); + Buffer = Buffer.ltrim(" \t\r\n"); + } - StringRef Header = MB->getBuffer(); - if (Header.consume_front("#!special-case-list-v")) - consumeUnsignedInteger(Header, 10, Version); + CanonicalizePaths = Buffer.consume_front("#!canonical-paths\n"); // In https://reviews.llvm.org/D154014 we added glob support and planned // to remove regex support in patterns. We temporarily support the @@ -344,14 +348,9 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, } Section::SectionImpl *CurrentImpl = ErrOrSection.get()->Impl.get(); - // Scan the start of the file for special comments. These don't appear when - // iterating below because comment lines are automatically skipped. - StringRef Buffer = MB->getBuffer(); - // behavior using regexes if "#!special-case-list-v1" is the first line of - // the file. For more details, see - bool UseGlobs = !Buffer.consume_front("#!special-case-list-v1\n"); - // Specifies that patterns should be matched against canonicalized filepaths. - CanonicalizePaths = Buffer.consume_front("#!canonical-paths\n"); + // This is the current list of prefixes for all existing users matching file + // path. We may need parametrization in constructor in future. + constexpr StringRef PathPrefixes[] = {"src", "!src", "mainfile", "source"}; for (line_iterator LineIt(*MB, /*SkipBlanks=*/true, /*CommentMarker=*/'#'); !LineIt.is_at_eof(); LineIt++) { @@ -415,6 +414,12 @@ bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix, std::pair<unsigned, unsigned> SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category) const { + std::string CanonicalizedQuery; + if (CanonicalizePaths && (Prefix == "src" || Prefix == "mainfile")) { + CanonicalizedQuery = llvm::sys::path::convert_to_slash( + llvm::sys::path::remove_leading_dotslash(Query)); + Query = CanonicalizedQuery; + } for (const auto &S : reverse(Sections)) { if (S.Impl->SectionMatcher.matchAny(Section)) { unsigned Blame = S.getLastMatch(Prefix, Query, Category); @@ -459,12 +464,8 @@ unsigned SpecialCaseList::Section::getLastMatch(StringRef Prefix, return 0; } - if (CanonicalizePaths && (Prefix == "src" || Prefix == "mainfile")) { - return II->getValue().match(llvm::sys::path::convert_to_slash( - llvm::sys::path::remove_leading_dotslash(Query))); - } else { - return II->getValue().match(Query); - } +bool SpecialCaseList::Section::hasPrefix(StringRef Prefix) const { + return Impl->Entries.contains(Prefix); } } // namespace llvm >From d2345cfb80b7b4422d70e6be135ed3af5b918ef8 Mon Sep 17 00:00:00 2001 From: Vitaly Buka <[email protected]> Date: Tue, 9 Jun 2026 10:31:09 -0700 Subject: [PATCH 08/14] [SpecialCaseList] Replace #!canonical-paths with #!special-case-list-v4 --- clang/docs/SanitizerSpecialCaseList.rst | 7 ++----- clang/unittests/Basic/DiagnosticTest.cpp | 2 +- llvm/docs/ReleaseNotes.md | 6 +++--- llvm/lib/Support/SpecialCaseList.cpp | 2 +- llvm/unittests/Support/SpecialCaseListTest.cpp | 3 ++- 5 files changed, 9 insertions(+), 11 deletions(-) diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst index dcda6283b6889..9fe070825f60d 100644 --- a/clang/docs/SanitizerSpecialCaseList.rst +++ b/clang/docs/SanitizerSpecialCaseList.rst @@ -238,15 +238,12 @@ tool-specific docs. file separators, and writing patterns to match both variants can be inconvenient. - If the special case list file begins with ``#!canonical-paths``, then paths + File path canonicalization is supported in version 4 or later (indicated by + starting the file with ``#!special-case-list-v4``). In this version, paths will be canonicalized before patterns are matched against them. This involves stripping any leading dots and slashes, and (on Windows only) converting all backslashes to forward slashes. - If the file uses both ``#!special-case-list-v1`` and ``#!canonical-paths``, - then they should occupy the first two lines, and ``#!canonical-paths`` must - appear on the second line. - ``mainfile`` is similar to applying ``-fno-sanitize=`` to a set of files but does not need plumbing into the build system. This works well for internal linkage functions but has a caveat for C++ vague linkage functions. diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp index 4fea63f5f2a39..7e86e232493cd 100644 --- a/clang/unittests/Basic/DiagnosticTest.cpp +++ b/clang/unittests/Basic/DiagnosticTest.cpp @@ -417,7 +417,7 @@ TEST_F(SuppressionMappingTest, ParsingRespectsOtherWarningOpts) { #ifdef _WIN32 TEST_F(SuppressionMappingTest, CanonicalizesSlashesOnWindows) { - llvm::StringLiteral SuppressionMappingFile = R"(#!canonical-paths + llvm::StringLiteral SuppressionMappingFile = R"(#!special-case-list-v4 [unused] src:*clang/* src:*clang/lib/Sema/*=emit diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 6a2c554c94d1f..c6af634682a36 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -292,9 +292,9 @@ Makes programs 10x faster by doing Special New Thing. a zero input results in poison. * (Sanitizer Special Case Lists)[https://clang.llvm.org/docs/SanitizerSpecialCaseList.html] - may now be prefixed with ``#!canonical-paths`` to specify that filename patterns - should be matched against canonicalized paths, without leading dots or slashes - and (on Windows only) without any backslashes. + version 4 (indicated by ``#!special-case-list-v4``) now specifies that filename + patterns should be matched against canonicalized paths, without leading dots or + slashes and (on Windows only) without any backslashes. * Renamed G_CTLZ_ZERO_UNDEF to G_CTLZ_ZERO_POISON opcode to make it clear that diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 66f0efc57029a..b527314c111bf 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -330,7 +330,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, Buffer = Buffer.ltrim(" \t\r\n"); } - CanonicalizePaths = Buffer.consume_front("#!canonical-paths\n"); + CanonicalizePaths = Version > 3; // In https://reviews.llvm.org/D154014 we added glob support and planned // to remove regex support in patterns. We temporarily support the diff --git a/llvm/unittests/Support/SpecialCaseListTest.cpp b/llvm/unittests/Support/SpecialCaseListTest.cpp index 54f55dd714273..508593e0bc380 100644 --- a/llvm/unittests/Support/SpecialCaseListTest.cpp +++ b/llvm/unittests/Support/SpecialCaseListTest.cpp @@ -419,7 +419,7 @@ TEST_F(SpecialCaseListTest, FileIdx) { #ifdef _WIN32 TEST_F(SpecialCaseListTest, CanonicalizePathsOnWindows) { std::unique_ptr<SpecialCaseList> SCL = - makeSpecialCaseList("#!canonical-paths\n" + makeSpecialCaseList("#!special-case-list-v4\n" "\n" "src:*foo/bar*\n" "src:*foo\\\\baz\n" @@ -432,6 +432,7 @@ TEST_F(SpecialCaseListTest, CanonicalizePathsOnWindows) { // The canonicalization only applies to files EXPECT_TRUE(SCL->inSection("", "fun", "hi\\bye", "category")); } + #endif } // namespace >From ecd5f9c22ff3e1aeefa44864be9c81b1b6dacc0f Mon Sep 17 00:00:00 2001 From: Vitaly Buka <[email protected]> Date: Tue, 9 Jun 2026 10:58:45 -0700 Subject: [PATCH 09/14] Revert "[SpecialCaseList] Canonicalize paths in Matcher::match" --- clang/docs/ReleaseNotes.rst | 5 +++ clang/lib/Basic/Diagnostic.cpp | 15 +-------- llvm/docs/ReleaseNotes.md | 11 ++++--- llvm/include/llvm/Support/SpecialCaseList.h | 2 ++ llvm/lib/Support/SpecialCaseList.cpp | 34 +++++++++++---------- 5 files changed, 32 insertions(+), 35 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a569572af43ca..09fa6e4fc2e4e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -264,6 +264,11 @@ Non-comprehensive list of changes in this release - Added support for floating point and pointer values in most ``__atomic_`` builtins. +- Warning suppression mappings (``--warning-suppression-mappings``) now recognize + version 4 of the Special Case List format (indicated by ``#!special-case-list-v4``), + which specifies that filename patterns should be matched against canonicalized + paths (without leading dots or slashes, and on Windows, without backslashes). + - Added ``__builtin_stdc_rotate_left`` and ``__builtin_stdc_rotate_right`` for bit rotation of unsigned integers including ``_BitInt`` types. Rotation counts are normalized modulo the bit-width and support negative values. diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp index 24ad02df8159b..348c9927e87b9 100644 --- a/clang/lib/Basic/Diagnostic.cpp +++ b/clang/lib/Basic/Diagnostic.cpp @@ -585,22 +585,9 @@ bool WarningsSpecialCaseList::isDiagSuppressed(diag::kind DiagId, if (!DiagSection) return false; - StringRef F = llvm::sys::path::remove_leading_dotslash(PLoc.getFilename()); - + StringRef F = PLoc.getFilename(); unsigned LastSup = DiagSection->getLastMatch("src", F, ""); if (LastSup == 0) - if (PresumedLoc PLoc = SM.getPresumedLoc(DiagLoc); PLoc.isValid()) { - if (CanonicalizePaths) { - return globsMatches( - CategoriesToMatchers, - llvm::sys::path::convert_to_slash( - llvm::sys::path::remove_leading_dotslash(PLoc.getFilename()))); - } else { - return globsMatches( - CategoriesToMatchers, - llvm::sys::path::remove_leading_dotslash(PLoc.getFilename())); - } - } return false; unsigned LastEmit = DiagSection->getLastMatch("src", F, "emit"); diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index c6af634682a36..a91af78704dfa 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -291,11 +291,7 @@ Makes programs 10x faster by doing Special New Thing. * Renamed ISD::CTTZ_ZERO_UNDEF to ISD::CTTZ_ZERO_POISON opcode to make it clear that a zero input results in poison. -* (Sanitizer Special Case Lists)[https://clang.llvm.org/docs/SanitizerSpecialCaseList.html] - version 4 (indicated by ``#!special-case-list-v4``) now specifies that filename - patterns should be matched against canonicalized paths, without leading dots or - slashes and (on Windows only) without any backslashes. - +### Changes to the GlobalISel infrastructure * Renamed G_CTLZ_ZERO_UNDEF to G_CTLZ_ZERO_POISON opcode to make it clear that a zero input results in poison. @@ -406,6 +402,11 @@ Makes programs 10x faster by doing Special New Thing. * Add a random delay into ThreadSanitizer to help find rare thread interleavings. +* [Sanitizer Special Case Lists](https://clang.llvm.org/docs/SanitizerSpecialCaseList.html) + version 4 (indicated by ``#!special-case-list-v4``) now specifies that filename + patterns should be matched against canonicalized paths, without leading dots or + slashes and (on Windows only) without any backslashes. + ### Other Changes ## External Open Source Projects Using LLVM {{env.config.release}} diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h index deda39d93199c..1fc23d763c300 100644 --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -105,6 +105,8 @@ class SpecialCaseList { inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category = StringRef()) const; + + protected: // Implementations of the create*() functions that can also be used by derived // classes. diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index b527314c111bf..d2e2d0b986eed 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -91,7 +91,7 @@ class GlobMatcher { /// Represents a set of patterns and their line numbers class Matcher { public: - Matcher(bool UseGlobs, bool RemoveDotSlash); + Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizePaths); Error insert(StringRef Pattern, unsigned LineNumber); unsigned match(StringRef Query) const; @@ -100,6 +100,7 @@ class Matcher { std::variant<RegexMatcher, GlobMatcher> M; bool RemoveDotSlash; + bool CanonicalizePaths; }; Error RegexMatcher::insert(StringRef Pattern, unsigned LineNumber) { @@ -219,8 +220,8 @@ unsigned GlobMatcher::match(StringRef Query) const { return Best < 0 ? 0 : Globs[Best].LineNo; } -Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash) - : RemoveDotSlash(RemoveDotSlash) { +Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizePaths) + : RemoveDotSlash(RemoveDotSlash), CanonicalizePaths(CanonicalizePaths) { if (UseGlobs) M.emplace<GlobMatcher>(); else @@ -232,8 +233,14 @@ Error Matcher::insert(StringRef Pattern, unsigned LineNumber) { } unsigned Matcher::match(StringRef Query) const { - if (RemoveDotSlash) + std::string CanonicalizedQuery; + if (CanonicalizePaths) { + CanonicalizedQuery = llvm::sys::path::convert_to_slash( + llvm::sys::path::remove_leading_dotslash(Query)); + Query = CanonicalizedQuery; + } else if (RemoveDotSlash) { Query = llvm::sys::path::remove_leading_dotslash(Query); + } return std::visit([&](auto &V) -> unsigned { return V.match(Query); }, M); } } // namespace @@ -245,7 +252,7 @@ class SpecialCaseList::Section::SectionImpl { using SectionEntries = StringMap<StringMap<Matcher>>; explicit SectionImpl(bool UseGlobs) - : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false) {} + : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false, /*CanonicalizePaths=*/false) {} Matcher SectionMatcher; SectionEntries Entries; @@ -325,10 +332,8 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, std::string &Error) { StringRef Buffer = MB->getBuffer(); unsigned long long Version = 2; - if (Buffer.consume_front("#!special-case-list-v")) { + if (Buffer.consume_front("#!special-case-list-v")) consumeUnsignedInteger(Buffer, 10, Version); - Buffer = Buffer.ltrim(" \t\r\n"); - } CanonicalizePaths = Version > 3; @@ -387,9 +392,12 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, } auto [Pattern, Category] = Postfix.split("="); + bool MatcherRemoveDotSlash = + RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix); + bool MatcherCanonicalizePaths = + CanonicalizePaths && llvm::is_contained(PathPrefixes, Prefix); auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace( - Category, UseGlobs, - RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix)); + Category, UseGlobs, MatcherRemoveDotSlash, MatcherCanonicalizePaths); Pattern = Pattern.copy(StrAlloc); if (auto Err = It->second.insert(Pattern, LineNo)) { Error = @@ -414,12 +422,6 @@ bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix, std::pair<unsigned, unsigned> SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category) const { - std::string CanonicalizedQuery; - if (CanonicalizePaths && (Prefix == "src" || Prefix == "mainfile")) { - CanonicalizedQuery = llvm::sys::path::convert_to_slash( - llvm::sys::path::remove_leading_dotslash(Query)); - Query = CanonicalizedQuery; - } for (const auto &S : reverse(Sections)) { if (S.Impl->SectionMatcher.matchAny(Section)) { unsigned Blame = S.getLastMatch(Prefix, Query, Category); >From 52373bb50f1626ff77bb1e83151dc60a45cf1364 Mon Sep 17 00:00:00 2001 From: Vitaly Buka <[email protected]> Date: Tue, 9 Jun 2026 12:29:06 -0700 Subject: [PATCH 10/14] [Support] Remove CanonicalizePaths member from SpecialCaseList --- llvm/include/llvm/Support/SpecialCaseList.h | 3 --- llvm/lib/Support/SpecialCaseList.cpp | 17 ++++++++--------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h index 1fc23d763c300..28e264f078d89 100644 --- a/llvm/include/llvm/Support/SpecialCaseList.h +++ b/llvm/include/llvm/Support/SpecialCaseList.h @@ -105,8 +105,6 @@ class SpecialCaseList { inSectionBlame(StringRef Section, StringRef Prefix, StringRef Query, StringRef Category = StringRef()) const; - - protected: // Implementations of the create*() functions that can also be used by derived // classes. @@ -155,7 +153,6 @@ class SpecialCaseList { private: BumpPtrAllocator StrAlloc; std::vector<Section> Sections; - bool CanonicalizePaths = false; LLVM_ABI Expected<Section *> addSection(StringRef SectionStr, unsigned FileIdx, unsigned LineNo, diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index d2e2d0b986eed..e45cb9d05f5f7 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -330,12 +330,13 @@ SpecialCaseList::addSection(StringRef SectionStr, unsigned FileNo, bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, std::string &Error) { - StringRef Buffer = MB->getBuffer(); unsigned long long Version = 2; - if (Buffer.consume_front("#!special-case-list-v")) - consumeUnsignedInteger(Buffer, 10, Version); - CanonicalizePaths = Version > 3; + StringRef Header = MB->getBuffer(); + if (Header.consume_front("#!special-case-list-v")) + consumeUnsignedInteger(Header, 10, Version); + + bool CanonicalizePaths = Version > 3; // In https://reviews.llvm.org/D154014 we added glob support and planned // to remove regex support in patterns. We temporarily support the @@ -392,12 +393,10 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, } auto [Pattern, Category] = Postfix.split("="); - bool MatcherRemoveDotSlash = - RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix); - bool MatcherCanonicalizePaths = - CanonicalizePaths && llvm::is_contained(PathPrefixes, Prefix); auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace( - Category, UseGlobs, MatcherRemoveDotSlash, MatcherCanonicalizePaths); + Category, UseGlobs, + RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix), + CanonicalizePaths && llvm::is_contained(PathPrefixes, Prefix)); Pattern = Pattern.copy(StrAlloc); if (auto Err = It->second.insert(Pattern, LineNo)) { Error = >From fe17675058df4bd5d62141b405a0458f59e55869 Mon Sep 17 00:00:00 2001 From: Vitaly Buka <[email protected]> Date: Tue, 9 Jun 2026 12:35:18 -0700 Subject: [PATCH 11/14] [Support] Rename CanonicalizePaths to CanonicalizeSlashes --- llvm/lib/Support/SpecialCaseList.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index e45cb9d05f5f7..7c069adfa50dd 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -91,7 +91,7 @@ class GlobMatcher { /// Represents a set of patterns and their line numbers class Matcher { public: - Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizePaths); + Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizeSlashes); Error insert(StringRef Pattern, unsigned LineNumber); unsigned match(StringRef Query) const; @@ -100,7 +100,7 @@ class Matcher { std::variant<RegexMatcher, GlobMatcher> M; bool RemoveDotSlash; - bool CanonicalizePaths; + bool CanonicalizeSlashes; }; Error RegexMatcher::insert(StringRef Pattern, unsigned LineNumber) { @@ -220,8 +220,8 @@ unsigned GlobMatcher::match(StringRef Query) const { return Best < 0 ? 0 : Globs[Best].LineNo; } -Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizePaths) - : RemoveDotSlash(RemoveDotSlash), CanonicalizePaths(CanonicalizePaths) { +Matcher::Matcher(bool UseGlobs, bool RemoveDotSlash, bool CanonicalizeSlashes) + : RemoveDotSlash(RemoveDotSlash), CanonicalizeSlashes(CanonicalizeSlashes) { if (UseGlobs) M.emplace<GlobMatcher>(); else @@ -234,13 +234,12 @@ Error Matcher::insert(StringRef Pattern, unsigned LineNumber) { unsigned Matcher::match(StringRef Query) const { std::string CanonicalizedQuery; - if (CanonicalizePaths) { - CanonicalizedQuery = llvm::sys::path::convert_to_slash( - llvm::sys::path::remove_leading_dotslash(Query)); + if (CanonicalizeSlashes) { + CanonicalizedQuery = llvm::sys::path::convert_to_slash(Query); Query = CanonicalizedQuery; - } else if (RemoveDotSlash) { - Query = llvm::sys::path::remove_leading_dotslash(Query); } + if (RemoveDotSlash) + Query = llvm::sys::path::remove_leading_dotslash(Query); return std::visit([&](auto &V) -> unsigned { return V.match(Query); }, M); } } // namespace @@ -252,7 +251,8 @@ class SpecialCaseList::Section::SectionImpl { using SectionEntries = StringMap<StringMap<Matcher>>; explicit SectionImpl(bool UseGlobs) - : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false, /*CanonicalizePaths=*/false) {} + : SectionMatcher(UseGlobs, /*RemoveDotSlash=*/false, + /*CanonicalizeSlashes=*/false) {} Matcher SectionMatcher; SectionEntries Entries; @@ -336,7 +336,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, if (Header.consume_front("#!special-case-list-v")) consumeUnsignedInteger(Header, 10, Version); - bool CanonicalizePaths = Version > 3; + bool CanonicalizeSlashes = Version > 3; // In https://reviews.llvm.org/D154014 we added glob support and planned // to remove regex support in patterns. We temporarily support the @@ -396,7 +396,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace( Category, UseGlobs, RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix), - CanonicalizePaths && llvm::is_contained(PathPrefixes, Prefix)); + CanonicalizeSlashes && llvm::is_contained(PathPrefixes, Prefix)); Pattern = Pattern.copy(StrAlloc); if (auto Err = It->second.insert(Pattern, LineNo)) { Error = >From be45576387c7f8ca89da65666e67b1bd29fe6722 Mon Sep 17 00:00:00 2001 From: Vitaly Buka <[email protected]> Date: Tue, 9 Jun 2026 12:40:32 -0700 Subject: [PATCH 12/14] Update docs --- clang/docs/ReleaseNotes.rst | 11 ++++++----- clang/docs/SanitizerSpecialCaseList.rst | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 09fa6e4fc2e4e..e7d891c3725ef 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -264,11 +264,6 @@ Non-comprehensive list of changes in this release - Added support for floating point and pointer values in most ``__atomic_`` builtins. -- Warning suppression mappings (``--warning-suppression-mappings``) now recognize - version 4 of the Special Case List format (indicated by ``#!special-case-list-v4``), - which specifies that filename patterns should be matched against canonicalized - paths (without leading dots or slashes, and on Windows, without backslashes). - - Added ``__builtin_stdc_rotate_left`` and ``__builtin_stdc_rotate_right`` for bit rotation of unsigned integers including ``_BitInt`` types. Rotation counts are normalized modulo the bit-width and support negative values. @@ -973,6 +968,12 @@ Sanitizers ---------- - UndefinedBehaviorSanitizer now supports ``__ubsan_default_suppressions``. +- Sanitizer Special Case Lists (``-fsanitize-ignorelist``) and warning + suppression mappings (``--warning-suppression-mappings``) now recognize version + 4 of the Special Case List format (indicated by ``#!special-case-list-v4``), + which specifies that filename patterns should be matched against canonicalized + paths (without leading dots or slashes, and on Windows, without backslashes). + Python Binding Changes ---------------------- - Add deprecation warnings to ``CompletionChunk.isKind...`` methods. diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst index 9fe070825f60d..f23139f868235 100644 --- a/clang/docs/SanitizerSpecialCaseList.rst +++ b/clang/docs/SanitizerSpecialCaseList.rst @@ -237,7 +237,7 @@ tool-specific docs. by LLVM. On Windows, this might involve a mix of forward and backslashes as file separators, and writing patterns to match both variants can be inconvenient. - + File path canonicalization is supported in version 4 or later (indicated by starting the file with ``#!special-case-list-v4``). In this version, paths will be canonicalized before patterns are matched against them. This involves >From 85564ef434c6deec8b1291b8cda19bb1882e9727 Mon Sep 17 00:00:00 2001 From: Vitaly Buka <[email protected]> Date: Tue, 9 Jun 2026 12:59:18 -0700 Subject: [PATCH 13/14] windows only --- clang/docs/ReleaseNotes.rst | 4 ++-- clang/docs/SanitizerSpecialCaseList.rst | 7 ++----- llvm/lib/Support/SpecialCaseList.cpp | 9 +++++---- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e7d891c3725ef..a4299825f1fd7 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -971,8 +971,8 @@ Sanitizers - Sanitizer Special Case Lists (``-fsanitize-ignorelist``) and warning suppression mappings (``--warning-suppression-mappings``) now recognize version 4 of the Special Case List format (indicated by ``#!special-case-list-v4``), - which specifies that filename patterns should be matched against canonicalized - paths (without leading dots or slashes, and on Windows, without backslashes). + which canonicalizes path separators by converting backslashes to forward slashes + on Windows hosts. Python Binding Changes ---------------------- diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst index f23139f868235..40fa19b37d93b 100644 --- a/clang/docs/SanitizerSpecialCaseList.rst +++ b/clang/docs/SanitizerSpecialCaseList.rst @@ -238,11 +238,8 @@ tool-specific docs. file separators, and writing patterns to match both variants can be inconvenient. - File path canonicalization is supported in version 4 or later (indicated by - starting the file with ``#!special-case-list-v4``). In this version, paths - will be canonicalized before patterns are matched against them. This involves - stripping any leading dots and slashes, and (on Windows only) converting all - backslashes to forward slashes. + Starting with version 4 (indicated by ``#!special-case-list-v4``), path separators + on Windows hosts are canonicalized by converting backslashes to forward slashes. ``mainfile`` is similar to applying ``-fno-sanitize=`` to a set of files but does not need plumbing into the build system. This works well for internal diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 7c069adfa50dd..6be3d38ac18b6 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -336,7 +336,8 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, if (Header.consume_front("#!special-case-list-v")) consumeUnsignedInteger(Header, 10, Version); - bool CanonicalizeSlashes = Version > 3; + bool CanonicalizeSlashes = + Version > 3 && llvm::sys::path::is_separator('\\'); // In https://reviews.llvm.org/D154014 we added glob support and planned // to remove regex support in patterns. We temporarily support the @@ -393,10 +394,10 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, } auto [Pattern, Category] = Postfix.split("="); + bool IsPath = llvm::is_contained(PathPrefixes, Prefix); auto [It, _] = CurrentImpl->Entries[Prefix].try_emplace( - Category, UseGlobs, - RemoveDotSlash && llvm::is_contained(PathPrefixes, Prefix), - CanonicalizeSlashes && llvm::is_contained(PathPrefixes, Prefix)); + Category, UseGlobs, RemoveDotSlash && IsPath, + CanonicalizeSlashes && IsPath); Pattern = Pattern.copy(StrAlloc); if (auto Err = It->second.insert(Pattern, LineNo)) { Error = >From 005fa63fe8ddac69f291b0700399bd88feece998 Mon Sep 17 00:00:00 2001 From: Vitaly Buka <[email protected]> Date: Tue, 9 Jun 2026 12:59:49 -0700 Subject: [PATCH 14/14] format --- llvm/lib/Support/SpecialCaseList.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index 6be3d38ac18b6..11a752c68acfd 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -23,8 +23,8 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/GlobPattern.h" #include "llvm/Support/LineIterator.h" -#include "llvm/Support/Path.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" #include "llvm/Support/Regex.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" @@ -336,8 +336,7 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, if (Header.consume_front("#!special-case-list-v")) consumeUnsignedInteger(Header, 10, Version); - bool CanonicalizeSlashes = - Version > 3 && llvm::sys::path::is_separator('\\'); + bool CanonicalizeSlashes = Version > 3 && llvm::sys::path::is_separator('\\'); // In https://reviews.llvm.org/D154014 we added glob support and planned // to remove regex support in patterns. We temporarily support the _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
