https://github.com/a-tarasyuk updated https://github.com/llvm/llvm-project/pull/154018
>From d9cd8679c7594da4dc31ca736a439b113a1239d6 Mon Sep 17 00:00:00 2001 From: Oleksandr Tarasiuk <oleksandr.taras...@outlook.com> Date: Sun, 17 Aug 2025 16:00:48 +0300 Subject: [PATCH 1/2] [Clang] improve -Wstring-concatenation to warn on every missing comma in initializer lists --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/Sema/SemaDecl.cpp | 45 +++++++++++++++++++-------------- clang/test/Sema/string-concat.c | 14 ++++++++++ 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b35f4ea42818a..48575b6e821f0 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -162,6 +162,8 @@ Improvements to Clang's diagnostics an override of a virtual method. - Fixed fix-it hint for fold expressions. Clang now correctly places the suggested right parenthesis when diagnosing malformed fold expressions. (#GH151787) +- ``-Wstring-concatenation`` now diagnoses every missing comma in an initializer list, + rather than stopping after the first. (#GH153745) - Fixed an issue where emitted format-signedness diagnostics were not associated with an appropriate diagnostic id. Besides being incorrect from an API standpoint, this was user visible, e.g.: diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 8ddbaf34a7f47..b63c157d81dc8 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -14708,7 +14708,16 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { isa<InitListExpr>(var->getInit())) { const auto *ILE = cast<InitListExpr>(var->getInit()); unsigned NumInits = ILE->getNumInits(); - if (NumInits > 2) + if (NumInits > 2) { + auto concatenatedPartsAt = [&](unsigned Index) -> unsigned { + const Expr *E = ILE->getInit(Index); + if (E) { + if (const auto *S = dyn_cast<StringLiteral>(E->IgnoreImpCasts())) + return S->getNumConcatenated(); + } + return 0; + }; + for (unsigned I = 0; I < NumInits; ++I) { const auto *Init = ILE->getInit(I); if (!Init) @@ -14721,24 +14730,23 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { // Diagnose missing comma in string array initialization. // Do not warn when all the elements in the initializer are concatenated // together. Do not warn for macros too. - if (NumConcat == 2 && !SL->getBeginLoc().isMacroID()) { - bool OnlyOneMissingComma = true; - for (unsigned J = I + 1; J < NumInits; ++J) { - const auto *Init = ILE->getInit(J); - if (!Init) - break; - const auto *SLJ = dyn_cast<StringLiteral>(Init->IgnoreImpCasts()); - if (!SLJ || SLJ->getNumConcatenated() > 1) { - OnlyOneMissingComma = false; - break; - } - } + if (NumConcat == 2) { + if (SL->getBeginLoc().isMacroID()) + continue; + + unsigned L = I > 0 ? concatenatedPartsAt(I - 1) : 0; + unsigned R = I + 1 < NumInits ? concatenatedPartsAt(I + 1) : 0; + + // Skip neighbors with multi-part concatenations. + if (L > 1 || R > 1) + continue; - if (OnlyOneMissingComma) { + // Diagnose when at least one neighbor is a single literal. + if (L || R) { SmallVector<FixItHint, 1> Hints; - for (unsigned i = 0; i < NumConcat - 1; ++i) - Hints.push_back(FixItHint::CreateInsertion( - PP.getLocForEndOfToken(SL->getStrTokenLoc(i)), ",")); + // Insert a comma between the two tokens of this element. + Hints.push_back(FixItHint::CreateInsertion( + PP.getLocForEndOfToken(SL->getStrTokenLoc(0)), ", ")); Diag(SL->getStrTokenLoc(1), diag::warn_concatenated_literal_array_init) @@ -14746,10 +14754,9 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { Diag(SL->getBeginLoc(), diag::note_concatenated_string_literal_silence); } - // In any case, stop now. - break; } } + } } diff --git a/clang/test/Sema/string-concat.c b/clang/test/Sema/string-concat.c index 63abf100c020f..ae6c0897878a7 100644 --- a/clang/test/Sema/string-concat.c +++ b/clang/test/Sema/string-concat.c @@ -168,3 +168,17 @@ const char *extra_parens_to_suppress_warning[] = { "promise"), "shared_future" }; + +const char *multiple_missing_commas[] = { + "1", + "2" // expected-note {{place parentheses around the string literal to silence warning}} + "3", // expected-warning {{suspicious concatenation of string literals in an array initialization; did you mean to separate the elements with a comma?}} + "4", + "5", + "6" // expected-note {{place parentheses around the string literal to silence warning}} + "7", // expected-warning {{suspicious concatenation of string literals in an array initialization; did you mean to separate the elements with a comma?}} + "8", + "9", + "10", + "11", +}; >From b62c1448055b380e2ca1a2d1895f23e3a7aa6988 Mon Sep 17 00:00:00 2001 From: Oleksandr Tarasiuk <oleksandr.taras...@outlook.com> Date: Mon, 18 Aug 2025 14:06:24 +0300 Subject: [PATCH 2/2] add test for multiple consecutive missing commas --- clang/lib/Sema/SemaDecl.cpp | 8 +++----- clang/test/Sema/string-concat.c | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index b63c157d81dc8..98485cf9e72be 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -14710,11 +14710,9 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { unsigned NumInits = ILE->getNumInits(); if (NumInits > 2) { auto concatenatedPartsAt = [&](unsigned Index) -> unsigned { - const Expr *E = ILE->getInit(Index); - if (E) { + if (const Expr *E = ILE->getInit(Index)) if (const auto *S = dyn_cast<StringLiteral>(E->IgnoreImpCasts())) return S->getNumConcatenated(); - } return 0; }; @@ -14738,11 +14736,11 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { unsigned R = I + 1 < NumInits ? concatenatedPartsAt(I + 1) : 0; // Skip neighbors with multi-part concatenations. - if (L > 1 || R > 1) + if (R > 1) continue; // Diagnose when at least one neighbor is a single literal. - if (L || R) { + if (R == 1 || L == 1) { SmallVector<FixItHint, 1> Hints; // Insert a comma between the two tokens of this element. Hints.push_back(FixItHint::CreateInsertion( diff --git a/clang/test/Sema/string-concat.c b/clang/test/Sema/string-concat.c index ae6c0897878a7..4b52a74116b49 100644 --- a/clang/test/Sema/string-concat.c +++ b/clang/test/Sema/string-concat.c @@ -169,7 +169,7 @@ const char *extra_parens_to_suppress_warning[] = { "shared_future" }; -const char *multiple_missing_commas[] = { +const char *multiple_missing_commas1[] = { "1", "2" // expected-note {{place parentheses around the string literal to silence warning}} "3", // expected-warning {{suspicious concatenation of string literals in an array initialization; did you mean to separate the elements with a comma?}} @@ -182,3 +182,17 @@ const char *multiple_missing_commas[] = { "10", "11", }; + +const char *multiple_missing_commas2[] = { + "1", + "2" + "3" + "4" + "5", + "6" // expected-note {{place parentheses around the string literal to silence warning}} + "7", // expected-warning {{suspicious concatenation of string literals in an array initialization; did you mean to separate the elements with a comma?}} + "8", + "9", + "10", + "11", +}; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits