https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/143487
>From 5825b324e47c78a939d8e62d1101e1574fd04638 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Tue, 10 Jun 2025 15:50:10 +0800 Subject: [PATCH 1/3] [-Wunterminated-string-initialization] Handle C string literals ending with explicit '\0' In C, a char array needs no "nonstring" attribute if its initializer is a string literal that 1) explicitly ends with '\0' and 2) fits in the array after possible truncation of the implicit terminating null. For example: `char a[4] = "ABC\0"; // fine, needs no "nonstring" attr` rdar://152506883 --- clang/lib/Sema/SemaInit.cpp | 5 +++++ clang/test/Sema/attr-nonstring_safe.c | 28 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 clang/test/Sema/attr-nonstring_safe.c diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index da56225b2f926..f7592688e0327 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -260,6 +260,11 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT, diag::ext_initializer_string_for_char_array_too_long) << Str->getSourceRange(); else if (StrLength - 1 == ArrayLen) { + // If the string literal is null-terminated explicitly, e.g., `char a[4] = + // "ABC\0"`, there should be no warn: + if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens())) + if (SL->isOrdinary() && SL->getBytes().back() == 0) + return; // If the entity being initialized has the nonstring attribute, then // silence the "missing nonstring" diagnostic. 
If there's no entity, // check whether we're initializing an array of arrays; if so, walk the diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c new file mode 100644 index 0000000000000..3ea441e033dba --- /dev/null +++ b/clang/test/Sema/attr-nonstring_safe.c @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c +// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c++ + + +// In C, the following examples are fine: +#if __cplusplus +char foo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} + +struct S { + char buf[3]; + char fub[3]; +} s = { "ba\0", "bo\0" }; // expected-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} + +signed char scfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +unsigned char ucfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} + +#else +//expected-no-diagnostics +char foo[3] = "fo\0"; + +struct S { + char buf[3]; + char fub[3]; +} s = { "ba\0", "bo\0" }; + +signed char scfoo[3] = "fo\0"; +unsigned char ucfoo[3] = "fo\0"; +#endif >From afb909341d4c5152f1d5ac6f2deef5385901bb61 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Wed, 11 Jun 2025 14:47:30 +0800 Subject: [PATCH 2/3] address comments --- clang/lib/Sema/SemaInit.cpp | 9 +++--- clang/test/Sema/attr-nonstring_safe.c | 45 +++++++++++++++++++++------ 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index f7592688e0327..ac611aed6d581 
100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -261,10 +261,11 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT, << Str->getSourceRange(); else if (StrLength - 1 == ArrayLen) { // If the string literal is null-terminated explicitly, e.g., `char a[4] = - // "ABC\0"`, there should be no warn: - if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens())) - if (SL->isOrdinary() && SL->getBytes().back() == 0) - return; + // "ABC\0"`, there should be no warning: + if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens()); + SL && SL->getLength() > 0 && + SL->getCodeUnit(SL->getLength() - 1) == 0) + return; // If the entity being initialized has the nonstring attribute, then // silence the "missing nonstring" diagnostic. If there's no entity, // check whether we're initializing an array of arrays; if so, walk the diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c index 3ea441e033dba..93715d18db5a8 100644 --- a/clang/test/Sema/attr-nonstring_safe.c +++ b/clang/test/Sema/attr-nonstring_safe.c @@ -1,28 +1,53 @@ // RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c -// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c++ +// RUN: %clang_cc1 -fsyntax-only -verify=cxx,expected -Wunterminated-string-initialization %s -x c++ -// In C, the following examples are fine: -#if __cplusplus -char foo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +#ifdef __cplusplus +// C++ is stricter so the following cases should be warned about: + +char foo3[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +char foo1[1] = "\0"; // cxx-error {{initializer-string for char array is too long, 
array size is 1 but initializer has size 2 (including the null terminating character)}} struct S { char buf[3]; char fub[3]; -} s = { "ba\0", "bo\0" }; // expected-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} - -signed char scfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} -unsigned char ucfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +} s = { "ba\0", "bo\0" }; // cxx-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +signed char scfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +unsigned char ucfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +wchar_t wcfoo[3] = L"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +char16_t c16foo[3] = u"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} +char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}} #else -//expected-no-diagnostics -char foo[3] = "fo\0"; + +// In C, the following examples are fine: +#include <stddef.h> +typedef unsigned short char16_t; +typedef unsigned int char32_t; + +char foo3[3] = "fo\0"; +char foo1[1] 
= "\0"; struct S { char buf[3]; char fub[3]; } s = { "ba\0", "bo\0" }; +// Test different encodings: signed char scfoo[3] = "fo\0"; unsigned char ucfoo[3] = "fo\0"; +wchar_t wcfoo[3] = L"fo\0"; +char16_t c16foo[3] = u"fo\0"; +char32_t c32foo[3] = U"fo\0"; + +// Test list initializer: +signed char scfoo_lst[3] = {'f', 'o', '\0'}; +unsigned char ucfoo_lst[3] = {'f', 'o', '\0'}; +wchar_t wcfoo_lst[3] = {L'f', L'o', L'\0'}; +char16_t c16foo_lst[3] = {u'f', u'o', u'\0'}; +char32_t c32foo_lst[3] = {U'f', U'o', U'\0'}; + +// Declaring an array of size 0 is invalid by C standard but compilers +// may allow it: +char a[0] = ""; // expected-warning {{initializer-string for character array is too long, array size is 0 but initializer has size 1 (including the null terminating character); did you mean to use the 'nonstring' attribute?}} #endif >From fca602a4c18fdfe1ada285ea096e3436d7fa8253 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <ziq...@udel.edu> Date: Wed, 11 Jun 2025 14:53:19 +0800 Subject: [PATCH 3/3] add release notes --- clang/docs/ReleaseNotes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index beed0da6883d6..13e4db89e5dc8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -220,6 +220,7 @@ C Language Changes char buf1[3] = "foo"; // -Wunterminated-string-initialization char buf2[3] = "flarp"; // -Wexcess-initializers + char buf3[3] = "fo\0"; // This is fine, no warning. This diagnostic can be suppressed by adding the new ``nonstring`` attribute to the field or variable being initialized. #GH137705 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits