https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/143487

>From 5825b324e47c78a939d8e62d1101e1574fd04638 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziq...@udel.edu>
Date: Tue, 10 Jun 2025 15:50:10 +0800
Subject: [PATCH 1/2] [-Wunterminated-string-initialization] Handle C string
 literals ending with explicit '\0'

In C, a char array needs no "nonstring" attribute if its initializer
is a string literal that 1) explicitly ends with '\0' and 2) fits in
the array once the implicit null terminator is truncated.

For example:
`char a[4] = "ABC\0"; // fine, needs no "nonstring" attr`

rdar://152506883
---
 clang/lib/Sema/SemaInit.cpp           |  5 +++++
 clang/test/Sema/attr-nonstring_safe.c | 28 +++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 clang/test/Sema/attr-nonstring_safe.c

diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index da56225b2f926..f7592688e0327 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -260,6 +260,11 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
              diag::ext_initializer_string_for_char_array_too_long)
           << Str->getSourceRange();
     else if (StrLength - 1 == ArrayLen) {
+      // If the string literal is null-terminated explicitly, e.g., `char a[4] =
+      // "ABC\0"`, there should be no warn:
+      if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens()))
+        if (SL->isOrdinary() && SL->getBytes().back() == 0)
+          return;
       // If the entity being initialized has the nonstring attribute, then
       // silence the "missing nonstring" diagnostic. If there's no entity,
       // check whether we're initializing an array of arrays; if so, walk the
diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c
new file mode 100644
index 0000000000000..3ea441e033dba
--- /dev/null
+++ b/clang/test/Sema/attr-nonstring_safe.c
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c
+// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c++
+
+
+// In C, the following examples are fine:
+#if __cplusplus
+char foo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+
+struct S {
+  char buf[3];
+  char fub[3];
+} s = { "ba\0", "bo\0" }; // expected-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+
+signed char scfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+unsigned char ucfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+
+#else
+//expected-no-diagnostics
+char foo[3] = "fo\0";
+
+struct S {
+  char buf[3];
+  char fub[3];
+} s = { "ba\0", "bo\0" };
+
+signed char scfoo[3] = "fo\0";
+unsigned char ucfoo[3] = "fo\0";
+#endif

>From afb909341d4c5152f1d5ac6f2deef5385901bb61 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziq...@udel.edu>
Date: Wed, 11 Jun 2025 14:47:30 +0800
Subject: [PATCH 2/2] address comments

---
 clang/lib/Sema/SemaInit.cpp           |  9 +++---
 clang/test/Sema/attr-nonstring_safe.c | 45 +++++++++++++++++++++------
 2 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index f7592688e0327..ac611aed6d581 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -261,10 +261,11 @@ static void CheckStringInit(Expr *Str, QualType &DeclT, const ArrayType *AT,
           << Str->getSourceRange();
     else if (StrLength - 1 == ArrayLen) {
       // If the string literal is null-terminated explicitly, e.g., `char a[4] =
-      // "ABC\0"`, there should be no warn:
-      if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens()))
-        if (SL->isOrdinary() && SL->getBytes().back() == 0)
-          return;
+      // "ABC\0"`, there should be no warning:
+      if (const auto *SL = dyn_cast<StringLiteral>(Str->IgnoreParens());
+          SL && SL->getLength() > 0 &&
+          SL->getCodeUnit(SL->getLength() - 1) == 0)
+        return;
       // If the entity being initialized has the nonstring attribute, then
       // silence the "missing nonstring" diagnostic. If there's no entity,
       // check whether we're initializing an array of arrays; if so, walk the
diff --git a/clang/test/Sema/attr-nonstring_safe.c b/clang/test/Sema/attr-nonstring_safe.c
index 3ea441e033dba..93715d18db5a8 100644
--- a/clang/test/Sema/attr-nonstring_safe.c
+++ b/clang/test/Sema/attr-nonstring_safe.c
@@ -1,28 +1,53 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c
-// RUN: %clang_cc1 -fsyntax-only -verify -Wunterminated-string-initialization %s -x c++
+// RUN: %clang_cc1 -fsyntax-only -verify=cxx,expected -Wunterminated-string-initialization %s -x c++
 
 
-// In C, the following examples are fine:
-#if __cplusplus
-char foo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+#ifdef __cplusplus
+// C++ is stricter so the following cases should be warned about:
+
+char foo3[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+char foo1[1] = "\0";   // cxx-error {{initializer-string for char array is too long, array size is 1 but initializer has size 2 (including the null terminating character)}}
 
 struct S {
   char buf[3];
   char fub[3];
-} s = { "ba\0", "bo\0" }; // expected-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
-
-signed char scfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
-unsigned char ucfoo[3] = "fo\0"; // expected-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+} s = { "ba\0", "bo\0" }; // cxx-error 2{{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
 
+signed char scfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+unsigned char ucfoo[3] = "fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+wchar_t wcfoo[3] = L"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+char16_t c16foo[3] = u"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
+char32_t c32foo[3] = U"fo\0"; // cxx-error {{initializer-string for char array is too long, array size is 3 but initializer has size 4 (including the null terminating character)}}
 #else
-//expected-no-diagnostics
-char foo[3] = "fo\0";
+
+// In C, the following examples are fine:
+#include <stddef.h>
+typedef unsigned short char16_t;
+typedef unsigned int char32_t;
+
+char foo3[3] = "fo\0";
+char foo1[1] = "\0";
 
 struct S {
   char buf[3];
   char fub[3];
 } s = { "ba\0", "bo\0" };
 
+// Test different encodings:
 signed char scfoo[3] = "fo\0";
 unsigned char ucfoo[3] = "fo\0";
+wchar_t wcfoo[3] = L"fo\0";
+char16_t c16foo[3] = u"fo\0";
+char32_t c32foo[3] = U"fo\0";
+
+// Test list initializer:
+signed char scfoo_lst[3] = {'f', 'o', '\0'};
+unsigned char ucfoo_lst[3] = {'f', 'o', '\0'};
+wchar_t wcfoo_lst[3] = {L'f', L'o', L'\0'};
+char16_t c16foo_lst[3] = {u'f', u'o', u'\0'};
+char32_t c32foo_lst[3] = {U'f', U'o', U'\0'};
+
+// Declaring an array of size 0 is invalid by C standard but compilers
+// may allow it:
+char a[0] = ""; // expected-warning {{initializer-string for character array is too long, array size is 0 but initializer has size 1 (including the null terminating character); did you mean to use the 'nonstring' attribute?}}
 #endif

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to