https://github.com/ojhunt updated 
https://github.com/llvm/llvm-project/pull/173140

>From b769a72dbbd2f775b3cdaa756bc72db9ac81f4ad Mon Sep 17 00:00:00 2001
From: Oliver Hunt <[email protected]>
Date: Fri, 19 Dec 2025 20:33:17 -0800
Subject: [PATCH] [clang] Add support for consteval null terminated strings

Adds support for null-terminated strings produced by constexpr
evaluation. This makes it possible to perform analysis of format
strings that was previously not possible, and will be needed in the
future to support __ptrauth qualifier options.
---
 .../clang/Basic/DiagnosticSemaKinds.td        |  7 ++-
 clang/include/clang/Sema/Sema.h               |  9 ++--
 clang/lib/AST/ByteCode/Context.cpp            |  8 +--
 clang/lib/Sema/SemaDeclCXX.cpp                | 42 +++++++++++++++
 clang/test/Parser/asm.cpp                     | 29 ++++++++++-
 clang/test/SemaCXX/gnu-asm-constexpr.cpp      |  4 +-
 clang/test/SemaCXX/static-assert-cxx26.cpp    | 51 ++++++++++++++++++-
 7 files changed, 137 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 51b6eba965103..4c36462a08580 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -1747,7 +1747,7 @@ def subst_user_defined_msg : TextSubstitution<
   "%select{a static assertion|this asm operand}0">;
 
 def err_user_defined_msg_invalid : Error<
-  "%sub{subst_user_defined_msg}0 must be a string literal or an "
+  "%sub{subst_user_defined_msg}0 must be a null terminated constant string or an "
   "object with 'data()' and 'size()' member functions">;
 def err_user_defined_msg_missing_member_function : Error<
   "the %select{message|string}0 object in "
@@ -1755,6 +1755,11 @@ def err_user_defined_msg_missing_member_function : Error<
   "a 'size()' member function|"
   "a 'data()' member function|"
   "'data()' and 'size()' member functions}1">;
+def err_user_defined_msg_not_null_terminated_string : Error<
+  "%sub{subst_user_defined_msg}0 is not null terminated">;
+def ext_consteval_string_constants : Extension<
+  "consteval string constants are an extension">, DefaultWarn,
+  InGroup<DiagGroup<"consteval-string-constants-extension">>;
 def err_user_defined_msg_invalid_mem_fn_ret_ty : Error<
   "%sub{subst_user_defined_msg}0 must have a '%select{size|data}1()' member "
   "function returning an object convertible to '%select{std::size_t|const char *}1'">;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 91b07aa500b86..9d00aa5edebc9 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -831,10 +831,11 @@ enum class CCEKind {
   ArrayBound,    ///< Array bound in array declarator or new-expression.
   ExplicitBool,  ///< Condition in an explicit(bool) specifier.
   Noexcept,      ///< Condition in a noexcept(bool) specifier.
-  StaticAssertMessageSize, ///< Call to size() in a static assert
-                           ///< message.
-  StaticAssertMessageData, ///< Call to data() in a static assert
-                           ///< message.
+  StaticAssertMessageSize,          ///< Call to size() in a static assert
+                                    ///< message.
+  StaticAssertMessageData,          ///< Call to data() in a static assert
+                                    ///< message.
+  StaticAssertNullTerminatedString, ///< tryEvaluateStrLen
 };
 
 /// Enums for the diagnostics of target, target_version and target_clones.
diff --git a/clang/lib/AST/ByteCode/Context.cpp b/clang/lib/AST/ByteCode/Context.cpp
index 74ec986e49ca7..208fcb2a2732e 100644
--- a/clang/lib/AST/ByteCode/Context.cpp
+++ b/clang/lib/AST/ByteCode/Context.cpp
@@ -294,13 +294,15 @@ bool Context::evaluateStrlen(State &Parent, const Expr *E, uint64_t &Result) {
     if (!FieldDesc->isPrimitiveArray())
       return false;
 
-    if (Ptr.isDummy() || Ptr.isUnknownSizeArray())
+    if (Ptr.isDummy() || Ptr.isUnknownSizeArray() || Ptr.isPastEnd())
       return false;
 
     unsigned N = Ptr.getNumElems();
     if (Ptr.elemSize() == 1) {
-      Result = strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), N);
-      return Result != N;
+      unsigned Size = N - Ptr.getIndex();
+      Result =
+          strnlen(reinterpret_cast<const char *>(Ptr.getRawAddress()), Size);
+      return Result != Size;
     }
 
     PrimType ElemT = FieldDesc->getPrimType();
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 4da431f19acec..45cdf9563235c 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -17693,6 +17693,44 @@ void Sema::DiagnoseStaticAssertDetails(const Expr *E) {
   }
 }
 
+template <typename ResultType>
+static bool EvaluateAsNullTerminatedCharBuffer(
+    Sema &SemaRef, Expr *Message, ResultType &Result, ASTContext &Ctx,
+    Sema::StringEvaluationContext EvalContext, bool ErrorOnInvalidMessage) {
+  SourceLocation Loc = Message->getBeginLoc();
+  QualType SizeT = Ctx.getSizeType();
+  QualType ConstCharPtr = Ctx.getPointerType(Ctx.getConstType(Ctx.CharTy));
+  Expr::EvalResult Status;
+  SmallVector<PartialDiagnosticAt, 8> Notes;
+  Status.Diag = &Notes;
+
+  auto DiagnoseInvalidConstantString = [&]() {
+    SemaRef.Diag(Loc, diag::err_user_defined_msg_not_null_terminated_string)
+        << EvalContext;
+    for (const auto &Note : Notes)
+      SemaRef.Diag(Note.first, Note.second);
+    return !ErrorOnInvalidMessage;
+  };
+  ExprResult EvaluatedData = SemaRef.BuildConvertedConstantExpression(
+      Message, ConstCharPtr, CCEKind::StaticAssertNullTerminatedString);
+  if (EvaluatedData.isInvalid())
+    return DiagnoseInvalidConstantString();
+
+  uint64_t Length = 0;
+  if (!EvaluatedData.get()->tryEvaluateStrLen(Length, Ctx))
+    return DiagnoseInvalidConstantString();
+
+  llvm::APInt SizeVal(Ctx.getIntWidth(SizeT), Length);
+  Expr *SizeExpr = IntegerLiteral::Create(Ctx, SizeVal, SizeT, Loc);
+
+  bool EvalResult = Message->EvaluateCharRangeAsString(
+      Result, SizeExpr, EvaluatedData.get(), Ctx, Status);
+  if (!EvalResult || !Notes.empty())
+    return DiagnoseInvalidConstantString();
+  SemaRef.Diag(Loc, diag::ext_consteval_string_constants);
+  return true;
+}
+
 template <typename ResultType>
 static bool EvaluateAsStringImpl(Sema &SemaRef, Expr *Message,
                                  ResultType &Result, ASTContext &Ctx,
@@ -17726,6 +17764,10 @@ static bool EvaluateAsStringImpl(Sema &SemaRef, Expr *Message,
 
   SourceLocation Loc = Message->getBeginLoc();
   QualType T = Message->getType().getNonReferenceType();
+  if (T->isPointerType() && T->getPointeeType()->isCharType())
+    return EvaluateAsNullTerminatedCharBuffer(
+        SemaRef, Message, Result, Ctx, EvalContext, ErrorOnInvalidMessage);
+
   auto *RD = T->getAsCXXRecordDecl();
   if (!RD) {
     SemaRef.Diag(Loc, diag::err_user_defined_msg_invalid) << EvalContext;
diff --git a/clang/test/Parser/asm.cpp b/clang/test/Parser/asm.cpp
index cf23b6f42a077..517af5e777ce6 100644
--- a/clang/test/Parser/asm.cpp
+++ b/clang/test/Parser/asm.cpp
@@ -35,6 +35,9 @@ struct string_view {
 int foo1 asm ((string_view("test"))); // expected-error {{expected string 
literal in 'asm'}}
 int func() asm ((string_view("test"))); // expected-error {{expected string 
literal in 'asm'}}
 
+constexpr const char* getConstantString(const char* s) {
+  return s;
+}
 
 void f2() {
   asm(string_view("")); // expected-error {{expected string literal or 
parenthesized constant expression in 'asm'}}
@@ -44,6 +47,13 @@ void f2() {
   asm("" :: string_view("")); // expected-error {{expected string literal or 
parenthesized constant expression in 'asm'}}
   asm(::string_view("")); // expected-error {{expected string literal or 
parenthesized constant expression in 'asm'}}
 
+  asm(getConstantString("")); // expected-error {{expected string literal or 
parenthesized constant expression in 'asm'}}
+  asm("" : getConstantString("")); // expected-error {{expected string literal 
or parenthesized constant expression in 'asm'}}
+  asm("" : : getConstantString("")); // expected-error {{expected string 
literal or parenthesized constant expression in 'asm'}}
+  asm("" : : : getConstantString("")); // expected-error {{expected ')'}}
+  asm("" :: getConstantString("")); // expected-error {{expected string 
literal or parenthesized constant expression in 'asm'}}
+  asm(::getConstantString("")); // expected-error {{expected string literal or 
parenthesized constant expression in 'asm'}}
+
   int i;
 
   asm((string_view("")));
@@ -55,5 +65,22 @@ void f2() {
   asm("" : (::string_view("+g")) (i) : (::string_view("g")) (0) : 
(string_view("memory")));
 
 
-  asm((0)); // expected-error {{the expression in this asm operand must be a 
string literal or an object with 'data()' and 'size()' member functions}}
+  asm((getConstantString("")));
+  // expected-warning@-1 {{consteval string constants are an extension}}
+  asm((::getConstantString("")));
+  // expected-warning@-1 {{consteval string constants are an extension}}
+  asm("" : (::getConstantString("+g")) (i));
+  // expected-warning@-1 {{consteval string constants are an extension}}
+  asm("" : (::getConstantString("+g"))); // expected-error {{expected '(' 
after 'asm operand'}}
+  // expected-warning@-1 {{consteval string constants are an extension}}
+  asm("" : (::getConstantString("+g")) (i) : (::getConstantString("g")) (0));
+  // expected-warning@-1 2 {{consteval string constants are an extension}}
+  asm("" : (::getConstantString("+g")) (i) : (::getConstantString("g"))); // 
expected-error {{expected '(' after 'asm operand'}}
+  // expected-warning@-1 2 {{consteval string constants are an extension}}
+  asm("" : (::getConstantString("+g")) (i) : (::getConstantString("g")) (0) : 
(getConstantString("memory")));
+  // expected-warning@-1 3 {{consteval string constants are an extension}}
+
+
+
+  asm((0)); // expected-error {{the expression in this asm operand must be a 
null terminated constant string or an object with 'data()' and 'size()' member 
functions}}
 }
diff --git a/clang/test/SemaCXX/gnu-asm-constexpr.cpp 
b/clang/test/SemaCXX/gnu-asm-constexpr.cpp
index 77466df12bdc1..f1d2862a62918 100644
--- a/clang/test/SemaCXX/gnu-asm-constexpr.cpp
+++ b/clang/test/SemaCXX/gnu-asm-constexpr.cpp
@@ -77,7 +77,7 @@ struct string_view {
 
 
 void f() {
-    asm(("")); // expected-error {{the expression in this asm operand must be 
a string literal or an object with 'data()' and 'size()' member functions}}
+    asm(("")); // expected-error {{the expression in this asm operand must be 
a null terminated constant string or an object with 'data()' and 'size()' 
member functions}}
     asm((NotAString{})); // expected-error {{the string object in this asm 
operand is missing 'data()' and 'size()' member functions}}
     asm((MessageInvalidData{})); // expected-error {{the expression in this 
asm operand must have a 'data()' member function returning an object 
convertible to 'const char *'}} \
                                  // expected-error {{too few arguments to 
function call, expected 1, have 0}}
@@ -106,7 +106,7 @@ void test_dependent1(int i) {
 
 template void test_dependent1<int>(int);
 // expected-note@-1 {{in instantiation of function template specialization}}
-// expected-error@#err-int {{the expression in this asm operand must be a 
string literal or an object with 'data()' and 'size()' member functions}}
+// expected-error@#err-int {{the expression in this asm operand must be a null 
terminated constant string or an object with 'data()' and 'size()' member 
functions}}
 // expected-error@#err-int2 {{cannot initialize a value of type 'int' with an 
lvalue of type 'const char[3]'}}
 // expected-error@#err-int3 {{cannot initialize a value of type 'int' with an 
lvalue of type 'const char[2]'}}
 // expected-error@#err-int4 {{cannot initialize a value of type 'int' with an 
lvalue of type 'const char[7]'}}
diff --git a/clang/test/SemaCXX/static-assert-cxx26.cpp 
b/clang/test/SemaCXX/static-assert-cxx26.cpp
index b2ebd2abb785e..79be55610b27c 100644
--- a/clang/test/SemaCXX/static-assert-cxx26.cpp
+++ b/clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -2,7 +2,7 @@
 // RUN: %clang_cc1 -std=c++2c -triple=x86_64-linux -fsyntax-only %s -verify 
-fexperimental-new-constant-interpreter
 
 static_assert(true, "");
-static_assert(true, 0); // expected-error {{the message in a static assertion 
must be a string literal or an object with 'data()' and 'size()' member 
functions}}
+static_assert(true, 0); // expected-error {{the message in a static assertion 
must be a null terminated constant string or an object with 'data()' and 
'size()' member functions}}
 struct Empty{};
 static_assert(true, Empty{}); // expected-error {{the message object in this 
static assertion is missing 'data()' and 'size()' member functions}}
 struct NoData {
@@ -288,7 +288,7 @@ struct Good {
 
 template <typename Ty>
 struct Bad {
-  static_assert(false, Ty{}); // expected-error {{the message in a static 
assertion must be a string literal or an object with 'data()' and 'size()' 
member functions}} \
+  static_assert(false, Ty{}); // expected-error {{the message in a static 
assertion must be a null terminated constant string or an object with 'data()' 
and 'size()' member functions}} \
                               // expected-error {{static assertion failed}}
 };
 
@@ -416,3 +416,50 @@ static_assert(
       // expected-note@-1 {{read of dereferenced one-past-the-end pointer is 
not allowed in a constant expression}}
 );
 }
+
+static_assert(false, &(" basic test"[1]));
+// expected-error@-1 {{static assertion failed: basic test}}
+// expected-warning@-2 {{consteval string constants are an extension}}
+
+constexpr const char *constexpr_global = "global_constexpr";
+constexpr const char null_terminated_buffer[] = { 'n', 'u', 'l', 'l', 't', 
'e', 'r', 'm', 0 };
+constexpr const char no_null_buffer[] = { 'n', 'o', 'n', 'u', 'l', 'l', 't', 
'e', 'r', 'm' };
+
+constexpr const char *selector(int i) {
+  constexpr const char * a_constant = "a_constant";
+  const char *non_constexpr = "non-constexpr string";
+  switch (i) {
+    case 0: return "case 0";
+    case 1: return a_constant;
+    case 2: return constexpr_global;
+    case 3: return null_terminated_buffer;
+    case 4: return &(""[1]); // point to after the null terminator
+    case 5: return nullptr;
+    case 6: return no_null_buffer;
+  }
+};
+
+static_assert(false, selector(0));
+// expected-error@-1 {{static assertion failed: case 0}}
+// expected-warning@-2 {{consteval string constants are an extension}}
+static_assert(false, selector(1));
+// expected-error@-1 {{static assertion failed: a_constant}}
+// expected-warning@-2 {{consteval string constants are an extension}}
+static_assert(false, selector(2));
+// expected-error@-1 {{static assertion failed: global_constexpr}}
+// expected-warning@-2 {{consteval string constants are an extension}}
+static_assert(false, selector(3));
+// expected-error@-1 {{static assertion failed: nullterm}}
+// expected-warning@-2 {{consteval string constants are an extension}}
+static_assert(false, selector(4));
+// expected-error@-1 {{the message in a static assertion is not null 
terminated}}
+// expected-error@-2 {{static assertion failed}}
+static_assert(false, selector(5));
+// expected-error@-1 {{the message in a static assertion is not null 
terminated}}
+// expected-error@-2 {{static assertion failed}}
+static_assert(false, selector(6));
+// expected-error@-1 {{the message in a static assertion is not null 
terminated}}
+// expected-error@-2 {{static assertion failed}}
+static_assert(false, selector(7));
+// expected-error@-1 {{the message in a static assertion is not null 
terminated}}
+// expected-error@-2 {{static assertion failed}}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to