https://github.com/erichkeane updated 
https://github.com/llvm/llvm-project/pull/185513

>From 1a2893f99e740ff551fc5c519a349a77c63b595f Mon Sep 17 00:00:00 2001
From: erichkeane <[email protected]>
Date: Mon, 9 Mar 2026 13:05:04 -0700
Subject: [PATCH 1/2] [CIR] Ensure strings are null-terminated, better deal
 with trailing null

Our current implementation of string lowering already did some work to
remove extra trailing zeros, and to emit a 'zero' constant.  That is
unchanged by this patch.  However, this patch ALSO makes the 'remove
extra trailing zeros' step remove ALL trailing zeros, which likely has
canonicalization benefits later on.

However, the real benefit of this patch is to make string emission emit
a null-terminator by default, which ensures that the virtual table 'name'
field gets lowered correctly. We do this by making the builder::getString
function take an argument (true by default) that ensures we add a
null terminator if necessary.

This reflects the llvm::ConstantDataArray::getString function, which has
the same functionality. However, doing this during lowering seems
incorrect, since the FE is the one that knows whether these null
terminators are necessary. There is not currently an 'opt out' use of
the behavior, but the functionality is left in place to better reflect
the ConstantDataArray::getString function interface.

Note that, as the test updates show, this fixes some inconsistencies
between LLVM and OGCG lowering.
---
 clang/lib/CIR/CodeGen/CIRGenBuilder.h         | 27 ++++++++++++-------
 clang/test/CIR/CodeGen/predefined-expr.c      | 18 ++++++-------
 clang/test/CIR/CodeGen/string-literals.c      |  6 ++---
 clang/test/CIR/CodeGen/string-literals.cpp    | 12 ++++-----
 clang/test/CIR/CodeGen/vtt.cpp                |  4 +--
 .../CIR/CodeGenBuiltins/builtin-printf.cpp    |  4 +--
 6 files changed, 39 insertions(+), 32 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h 
b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index e2f89cc5aa12f..32dd03052d319 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -43,7 +43,8 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
   /// Note: This is different from what is returned by
   /// mlir::Builder::getStringAttr() which is an mlir::StringAttr.
   mlir::Attribute getString(llvm::StringRef str, mlir::Type eltTy,
-                            std::optional<size_t> size) {
+                            std::optional<size_t> size,
+                            bool ensureNullTerm = true) {
     size_t finalSize = size.value_or(str.size());
 
     size_t lastNonZeroPos = str.find_last_not_of('\0');
@@ -53,18 +54,24 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
       auto arrayTy = cir::ArrayType::get(eltTy, finalSize);
       return cir::ZeroAttr::get(arrayTy);
     }
-    // We emit trailing zeros only if there are multiple trailing zeros.
-    size_t trailingZerosNum = 0;
-    if (finalSize > lastNonZeroPos + 2)
-      trailingZerosNum = finalSize - lastNonZeroPos - 1;
+
+    // We emit trailing zeros for all trailing zeros, so the null-terminator in
+    // a constant is always in trailing zeros, and the null-terminator is
+    // skipped in the CIR representation.
+    size_t trailingZerosNum = finalSize - lastNonZeroPos - 1;
     auto truncatedArrayTy =
         cir::ArrayType::get(eltTy, finalSize - trailingZerosNum);
+    auto strAttr = mlir::StringAttr::get(str.drop_back(trailingZerosNum),
+                                         truncatedArrayTy);
+
+    // Most C strings are null terminated, so if we are ensuring there is one,
+    // grow the array size by 1 to add a trailing zero if necessary. The 'auto'
+    // calculation of trailing zeros (the difference between the provided string
+    // and the type) will ensure we get the count correct.
+    finalSize += (ensureNullTerm && trailingZerosNum == 0);
+
     auto fullArrayTy = cir::ArrayType::get(eltTy, finalSize);
-    return cir::ConstArrayAttr::get(
-        fullArrayTy,
-        mlir::StringAttr::get(str.drop_back(trailingZerosNum),
-                              truncatedArrayTy),
-        trailingZerosNum);
+    return cir::ConstArrayAttr::get(fullArrayTy, strAttr);
   }
 
   cir::ConstArrayAttr getConstArray(mlir::Attribute attrs,
diff --git a/clang/test/CIR/CodeGen/predefined-expr.c 
b/clang/test/CIR/CodeGen/predefined-expr.c
index 73b842fd49bf9..8644717f187b8 100644
--- a/clang/test/CIR/CodeGen/predefined-expr.c
+++ b/clang/test/CIR/CodeGen/predefined-expr.c
@@ -8,15 +8,15 @@
 // CIR: cir.global "private" internal dso_local @staticFuncName.name = 
#cir.global_view<@".str.4"> : !cir.ptr<!s8i> {alignment = 8 : i64} loc(#loc1)
 // CIR: cir.global "private" constant cir_private dso_local @".str" = 
#cir.zero : !cir.array<!s8i x 1>
 // CIR: cir.global external @func = #cir.global_view<@".str"> : !cir.ptr<!s8i>
-// CIR: cir.global "private" constant cir_private dso_local 
@__func__.plainFunction = #cir.const_array<"plainFunction\00" : !cir.array<!s8i 
x 14>>
-// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.plainFunction = #cir.const_array<"void 
plainFunction(void)\00" : !cir.array<!s8i x 25>>
-// CIR: cir.global "private" constant cir_private dso_local 
@__func__.externFunction = #cir.const_array<"externFunction\00" : 
!cir.array<!s8i x 15>>
-// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.externFunction = #cir.const_array<"void 
externFunction(void)\00" : !cir.array<!s8i x 26>>
-// CIR: cir.global "private" constant cir_private dso_local 
@__func__.privateExternFunction = #cir.const_array<"privateExternFunction\00" : 
!cir.array<!s8i x 22>>
-// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.privateExternFunction = #cir.const_array<"void 
privateExternFunction(void)\00" : !cir.array<!s8i x 33>>
-// CIR: cir.global "private" constant cir_private dso_local @".str.4" = 
#cir.const_array<"staticFuncName\00" : !cir.array<!s8i x 15>> : !cir.array<!s8i 
x 15>
-// CIR: cir.global "private" constant cir_private dso_local 
@__func__.staticFunction = #cir.const_array<"staticFunction\00" : 
!cir.array<!s8i x 15>>
-// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.staticFunction = #cir.const_array<"void 
staticFunction(void)\00" : !cir.array<!s8i x 26>>
+// CIR: cir.global "private" constant cir_private dso_local 
@__func__.plainFunction = #cir.const_array<"plainFunction" : !cir.array<!s8i x 
13>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.plainFunction = #cir.const_array<"void 
plainFunction(void)" : !cir.array<!s8i x 24>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local 
@__func__.externFunction = #cir.const_array<"externFunction" : !cir.array<!s8i 
x 14>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.externFunction = #cir.const_array<"void 
externFunction(void)" : !cir.array<!s8i x 25>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local 
@__func__.privateExternFunction = #cir.const_array<"privateExternFunction" : 
!cir.array<!s8i x 21>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.privateExternFunction = #cir.const_array<"void 
privateExternFunction(void)" : !cir.array<!s8i x 32>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local @".str.4" = 
#cir.const_array<"staticFuncName" : !cir.array<!s8i x 14>, trailing_zeros> : 
!cir.array<!s8i x 15>
+// CIR: cir.global "private" constant cir_private dso_local 
@__func__.staticFunction = #cir.const_array<"staticFunction" : !cir.array<!s8i 
x 14>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.staticFunction = #cir.const_array<"void 
staticFunction(void)" : !cir.array<!s8i x 25>, trailing_zeros>
 
 // TODO(cir): These should be unnamed_addr
 // LLVM: @staticFuncName.name = internal global ptr @.str.4, align 8
diff --git a/clang/test/CIR/CodeGen/string-literals.c 
b/clang/test/CIR/CodeGen/string-literals.c
index 38657b2cd1175..2b4a34153a5b6 100644
--- a/clang/test/CIR/CodeGen/string-literals.c
+++ b/clang/test/CIR/CodeGen/string-literals.c
@@ -7,7 +7,7 @@
 
 char g_str[] = "1234";
 
-// CIR: cir.global external @g_str = #cir.const_array<"1234\00" : 
!cir.array<!s8i x 5>> : !cir.array<!s8i x 5>
+// CIR: cir.global external @g_str = #cir.const_array<"1234" : !cir.array<!s8i 
x 4>, trailing_zeros> : !cir.array<!s8i x 5>
 
 char g_oversized[100] = "123";
 
@@ -15,9 +15,9 @@ char g_oversized[100] = "123";
 
 char g_exact[4] = "123";
 
-// CIR: cir.global external @g_exact = #cir.const_array<"123\00" : 
!cir.array<!s8i x 4>> : !cir.array<!s8i x 4>
+// CIR: cir.global external @g_exact = #cir.const_array<"123" : 
!cir.array<!s8i x 3>, trailing_zeros> : !cir.array<!s8i x 4>
 
-// CIR: cir.global "private" constant cir_private dso_local 
@[[STR1_GLOBAL:.*]] = #cir.const_array<"1\00" : !cir.array<!s8i x 2>> : 
!cir.array<!s8i x 2>
+// CIR: cir.global "private" constant cir_private dso_local 
@[[STR1_GLOBAL:.*]] = #cir.const_array<"1" : !cir.array<!s8i x 1>, 
trailing_zeros> : !cir.array<!s8i x 2>
 // CIR: cir.global "private" constant cir_private dso_local 
@[[STR2_GLOBAL:.*]] = #cir.zero : !cir.array<!s8i x 1>
 // CIR: cir.global "private" constant cir_private dso_local 
@[[STR3_GLOBAL:.*]] = #cir.zero : !cir.array<!s8i x 2>
 
diff --git a/clang/test/CIR/CodeGen/string-literals.cpp 
b/clang/test/CIR/CodeGen/string-literals.cpp
index 30c2b76ae5360..14cfe468ee6e7 100644
--- a/clang/test/CIR/CodeGen/string-literals.cpp
+++ b/clang/test/CIR/CodeGen/string-literals.cpp
@@ -10,11 +10,11 @@ char const *array[] {
     "my", "hands", "are", "typing", "words"
 };
 
-// CIR: cir.global "private" constant cir_private dso_local @"[[STR:.+]]" = 
#cir.const_array<"my\00" : !cir.array<!s8i x 3>> : !cir.array<!s8i x 3>
-// CIR: cir.global "private" constant cir_private dso_local @"[[STR1:.+]]" = 
#cir.const_array<"hands\00" : !cir.array<!s8i x 6>> : !cir.array<!s8i x 6>
-// CIR: cir.global "private" constant cir_private dso_local @"[[STR2:.+]]" = 
#cir.const_array<"are\00" : !cir.array<!s8i x 4>> : !cir.array<!s8i x 4>
-// CIR: cir.global "private" constant cir_private dso_local @"[[STR3:.+]]" = 
#cir.const_array<"typing\00" : !cir.array<!s8i x 7>> : !cir.array<!s8i x 7>
-// CIR: cir.global "private" constant cir_private dso_local @"[[STR4:.+]]" = 
#cir.const_array<"words\00" : !cir.array<!s8i x 6>> : !cir.array<!s8i x 6>
+// CIR: cir.global "private" constant cir_private dso_local @"[[STR:.+]]" = 
#cir.const_array<"my" : !cir.array<!s8i x 2>, trailing_zeros> : !cir.array<!s8i 
x 3>
+// CIR: cir.global "private" constant cir_private dso_local @"[[STR1:.+]]" = 
#cir.const_array<"hands" : !cir.array<!s8i x 5>, trailing_zeros> : 
!cir.array<!s8i x 6>
+// CIR: cir.global "private" constant cir_private dso_local @"[[STR2:.+]]" = 
#cir.const_array<"are" : !cir.array<!s8i x 3>, trailing_zeros> : 
!cir.array<!s8i x 4>
+// CIR: cir.global "private" constant cir_private dso_local @"[[STR3:.+]]" = 
#cir.const_array<"typing" : !cir.array<!s8i x 6>, trailing_zeros> : 
!cir.array<!s8i x 7>
+// CIR: cir.global "private" constant cir_private dso_local @"[[STR4:.+]]" = 
#cir.const_array<"words" : !cir.array<!s8i x 5>, trailing_zeros> : 
!cir.array<!s8i x 6>
 // CIR: cir.global external @array = 
#cir.const_array<[#cir.global_view<@"[[STR]]"> : !cir.ptr<!s8i>, 
#cir.global_view<@"[[STR1]]"> : !cir.ptr<!s8i>, #cir.global_view<@"[[STR2]]"> : 
!cir.ptr<!s8i>, #cir.global_view<@"[[STR3]]"> : !cir.ptr<!s8i>, 
#cir.global_view<@"[[STR4]]"> : !cir.ptr<!s8i>]> : !cir.array<!cir.ptr<!s8i> x 
5>
 
 // LLVM: @[[STR:.+]] = private constant [3 x i8] c"my\00"
@@ -31,7 +31,7 @@ char const *array[] {
 // OGCG: @[[STR4:.+]] = private unnamed_addr constant [6 x i8] c"words\00"
 // OGCG: @array = global [5 x ptr] [ptr @[[STR]], ptr @[[STR1]], ptr 
@[[STR2]], ptr @[[STR3]], ptr @[[STR4]]]
 
-// CIR: cir.global "private" constant cir_private dso_local 
@[[STR5_GLOBAL:.*]] = #cir.const_array<"abcd\00" : !cir.array<!s8i x 5>> : 
!cir.array<!s8i x 5>
+// CIR: cir.global "private" constant cir_private dso_local 
@[[STR5_GLOBAL:.*]] = #cir.const_array<"abcd" : !cir.array<!s8i x 4>, 
trailing_zeros> : !cir.array<!s8i x 5>
 
 // LLVM: @[[STR5_GLOBAL:.*]] = private constant [5 x i8] c"abcd\00"
 
diff --git a/clang/test/CIR/CodeGen/vtt.cpp b/clang/test/CIR/CodeGen/vtt.cpp
index 5880feb323f00..bd926c46d7a9b 100644
--- a/clang/test/CIR/CodeGen/vtt.cpp
+++ b/clang/test/CIR/CodeGen/vtt.cpp
@@ -210,7 +210,7 @@ void D::y() {}
 
 // CIR-RTTI:  cir.globa{{.*}} @_ZTVN10__cxxabiv121__vmi_class_type_infoE : 
!cir.ptr<!cir.ptr<!u8i>>
 
-// CIR-RTTI:  cir.global{{.*}} @_ZTS1D = #cir.const_array<"1D" : 
!cir.array<!s8i x 2>> : !cir.array<!s8i x 2>
+// CIR-RTTI:  cir.global{{.*}} @_ZTS1D = #cir.const_array<"1D" : 
!cir.array<!s8i x 2>, trailing_zeros> : !cir.array<!s8i x 3>
 
 // CIR-RTTI:      cir.global{{.*}} @_ZTI1D = #cir.typeinfo<{
 // CIR-RTTI-SAME:   
#cir.global_view<@_ZTVN10__cxxabiv121__vmi_class_type_infoE, [2 : i32]> : 
!cir.ptr<!u8i>,
@@ -224,7 +224,7 @@ void D::y() {}
 // CIR-RTTI: cir.global{{.*}} @_ZTV1A : !rec_anon_struct3
 
 // LLVM-RTTI: @_ZTVN10__cxxabiv121__vmi_class_type_infoE = external global ptr
-// LLVM-RTTI: @_ZTS1D = global [2 x i8] c"1D", align 1
+// LLVM-RTTI: @_ZTS1D = global [3 x i8] c"1D\00", align 1
 
 // LLVM-RTTI:      @_ZTI1D = constant { ptr, ptr, i32, i32, ptr, i64, ptr, i64 
} {
 // LLVM-RTTI-SAME:   ptr getelementptr (i8, ptr 
@_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 16),
diff --git a/clang/test/CIR/CodeGenBuiltins/builtin-printf.cpp 
b/clang/test/CIR/CodeGenBuiltins/builtin-printf.cpp
index 304b8725547f6..2a0f5c4196a8d 100644
--- a/clang/test/CIR/CodeGenBuiltins/builtin-printf.cpp
+++ b/clang/test/CIR/CodeGenBuiltins/builtin-printf.cpp
@@ -5,8 +5,8 @@
 // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu 
-Wno-unused-value -emit-llvm %s -o %t.ll
 // RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
 
-// CIR: cir.global "private" constant cir_private dso_local @".str" = 
#cir.const_array<"%s\00" : !cir.array<!s8i x 3>> : !cir.array<!s8i x 3> 
-// CIR: cir.global "private" constant cir_private dso_local @".str.1" = 
#cir.const_array<"%s %d\0A\00" : !cir.array<!s8i x 7>> : !cir.array<!s8i x 7>
+// CIR: cir.global "private" constant cir_private dso_local @".str" = 
#cir.const_array<"%s" : !cir.array<!s8i x 2>, trailing_zeros> : !cir.array<!s8i 
x 3> 
+// CIR: cir.global "private" constant cir_private dso_local @".str.1" = 
#cir.const_array<"%s %d\0A" : !cir.array<!s8i x 6>, trailing_zeros> : 
!cir.array<!s8i x 7>
 // LLVM: @.str = private constant [3 x i8] c"%s\00"
 // LLVM: @.str.1 = private constant [7 x i8] c"%s %d\0A\00"
 // OGCG: @.str = private unnamed_addr constant [3 x i8] c"%s\00"

>From a35a4a1d4e65f5a4c1c0b7dd62a547d85cea6a33 Mon Sep 17 00:00:00 2001
From: erichkeane <[email protected]>
Date: Mon, 9 Mar 2026 14:45:56 -0700
Subject: [PATCH 2/2] Don't set ensure-null-terminator on
 getConstantFromStringLiteral

---
 clang/lib/CIR/CodeGen/CIRGenModule.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index b5905e2db6de1..1b4939b4741bb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -1095,7 +1095,7 @@ CIRGenModule::getConstantArrayFromStringLiteral(const 
StringLiteral *e) {
     str.resize(finalSize);
 
     mlir::Type eltTy = convertType(cat->getElementType());
-    return builder.getString(str, eltTy, finalSize);
+    return builder.getString(str, eltTy, finalSize, /*ensureNullTerm=*/false);
   }
 
   auto arrayTy = mlir::cast<cir::ArrayType>(convertType(e->getType()));

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to