Author: Erich Keane
Date: 2026-03-10T07:02:36-07:00
New Revision: 7484eb74884fa29376d4e410cc462040030fb677

URL: 
https://github.com/llvm/llvm-project/commit/7484eb74884fa29376d4e410cc462040030fb677
DIFF: 
https://github.com/llvm/llvm-project/commit/7484eb74884fa29376d4e410cc462040030fb677.diff

LOG: [CIR] Ensure strings are null-terminated, better deal with trailing null 
(#185513)

Our current implementation of string lowering did some work to remove
extra trailing zeros, and also to emit a 'zero' constant. That is
unchanged by this patch. However, this patch ALSO ensures that the
'remove extra trailing zeros' step removes ALL trailing zeros, which
likely has canonicalization benefits later on.

However, the real benefit of this patch is to make string emission emit
a null terminator by default, which fixes the virtual table 'name' field
so that it is lowered correctly. We do this by making the
builder::getString function take an argument (true by default) that will
ensure we add a null terminator if necessary.

This reflects the llvm::ConstantDataArray::getString function, which has
the same functionality. However, doing this during lowering seems
incorrect, since the FE is the one that knows whether these null
terminators are necessary. There is not currently an 'opt out' use of
the behavior, but the functionality is left in place to better reflect
the ConstantDataArray::getString function interface.

Note that, as the test changes show, this fixes some inconsistencies
between LLVM and OGCG lowering.

Added: 
    

Modified: 
    clang/lib/CIR/CodeGen/CIRGenBuilder.h
    clang/lib/CIR/CodeGen/CIRGenModule.cpp
    clang/test/CIR/CodeGen/predefined-expr.c
    clang/test/CIR/CodeGen/string-literals.c
    clang/test/CIR/CodeGen/string-literals.cpp
    clang/test/CIR/CodeGen/vtt.cpp
    clang/test/CIR/CodeGenBuiltins/builtin-printf.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h 
b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index ad0bf7fdcf6b6..7cd1bdcf491be 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -43,7 +43,8 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
   /// Note: This is different from what is returned by
   /// mlir::Builder::getStringAttr() which is an mlir::StringAttr.
   mlir::Attribute getString(llvm::StringRef str, mlir::Type eltTy,
-                            std::optional<size_t> size) {
+                            std::optional<size_t> size,
+                            bool ensureNullTerm = true) {
     size_t finalSize = size.value_or(str.size());
 
     size_t lastNonZeroPos = str.find_last_not_of('\0');
@@ -53,18 +54,24 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
       auto arrayTy = cir::ArrayType::get(eltTy, finalSize);
       return cir::ZeroAttr::get(arrayTy);
     }
-    // We emit trailing zeros only if there are multiple trailing zeros.
-    size_t trailingZerosNum = 0;
-    if (finalSize > lastNonZeroPos + 2)
-      trailingZerosNum = finalSize - lastNonZeroPos - 1;
+
+    // We emit trailing zeros for all trailing zeros, so the null-terminator in
+    // a constant is always in trailing zeros, and the null-terminator is
+    // skipped in the CIR representation.
+    size_t trailingZerosNum = finalSize - lastNonZeroPos - 1;
     auto truncatedArrayTy =
         cir::ArrayType::get(eltTy, finalSize - trailingZerosNum);
+    auto strAttr = mlir::StringAttr::get(str.drop_back(trailingZerosNum),
+                                         truncatedArrayTy);
+
+    // Most C strings are null terminated, so if we are ensuring there is one,
+    // grow the array size by 1 to add a trailing zero if necessary. The 'auto'
+    // calculation of trailing zeros (the difference between the provided string
+    // and the type) will ensure we get the count correct.
+    finalSize += (ensureNullTerm && trailingZerosNum == 0);
+
     auto fullArrayTy = cir::ArrayType::get(eltTy, finalSize);
-    return cir::ConstArrayAttr::get(
-        fullArrayTy,
-        mlir::StringAttr::get(str.drop_back(trailingZerosNum),
-                              truncatedArrayTy),
-        trailingZerosNum);
+    return cir::ConstArrayAttr::get(fullArrayTy, strAttr);
   }
 
   cir::ConstArrayAttr getConstArray(mlir::Attribute attrs,

diff  --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp 
b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index b5905e2db6de1..1b4939b4741bb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -1095,7 +1095,7 @@ CIRGenModule::getConstantArrayFromStringLiteral(const 
StringLiteral *e) {
     str.resize(finalSize);
 
     mlir::Type eltTy = convertType(cat->getElementType());
-    return builder.getString(str, eltTy, finalSize);
+    return builder.getString(str, eltTy, finalSize, /*ensureNullTerm=*/false);
   }
 
   auto arrayTy = mlir::cast<cir::ArrayType>(convertType(e->getType()));

diff  --git a/clang/test/CIR/CodeGen/predefined-expr.c 
b/clang/test/CIR/CodeGen/predefined-expr.c
index 73b842fd49bf9..8644717f187b8 100644
--- a/clang/test/CIR/CodeGen/predefined-expr.c
+++ b/clang/test/CIR/CodeGen/predefined-expr.c
@@ -8,15 +8,15 @@
 // CIR: cir.global "private" internal dso_local @staticFuncName.name = 
#cir.global_view<@".str.4"> : !cir.ptr<!s8i> {alignment = 8 : i64} loc(#loc1)
 // CIR: cir.global "private" constant cir_private dso_local @".str" = 
#cir.zero : !cir.array<!s8i x 1>
 // CIR: cir.global external @func = #cir.global_view<@".str"> : !cir.ptr<!s8i>
-// CIR: cir.global "private" constant cir_private dso_local 
@__func__.plainFunction = #cir.const_array<"plainFunction\00" : !cir.array<!s8i 
x 14>>
-// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.plainFunction = #cir.const_array<"void 
plainFunction(void)\00" : !cir.array<!s8i x 25>>
-// CIR: cir.global "private" constant cir_private dso_local 
@__func__.externFunction = #cir.const_array<"externFunction\00" : 
!cir.array<!s8i x 15>>
-// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.externFunction = #cir.const_array<"void 
externFunction(void)\00" : !cir.array<!s8i x 26>>
-// CIR: cir.global "private" constant cir_private dso_local 
@__func__.privateExternFunction = #cir.const_array<"privateExternFunction\00" : 
!cir.array<!s8i x 22>>
-// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.privateExternFunction = #cir.const_array<"void 
privateExternFunction(void)\00" : !cir.array<!s8i x 33>>
-// CIR: cir.global "private" constant cir_private dso_local @".str.4" = 
#cir.const_array<"staticFuncName\00" : !cir.array<!s8i x 15>> : !cir.array<!s8i 
x 15>
-// CIR: cir.global "private" constant cir_private dso_local 
@__func__.staticFunction = #cir.const_array<"staticFunction\00" : 
!cir.array<!s8i x 15>>
-// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.staticFunction = #cir.const_array<"void 
staticFunction(void)\00" : !cir.array<!s8i x 26>>
+// CIR: cir.global "private" constant cir_private dso_local 
@__func__.plainFunction = #cir.const_array<"plainFunction" : !cir.array<!s8i x 
13>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.plainFunction = #cir.const_array<"void 
plainFunction(void)" : !cir.array<!s8i x 24>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local 
@__func__.externFunction = #cir.const_array<"externFunction" : !cir.array<!s8i 
x 14>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.externFunction = #cir.const_array<"void 
externFunction(void)" : !cir.array<!s8i x 25>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local 
@__func__.privateExternFunction = #cir.const_array<"privateExternFunction" : 
!cir.array<!s8i x 21>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.privateExternFunction = #cir.const_array<"void 
privateExternFunction(void)" : !cir.array<!s8i x 32>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local @".str.4" = 
#cir.const_array<"staticFuncName" : !cir.array<!s8i x 14>, trailing_zeros> : 
!cir.array<!s8i x 15>
+// CIR: cir.global "private" constant cir_private dso_local 
@__func__.staticFunction = #cir.const_array<"staticFunction" : !cir.array<!s8i 
x 14>, trailing_zeros>
+// CIR: cir.global "private" constant cir_private dso_local 
@__PRETTY_FUNCTION__.staticFunction = #cir.const_array<"void 
staticFunction(void)" : !cir.array<!s8i x 25>, trailing_zeros>
 
 // TODO(cir): These should be unnamed_addr
 // LLVM: @staticFuncName.name = internal global ptr @.str.4, align 8

diff  --git a/clang/test/CIR/CodeGen/string-literals.c 
b/clang/test/CIR/CodeGen/string-literals.c
index 38657b2cd1175..2b4a34153a5b6 100644
--- a/clang/test/CIR/CodeGen/string-literals.c
+++ b/clang/test/CIR/CodeGen/string-literals.c
@@ -7,7 +7,7 @@
 
 char g_str[] = "1234";
 
-// CIR: cir.global external @g_str = #cir.const_array<"1234\00" : 
!cir.array<!s8i x 5>> : !cir.array<!s8i x 5>
+// CIR: cir.global external @g_str = #cir.const_array<"1234" : !cir.array<!s8i 
x 4>, trailing_zeros> : !cir.array<!s8i x 5>
 
 char g_oversized[100] = "123";
 
@@ -15,9 +15,9 @@ char g_oversized[100] = "123";
 
 char g_exact[4] = "123";
 
-// CIR: cir.global external @g_exact = #cir.const_array<"123\00" : 
!cir.array<!s8i x 4>> : !cir.array<!s8i x 4>
+// CIR: cir.global external @g_exact = #cir.const_array<"123" : 
!cir.array<!s8i x 3>, trailing_zeros> : !cir.array<!s8i x 4>
 
-// CIR: cir.global "private" constant cir_private dso_local 
@[[STR1_GLOBAL:.*]] = #cir.const_array<"1\00" : !cir.array<!s8i x 2>> : 
!cir.array<!s8i x 2>
+// CIR: cir.global "private" constant cir_private dso_local 
@[[STR1_GLOBAL:.*]] = #cir.const_array<"1" : !cir.array<!s8i x 1>, 
trailing_zeros> : !cir.array<!s8i x 2>
 // CIR: cir.global "private" constant cir_private dso_local 
@[[STR2_GLOBAL:.*]] = #cir.zero : !cir.array<!s8i x 1>
 // CIR: cir.global "private" constant cir_private dso_local 
@[[STR3_GLOBAL:.*]] = #cir.zero : !cir.array<!s8i x 2>
 

diff  --git a/clang/test/CIR/CodeGen/string-literals.cpp 
b/clang/test/CIR/CodeGen/string-literals.cpp
index 30c2b76ae5360..14cfe468ee6e7 100644
--- a/clang/test/CIR/CodeGen/string-literals.cpp
+++ b/clang/test/CIR/CodeGen/string-literals.cpp
@@ -10,11 +10,11 @@ char const *array[] {
     "my", "hands", "are", "typing", "words"
 };
 
-// CIR: cir.global "private" constant cir_private dso_local @"[[STR:.+]]" = 
#cir.const_array<"my\00" : !cir.array<!s8i x 3>> : !cir.array<!s8i x 3>
-// CIR: cir.global "private" constant cir_private dso_local @"[[STR1:.+]]" = 
#cir.const_array<"hands\00" : !cir.array<!s8i x 6>> : !cir.array<!s8i x 6>
-// CIR: cir.global "private" constant cir_private dso_local @"[[STR2:.+]]" = 
#cir.const_array<"are\00" : !cir.array<!s8i x 4>> : !cir.array<!s8i x 4>
-// CIR: cir.global "private" constant cir_private dso_local @"[[STR3:.+]]" = 
#cir.const_array<"typing\00" : !cir.array<!s8i x 7>> : !cir.array<!s8i x 7>
-// CIR: cir.global "private" constant cir_private dso_local @"[[STR4:.+]]" = 
#cir.const_array<"words\00" : !cir.array<!s8i x 6>> : !cir.array<!s8i x 6>
+// CIR: cir.global "private" constant cir_private dso_local @"[[STR:.+]]" = 
#cir.const_array<"my" : !cir.array<!s8i x 2>, trailing_zeros> : !cir.array<!s8i 
x 3>
+// CIR: cir.global "private" constant cir_private dso_local @"[[STR1:.+]]" = 
#cir.const_array<"hands" : !cir.array<!s8i x 5>, trailing_zeros> : 
!cir.array<!s8i x 6>
+// CIR: cir.global "private" constant cir_private dso_local @"[[STR2:.+]]" = 
#cir.const_array<"are" : !cir.array<!s8i x 3>, trailing_zeros> : 
!cir.array<!s8i x 4>
+// CIR: cir.global "private" constant cir_private dso_local @"[[STR3:.+]]" = 
#cir.const_array<"typing" : !cir.array<!s8i x 6>, trailing_zeros> : 
!cir.array<!s8i x 7>
+// CIR: cir.global "private" constant cir_private dso_local @"[[STR4:.+]]" = 
#cir.const_array<"words" : !cir.array<!s8i x 5>, trailing_zeros> : 
!cir.array<!s8i x 6>
 // CIR: cir.global external @array = 
#cir.const_array<[#cir.global_view<@"[[STR]]"> : !cir.ptr<!s8i>, 
#cir.global_view<@"[[STR1]]"> : !cir.ptr<!s8i>, #cir.global_view<@"[[STR2]]"> : 
!cir.ptr<!s8i>, #cir.global_view<@"[[STR3]]"> : !cir.ptr<!s8i>, 
#cir.global_view<@"[[STR4]]"> : !cir.ptr<!s8i>]> : !cir.array<!cir.ptr<!s8i> x 
5>
 
 // LLVM: @[[STR:.+]] = private constant [3 x i8] c"my\00"
@@ -31,7 +31,7 @@ char const *array[] {
 // OGCG: @[[STR4:.+]] = private unnamed_addr constant [6 x i8] c"words\00"
 // OGCG: @array = global [5 x ptr] [ptr @[[STR]], ptr @[[STR1]], ptr 
@[[STR2]], ptr @[[STR3]], ptr @[[STR4]]]
 
-// CIR: cir.global "private" constant cir_private dso_local 
@[[STR5_GLOBAL:.*]] = #cir.const_array<"abcd\00" : !cir.array<!s8i x 5>> : 
!cir.array<!s8i x 5>
+// CIR: cir.global "private" constant cir_private dso_local 
@[[STR5_GLOBAL:.*]] = #cir.const_array<"abcd" : !cir.array<!s8i x 4>, 
trailing_zeros> : !cir.array<!s8i x 5>
 
 // LLVM: @[[STR5_GLOBAL:.*]] = private constant [5 x i8] c"abcd\00"
 

diff  --git a/clang/test/CIR/CodeGen/vtt.cpp b/clang/test/CIR/CodeGen/vtt.cpp
index 5880feb323f00..bd926c46d7a9b 100644
--- a/clang/test/CIR/CodeGen/vtt.cpp
+++ b/clang/test/CIR/CodeGen/vtt.cpp
@@ -210,7 +210,7 @@ void D::y() {}
 
 // CIR-RTTI:  cir.globa{{.*}} @_ZTVN10__cxxabiv121__vmi_class_type_infoE : 
!cir.ptr<!cir.ptr<!u8i>>
 
-// CIR-RTTI:  cir.global{{.*}} @_ZTS1D = #cir.const_array<"1D" : 
!cir.array<!s8i x 2>> : !cir.array<!s8i x 2>
+// CIR-RTTI:  cir.global{{.*}} @_ZTS1D = #cir.const_array<"1D" : 
!cir.array<!s8i x 2>, trailing_zeros> : !cir.array<!s8i x 3>
 
 // CIR-RTTI:      cir.global{{.*}} @_ZTI1D = #cir.typeinfo<{
 // CIR-RTTI-SAME:   
#cir.global_view<@_ZTVN10__cxxabiv121__vmi_class_type_infoE, [2 : i32]> : 
!cir.ptr<!u8i>,
@@ -224,7 +224,7 @@ void D::y() {}
 // CIR-RTTI: cir.global{{.*}} @_ZTV1A : !rec_anon_struct3
 
 // LLVM-RTTI: @_ZTVN10__cxxabiv121__vmi_class_type_infoE = external global ptr
-// LLVM-RTTI: @_ZTS1D = global [2 x i8] c"1D", align 1
+// LLVM-RTTI: @_ZTS1D = global [3 x i8] c"1D\00", align 1
 
 // LLVM-RTTI:      @_ZTI1D = constant { ptr, ptr, i32, i32, ptr, i64, ptr, i64 
} {
 // LLVM-RTTI-SAME:   ptr getelementptr (i8, ptr 
@_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 16),

diff  --git a/clang/test/CIR/CodeGenBuiltins/builtin-printf.cpp 
b/clang/test/CIR/CodeGenBuiltins/builtin-printf.cpp
index 304b8725547f6..2a0f5c4196a8d 100644
--- a/clang/test/CIR/CodeGenBuiltins/builtin-printf.cpp
+++ b/clang/test/CIR/CodeGenBuiltins/builtin-printf.cpp
@@ -5,8 +5,8 @@
 // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu 
-Wno-unused-value -emit-llvm %s -o %t.ll
 // RUN: FileCheck --input-file=%t.ll %s -check-prefix=OGCG
 
-// CIR: cir.global "private" constant cir_private dso_local @".str" = 
#cir.const_array<"%s\00" : !cir.array<!s8i x 3>> : !cir.array<!s8i x 3> 
-// CIR: cir.global "private" constant cir_private dso_local @".str.1" = 
#cir.const_array<"%s %d\0A\00" : !cir.array<!s8i x 7>> : !cir.array<!s8i x 7>
+// CIR: cir.global "private" constant cir_private dso_local @".str" = 
#cir.const_array<"%s" : !cir.array<!s8i x 2>, trailing_zeros> : !cir.array<!s8i 
x 3> 
+// CIR: cir.global "private" constant cir_private dso_local @".str.1" = 
#cir.const_array<"%s %d\0A" : !cir.array<!s8i x 6>, trailing_zeros> : 
!cir.array<!s8i x 7>
 // LLVM: @.str = private constant [3 x i8] c"%s\00"
 // LLVM: @.str.1 = private constant [7 x i8] c"%s %d\0A\00"
 // OGCG: @.str = private unnamed_addr constant [3 x i8] c"%s\00"


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to