https://github.com/erichkeane created https://github.com/llvm/llvm-project/pull/198966
This showed up in a test suite: If the size of the variable is larger than the literal, we were asserting since we were only accounting for a single null terminator. The FE will ensure that we have enough room for the variable + a null terminator, but the size must be larger! This patch counts on 'trailing-zeroes' to make sure the null terminator is in place. >From 77ab6b50f6dbaf5620340a3f87619fe67414b91e Mon Sep 17 00:00:00 2001 From: erichkeane <[email protected]> Date: Wed, 20 May 2026 20:55:29 -0700 Subject: [PATCH] [CIR] Wide String literals in a larger variable lowering This showed up in a test suite: If the size of the variable is larger than the literal, we were asserting since we were only accounting for a single null terminator. The FE will ensure that we have enough room for the variable + a null terminator, but the size must be larger! This patch counts on 'trailing-zeroes' to make sure the null terminator is in place. --- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 7 ++----- clang/test/CIR/CodeGen/string-literals.cpp | 6 ++++++ clang/test/CIR/CodeGen/wide-string.cpp | 13 +++++++++---- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 8b0f071c3f645..e810afb2685ab 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -1615,9 +1615,8 @@ CIRGenModule::getConstantArrayFromStringLiteral(const StringLiteral *e) { uint64_t arraySize = arrayTy.getSize(); unsigned literalSize = e->getLength(); - assert(arraySize == literalSize + 1 && - "wide string literal array size must be literal length plus null " - "terminator"); + assert(arraySize > literalSize && + "wide string literal array size must have room for null terminator?"); // Check if the string is all null bytes before building the vector. // In most non-zero cases, this will break out on the first element. 
@@ -1637,8 +1636,6 @@ CIRGenModule::getConstantArrayFromStringLiteral(const StringLiteral *e) { elements.reserve(arraySize); for (unsigned i = 0; i < literalSize; ++i) elements.push_back(cir::IntAttr::get(arrayEltTy, e->getCodeUnit(i))); - // Add null terminator - elements.push_back(cir::IntAttr::get(arrayEltTy, 0)); auto elementsAttr = mlir::ArrayAttr::get(&getMLIRContext(), elements); return builder.getConstArray(elementsAttr, arrayTy); diff --git a/clang/test/CIR/CodeGen/string-literals.cpp b/clang/test/CIR/CodeGen/string-literals.cpp index e5a1c4324b8ed..eea41a781efad 100644 --- a/clang/test/CIR/CodeGen/string-literals.cpp +++ b/clang/test/CIR/CodeGen/string-literals.cpp @@ -20,6 +20,7 @@ char const *array[] { "my", "hands", "are", "typing", "words" }; + // CIR: cir.global "private" constant cir_private dso_local @"[[STR:.+]]" = #cir.const_array<"my" : !cir.array<!s8i x 2>, trailing_zeros> : !cir.array<!s8i x 3> // CIR: cir.global "private" constant cir_private dso_local @"[[STR1:.+]]" = #cir.const_array<"hands" : !cir.array<!s8i x 5>, trailing_zeros> : !cir.array<!s8i x 6> // CIR: cir.global "private" constant cir_private dso_local @"[[STR2:.+]]" = #cir.const_array<"are" : !cir.array<!s8i x 3>, trailing_zeros> : !cir.array<!s8i x 4> @@ -41,6 +42,11 @@ char const *array[] { // OGCG: @[[STR4:.+]] = private unnamed_addr constant [6 x i8] c"words\00" // OGCG: @array = global [5 x ptr] [ptr @[[STR]], ptr @[[STR1]], ptr @[[STR2]], ptr @[[STR3]], ptr @[[STR4]]] +wchar_t zeroPadding[20] = L"hi"; +// CIR: cir.global external @zeroPadding = #cir.const_array<[#cir.int<104> : !u32i, #cir.int<105> : !u32i], trailing_zeros> : !cir.array<!u32i x 20> +// LLVM: @zeroPadding = global [20 x i32] [i32 104, i32 105, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0] +// OGCG: @zeroPadding = global [20 x i32] [i32 104, i32 105, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 
0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0] + // CIR: cir.global "private" constant cir_private dso_local @[[STR5_GLOBAL:.*]] = #cir.const_array<"abcd" : !cir.array<!s8i x 4>, trailing_zeros> : !cir.array<!s8i x 5> // LLVM: @[[STR5_GLOBAL:.*]] = private constant [5 x i8] c"abcd\00" diff --git a/clang/test/CIR/CodeGen/wide-string.cpp b/clang/test/CIR/CodeGen/wide-string.cpp index 6d6ed1458b952..b0c124150f20a 100644 --- a/clang/test/CIR/CodeGen/wide-string.cpp +++ b/clang/test/CIR/CodeGen/wide-string.cpp @@ -5,12 +5,17 @@ // RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll // RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s +// CIR: cir.global external @some_array = #cir.const_array<[#cir.int<97> : !u16i], trailing_zeros> : !cir.array<!u16i x 10> +// LLVM: @some_array = global [10 x i16] [i16 97, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0] +// OGCG: @some_array = global [10 x i16] [i16 97, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0] +char16_t some_array[10] = u"a"; + // Test with built-in char16_t type const char16_t *test_utf16() { return u"你好世界"; } -// CIR: cir.global "private" constant cir_private dso_local @{{.+}} = #cir.const_array<[#cir.int<20320> : !u16i, #cir.int<22909> : !u16i, #cir.int<19990> : !u16i, #cir.int<30028> : !u16i, #cir.int<0> : !u16i]> : !cir.array<!u16i x 5> +// CIR: cir.global "private" constant cir_private dso_local @{{.+}} = #cir.const_array<[#cir.int<20320> : !u16i, #cir.int<22909> : !u16i, #cir.int<19990> : !u16i, #cir.int<30028> : !u16i], trailing_zeros> : !cir.array<!u16i x 5> // LLVM: @{{.+}} = private constant [5 x i16] [i16 20320, i16 22909, i16 19990, i16 30028, i16 0] // OGCG: @{{.+}} = private unnamed_addr constant [5 x i16] [i16 20320, i16 22909, i16 19990, i16 30028, i16 0] @@ -18,7 +23,7 @@ const char32_t *test_utf32() { return U"你好世界"; } -// CIR: cir.global "private" constant cir_private dso_local @{{.+}} = #cir.const_array<[#cir.int<20320> : 
!u32i, #cir.int<22909> : !u32i, #cir.int<19990> : !u32i, #cir.int<30028> : !u32i, #cir.int<0> : !u32i]> : !cir.array<!u32i x 5> +// CIR: cir.global "private" constant cir_private dso_local @{{.+}} = #cir.const_array<[#cir.int<20320> : !u32i, #cir.int<22909> : !u32i, #cir.int<19990> : !u32i, #cir.int<30028> : !u32i], trailing_zeros> : !cir.array<!u32i x 5> // LLVM: @{{.+}} = private constant [5 x i32] [i32 20320, i32 22909, i32 19990, i32 30028, i32 0] // OGCG: @{{.+}} = private unnamed_addr constant [5 x i32] [i32 20320, i32 22909, i32 19990, i32 30028, i32 0] @@ -42,7 +47,7 @@ const wchar_t *test_wchar() { return L"1234"; } -// CIR: cir.global "private" constant cir_private dso_local @{{.+}} = #cir.const_array<[#cir.int<49> : !s32i, #cir.int<50> : !s32i, #cir.int<51> : !s32i, #cir.int<52> : !s32i, #cir.int<0> : !s32i]> : !cir.array<!s32i x 5> +// CIR: cir.global "private" constant cir_private dso_local @{{.+}} = #cir.const_array<[#cir.int<49> : !s32i, #cir.int<50> : !s32i, #cir.int<51> : !s32i, #cir.int<52> : !s32i], trailing_zeros> : !cir.array<!s32i x 5> // LLVM: @{{.+}} = private constant [5 x i32] [i32 49, i32 50, i32 51, i32 52, i32 0] // OGCG: @{{.+}} = private unnamed_addr constant [5 x i32] [i32 49, i32 50, i32 51, i32 52, i32 0] @@ -58,6 +63,6 @@ const char16_t *test_char16_typedef() { return u"test"; } -// CIR: cir.global "private" constant cir_private dso_local @{{.+}} = #cir.const_array<[#cir.int<116> : !u16i, #cir.int<101> : !u16i, #cir.int<115> : !u16i, #cir.int<116> : !u16i, #cir.int<0> : !u16i]> : !cir.array<!u16i x 5> +// CIR: cir.global "private" constant cir_private dso_local @{{.+}} = #cir.const_array<[#cir.int<116> : !u16i, #cir.int<101> : !u16i, #cir.int<115> : !u16i, #cir.int<116> : !u16i], trailing_zeros> : !cir.array<!u16i x 5> // LLVM: @{{.+}} = private constant [5 x i16] [i16 116, i16 101, i16 115, i16 116, i16 0] // OGCG: @{{.+}} = private unnamed_addr constant [5 x i16] [i16 116, i16 101, i16 115, i16 116, i16 0] 
_______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
