https://github.com/erichkeane updated https://github.com/llvm/llvm-project/pull/206564
>From d7e4f87928832f5568b0648a68f7023cea0252e6 Mon Sep 17 00:00:00 2001 From: erichkeane <[email protected]> Date: Fri, 26 Jun 2026 09:43:04 -0700 Subject: [PATCH 1/2] [CIR] Implement Flexible Array Members for const-record lowering The previous patch I did rewrote the ConstRecordBuilder in a way that resulted in flexible array members being an NYI. This was because it requires quite a bit of additional work to accomplish this. This patch does this implementation. It does so by: 1- Changing the CIR dialect to just support them. If a struct type ends in a zero size array, it allows constant initialization with a non-zero array size. This patch adds this, as well as tests to do so. 2- Change our LowerToLLVM to detect this pattern, and substitute in the struct-literal type. There is some additional work to allow us to do a padded literal, which results in slightly more matches to classic codegen. But otherwise, it is a pretty straight forward struct replacement with a larger array type. --- clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp | 13 +-- clang/lib/CIR/Dialect/IR/CIRAttrs.cpp | 31 +++++- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 95 ++++++++++++++++++- clang/test/CIR/CodeGen/flexible-array-init.c | 60 ++++++++++++ .../CIR/IR/const-record-flexible-array.cir | 21 ++++ clang/test/CIR/IR/invalid-const-record.cir | 33 +++++++ 6 files changed, 241 insertions(+), 12 deletions(-) create mode 100644 clang/test/CIR/CodeGen/flexible-array-init.c create mode 100644 clang/test/CIR/IR/const-record-flexible-array.cir diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp index 9ad172211ea60..384b975b3f422 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp @@ -1366,15 +1366,16 @@ mlir::Attribute ConstantEmitter::tryEmitPrivate(const APValue &value, while (!elts.empty() && builder.isNullValue(elts.back())) elts.pop_back(); - if (elts.empty()) - return cir::ZeroAttr::get(desiredType); - + // For flexible array members, we need to adjust the size of our result to + // match this. if (desiredType.getSize() == 0 && numElements > 0) { - cgm.errorNYI("ConstExprEmitter::tryEmitPrivate array type as flexible " - "array member"); - return {}; + desiredType = + cir::ArrayType::get(desiredType.getElementType(), numElements); } + if (elts.empty()) + return cir::ZeroAttr::get(desiredType); + return cir::ConstArrayAttr::get( desiredType, mlir::ArrayAttr::get(builder.getContext(), elts)); } diff --git a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp index 8f235311dc5d9..6ae2fef3515ae 100644 --- a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp @@ -230,15 +230,40 @@ ConstRecordAttr::verify(function_ref<InFlightDiagnostic()> emitError, if (sTy.getMembers().size() != members.size()) return emitError() << "number of elements must match"; - unsigned attrIdx = 0; - for (auto &member : sTy.getMembers()) { + for (const auto &[attrIdx, member] : llvm::enumerate(sTy.getMembers())) { auto m = mlir::cast<mlir::TypedAttr>(members[attrIdx]); + + // As a special case, we allow a flexible array member. This can only be the + // last element, the rest of the array type has to match (that is, the + // element type has to match), and the array member must be size zero. + if (attrIdx == sTy.getMembers().size() - 1) { + auto memArrayTy = dyn_cast<cir::ArrayType>(member); + if (memArrayTy && memArrayTy.getSize() == 0) { + + // The FAM must only match another array type initializer. + if (!isa<cir::ArrayType>(m.getType())) + return emitError() + << "element at index " << attrIdx << " has type " + << m.getType() << " but the expected type for this element is " + << member; + + cir::ArrayType initArrayTy = cast<cir::ArrayType>(m.getType()); + // The FAM only matches an equivalent array type. + if (initArrayTy.getElementType() != memArrayTy.getElementType()) + return emitError() + << "flexible array member at index " << attrIdx << " has type " + << m.getType() + << " which doesn't match the expected element type of member " + << member; + continue; + } + } + if (member != m.getType()) return emitError() << "element at index " << attrIdx << " has type " << m.getType() << " but the expected type for this element is " << member; - attrIdx++; } return success(); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index e0fc9e58ed4b7..1d348d2026b54 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -550,9 +550,85 @@ mlir::Value CIRAttrToValue::visitCirAttr(cir::ConstArrayAttr attr) { return result; } +// Figure out if we want mark the new struct 'packed' if it isn't already. IF +// it is already, we have to keep that behavior. We pack it with logic similar +// to classic codegen, though will end up missing cases, since we don't want to +// change the type other than the FAM. +// We can do so if: +// 1- Packing it won't change any of the field offsets. +// 2- the non-padded struct would add padding beyond the +// flexible array member. We don't pack if the flexible array member manages +// to not cause trailing padding. +static bool shouldPackFAMStruct(const mlir::DataLayout &dataLayout, + llvm::ArrayRef<mlir::Type> members) { + uint64_t maxAlign = 1; + uint64_t totalSize = 0; + for (mlir::Type member : members) { + uint64_t align = dataLayout.getTypeABIAlignment(member); + maxAlign = std::max(maxAlign, align); + uint64_t size = dataLayout.getTypeSize(member).getFixedValue(); + + if (llvm::alignTo(totalSize, align) != totalSize) + return false; + + totalSize += size; + } + return llvm::alignTo(totalSize, maxAlign) != totalSize; +} + +// CIR supports flexible-array-members in its struct types. That is, a +// zero-length array as the last element, which can be initialized with an +// arbitrary number of elements. A ConstRecordAttr can be created with one of +// these, and our verifier allows it. However, the LLVM implementation does NOT +// permit this. So we have to replace this type in LLVM with special struct for +// this value. +// This function is used to adjust any place we could run into a +// ConstRecordAttr (that is, a global?) that is initialized. It'll return the +// type value unchanged if this isn't a flexible array member case. +static mlir::Type +adjustGlobalTypeForFlexibleArrayInit(mlir::Type llvmType, mlir::Attribute init, + const mlir::TypeConverter &converter, + const mlir::DataLayout &dataLayout) { + // This only applies to ConstRecordAttr initialization. + auto constRecord = mlir::dyn_cast_if_present<cir::ConstRecordAttr>(init); + if (!constRecord) + return llvmType; + + // If this isn't of struct-type, or doesn't have any members, there is nothing + // to do. + auto structTy = mlir::dyn_cast<mlir::LLVM::LLVMStructType>(llvmType); + if (!structTy || structTy.getBody().empty()) + return llvmType; + + // If this isn't a zero-sized array, it isn't a flexible array member. + auto fam = dyn_cast<mlir::LLVM::LLVMArrayType>(structTy.getBody().back()); + if (!fam || fam.getNumElements() != 0) + return llvmType; + + llvm::ArrayRef<mlir::Attribute> initMembers = + constRecord.getMembers().getValue(); + mlir::Type lastInitType = cast<mlir::TypedAttr>(initMembers.back()).getType(); + + // Don't touch it if we don't need to do non-zero elements. + if (cast<cir::ArrayType>(lastInitType).getSize() == 0) + return llvmType; + + + llvm::SmallVector<mlir::Type> newBody{structTy.getBody()}; + newBody[newBody.size() - 1] = converter.convertType(lastInitType); + + bool packed = structTy.isPacked() || shouldPackFAMStruct(dataLayout, newBody); + + return mlir::LLVM::LLVMStructType::getLiteral(structTy.getContext(), newBody, + packed); +} + /// ConstRecord visitor. mlir::Value CIRAttrToValue::visitCirAttr(cir::ConstRecordAttr constRecord) { - const mlir::Type llvmTy = converter->convertType(constRecord.getType()); + mlir::Type llvmTy = converter->convertType(constRecord.getType()); + mlir::DataLayout dataLayout(parentOp->getParentOfType<mlir::ModuleOp>()); + llvmTy = adjustGlobalTypeForFlexibleArrayInit(llvmTy, constRecord, *converter, + dataLayout); const mlir::Location loc = parentOp->getLoc(); mlir::Value result = mlir::LLVM::UndefOp::create(rewriter, loc, llvmTy); @@ -2433,9 +2509,15 @@ CIRToLLVMGlobalOpLowering::lowerGlobalAttributes( /// insertion point to the end of the initializer block. void CIRToLLVMGlobalOpLowering::setupRegionInitializedLLVMGlobalOp( cir::GlobalOp op, mlir::ConversionPatternRewriter &rewriter) const { - const mlir::Type llvmType = + mlir::Type llvmType = convertTypeForMemory(*getTypeConverter(), dataLayout, op.getSymType()); + // Keep the global's type in sync with the value built by CIRAttrToValue: a + // flexible array member initializer requires an oversized anonymous struct. + if (std::optional<mlir::Attribute> init = op.getInitialValue()) + llvmType = adjustGlobalTypeForFlexibleArrayInit( + llvmType, *init, *getTypeConverter(), dataLayout); + // FIXME: These default values are placeholders until the the equivalent // attributes are available on cir.global ops. This duplicates code // in CIRToLLVMGlobalOpLowering::matchAndRewrite() but that will go @@ -2503,9 +2585,16 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( const mlir::Type cirSymType = op.getSymType(); // This is the LLVM dialect type. - const mlir::Type llvmType = + mlir::Type llvmType = convertTypeForMemory(*getTypeConverter(), dataLayout, cirSymType); + // A flexible array member initializer makes the constant larger than the + // record's declared type, so the global must use an oversized anonymous + // struct instead. + if (init.has_value()) + llvmType = adjustGlobalTypeForFlexibleArrayInit( + llvmType, *init, *getTypeConverter(), dataLayout); + // FIXME: These default values are placeholders until the the equivalent // attributes are available on cir.global ops. const bool isConst = op.getConstant(); diff --git a/clang/test/CIR/CodeGen/flexible-array-init.c b/clang/test/CIR/CodeGen/flexible-array-init.c new file mode 100644 index 0000000000000..5e5c007a3de85 --- /dev/null +++ b/clang/test/CIR/CodeGen/flexible-array-init.c @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM,LLVMCIR --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM,OGCG --input-file=%t.ll %s + +// CIR: !rec_S = !cir.struct<"S" {!s32i, !cir.array<!s8i x 0>}> +// CIR: !rec_T = !cir.struct<"T" {!cir.ptr<!s32i>, !cir.array<!s32i x 0>}> + +// 's1' lowers via the bulk constant-record path (LowerToLLVM.cpp ~line 2585): +// every member can be lowered to a constant attribute. +struct S { int n; char data[]; }; +struct S s1 = { 3, { 'a', 'b', 'c' } }; + +// CIR: cir.global external @s1 = #cir.const_record<{#cir.int<3> : !s32i, #cir.const_array<[#cir.int<97> : !s8i, #cir.int<98> : !s8i, #cir.int<99> : !s8i]> : !cir.array<!s8i x 3>}> : !rec_S +// LLVM: @s1 = global <{ i32, [3 x i8] }> <{ i32 3, [3 x i8] c"abc" }> + +struct S s2 = { 3, { 'a', 'b', 'c', 0, 0, 0, 0 } }; +// CIR: cir.global external @s2 = #cir.const_record<{#cir.int<3> : !s32i, #cir.const_array<[#cir.int<97> : !s8i, #cir.int<98> : !s8i, #cir.int<99> : !s8i], trailing_zeros> : !cir.array<!s8i x 7>}> : !rec_S +// LLVM: @s2 = global <{ i32, [7 x i8] }> <{ i32 3, [7 x i8] c"abc\00\00\00\00" }> + +struct S s3 = {3}; +// CIR: cir.global external @s3 = #cir.const_record<{#cir.int<3> : !s32i, #cir.zero : !cir.array<!s8i x 0>}> : !rec_S +// LLVM: @s3 = global %struct.S { i32 3, [0 x i8] zeroinitializer } + +int arr[4]; + +struct T { int *p; int data[]; }; +struct T t1 = { &arr[2], { 1, 2, 3 } }; + +// CIR: cir.global external @t1 = #cir.const_record<{#cir.global_view<@arr, [2 : i32]> : !cir.ptr<!s32i>, #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i]> : !cir.array<!s32i x 3>}> : !rec_T +// LLVM: @t1 = global <{ ptr, [3 x i32] }> <{ ptr getelementptr {{.*}}(i8, ptr @arr, i64 8), [3 x i32] [i32 1, i32 2, i32 3] }> + +struct T t2 = { &arr[2], { 1, 2, 3, 0, 0, 0, 0, 0 } }; +// CIR: cir.global external @t2 = #cir.const_record<{#cir.global_view<@arr, [2 : i32]> : !cir.ptr<!s32i>, #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i], trailing_zeros> : !cir.array<!s32i x 8>}> : !rec_T +// LLVM: @t2 = global { ptr, [8 x i32] } { ptr getelementptr {{.*}}(i8, ptr @arr, i64 8), [8 x i32] [i32 1, i32 2, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0] } + +struct T t3 = { &arr[2], { 0, 0, 0, 0, 0, 0, 0, 0 } }; +// CIR: cir.global external @t3 = #cir.const_record<{#cir.global_view<@arr, [2 : i32]> : !cir.ptr<!s32i>, #cir.zero : !cir.array<!s32i x 8>}> : !rec_T +// LLVM: @t3 = global { ptr, [8 x i32] } { ptr getelementptr {{.*}}(i8, ptr @arr, i64 8), [8 x i32] zeroinitializer } + +struct V { int tag; int count; int data[]; }; +struct V v1 = {1, 3, {10, 20, 30}}; +// CIR: cir.global external @v1 = #cir.const_record<{#cir.int<1> : !s32i, #cir.int<3> : !s32i, #cir.const_array<[#cir.int<10> : !s32i, #cir.int<20> : !s32i, #cir.int<30> : !s32i]> : !cir.array<!s32i x 3>}> : !rec_V +// LLVM: @v1 = global { i32, i32, [3 x i32] } { i32 1, i32 3, [3 x i32] [i32 10, i32 20, i32 30] } + + +struct Padded { char c; int n; int data[]; }; +struct Padded padded1 = {'x', 10, {'a', 'b'}}; +// CIR: cir.global external @padded1 = #cir.const_record<{#cir.int<120> : !s8i, #cir.int<10> : !s32i, #cir.const_array<[#cir.int<97> : !s32i, #cir.int<98> : !s32i]> : !cir.array<!s32i x 2>}> : !rec_Padded +// These differ because classic-codegen has put the padding in place. However, +// they should still match anyway, since llvm aligns each field itself. +// LLVMCIR: @padded1 = global { i8, i32, [2 x i32] } { i8 120, i32 10, [2 x i32] [i32 97, i32 98] } +// OGCG: @padded1 = global { i8, [3 x i8], i32, [2 x i32] } { i8 120, [3 x i8] zeroinitializer, i32 10, [2 x i32] [i32 97, i32 98] } + +struct __attribute__((packed)) Packed { int n; char data[]; }; +struct Packed packed1 = {1, {'a', 'b'}}; +// CIR: cir.global external @packed1 = #cir.const_record<{#cir.int<1> : !s32i, #cir.const_array<[#cir.int<97> : !s8i, #cir.int<98> : !s8i]> : !cir.array<!s8i x 2>}> : !rec_Packed +// LLVM: @packed1 = global <{ i32, [2 x i8] }> <{ i32 1, [2 x i8] c"ab" }> diff --git a/clang/test/CIR/IR/const-record-flexible-array.cir b/clang/test/CIR/IR/const-record-flexible-array.cir new file mode 100644 index 0000000000000..4e44e66ea4074 --- /dev/null +++ b/clang/test/CIR/IR/const-record-flexible-array.cir @@ -0,0 +1,21 @@ +// RUN: cir-opt %s --verify-roundtrip | FileCheck %s + +!s8i = !cir.int<s, 8> +!s32i = !cir.int<s, 32> +!rec_S = !cir.struct<"S" {!s32i, !cir.array<!s8i x 0>}> + +// A flexible array member is declared as a zero-length array, but its +// initializer may be a larger array of the *same element type*. The verifier +// accepts this as a special case for the trailing record member. + +// FAM initialized with a larger array of the same element type. +// CHECK: cir.global external @withData = #cir.const_record<{#cir.int<3> : !s32i, #cir.const_array<[#cir.int<97> : !s8i, #cir.int<98> : !s8i, #cir.int<99> : !s8i]> : !cir.array<!s8i x 3>}> : !rec_S +cir.global external @withData = #cir.const_record<{#cir.int<3> : !s32i, #cir.const_array<[#cir.int<97> : !s8i, #cir.int<98> : !s8i, #cir.int<99> : !s8i]> : !cir.array<!s8i x 3>}> : !rec_S + +// FAM initialized with a larger, zeroed array. +// CHECK: cir.global external @withZeros = #cir.const_record<{#cir.int<3> : !s32i, #cir.zero : !cir.array<!s8i x 8>}> : !rec_S +cir.global external @withZeros = #cir.const_record<{#cir.int<3> : !s32i, #cir.zero : !cir.array<!s8i x 8>}> : !rec_S + +// FAM with no extra data: the trailing array is exactly zero-length. +// CHECK: cir.global external @noData = #cir.const_record<{#cir.int<3> : !s32i, #cir.zero : !cir.array<!s8i x 0>}> : !rec_S +cir.global external @noData = #cir.const_record<{#cir.int<3> : !s32i, #cir.zero : !cir.array<!s8i x 0>}> : !rec_S diff --git a/clang/test/CIR/IR/invalid-const-record.cir b/clang/test/CIR/IR/invalid-const-record.cir index c716ec8b7acbc..3c2fe607872e2 100644 --- a/clang/test/CIR/IR/invalid-const-record.cir +++ b/clang/test/CIR/IR/invalid-const-record.cir @@ -50,3 +50,36 @@ cir.global external @u = #cir.const_record<{}> : !rec_U // The single element's type must be one of the union's member types. // expected-error @below {{union element type '!cir.float' is not a member of}} cir.global external @u = #cir.const_record<{#cir.fp<2.000000e+00> : !cir.float}> : !rec_U + +// ----- + +!s8i = !cir.int<s, 8> +!s32i = !cir.int<s, 32> +!rec_S = !cir.struct<"S" {!s32i, !cir.array<!s8i x 0>}> + +// The flexible array member (trailing zero-length array) must be initialized +// with an array; a non-array initializer is rejected. +// expected-error @below {{element at index 1 has type '!cir.int<s, 32>' but the expected type for this element is '!cir.array<!cir.int<s, 8> x 0>'}} +cir.global external @fam_non_array = #cir.const_record<{#cir.int<3> : !s32i, #cir.int<4> : !s32i}> : !rec_S + +// ----- + +!s8i = !cir.int<s, 8> +!s32i = !cir.int<s, 32> +!rec_S = !cir.struct<"S" {!s32i, !cir.array<!s8i x 0>}> + +// The flexible array member initializer must have the same element type as the +// declared zero-length array. +// expected-error @below {{flexible array member at index 1 has type '!cir.array<!cir.int<s, 32> x 2>' which doesn't match the expected element type of member '!cir.array<!cir.int<s, 8> x 0>'}} +cir.global external @fam_wrong_elt = #cir.const_record<{#cir.int<3> : !s32i, #cir.const_array<[#cir.int<1> : !s32i, #cir.int<2> : !s32i]> : !cir.array<!s32i x 2>}> : !rec_S + +// ----- + +!s8i = !cir.int<s, 8> +!s32i = !cir.int<s, 32> +!rec_F = !cir.struct<"F" {!s32i, !cir.array<!s8i x 2>}> + +// The oversized-array relaxation only applies when the declared trailing array +// is zero-length; a non-zero-length array still requires an exact type match. +// expected-error @below {{element at index 1 has type '!cir.array<!cir.int<s, 8> x 3>' but the expected type for this element is '!cir.array<!cir.int<s, 8> x 2>'}} +cir.global external @not_zero_len = #cir.const_record<{#cir.int<3> : !s32i, #cir.const_array<[#cir.int<97> : !s8i, #cir.int<98> : !s8i, #cir.int<99> : !s8i]> : !cir.array<!s8i x 3>}> : !rec_F >From 67bd2d9084cb11e4a10605a0773de9ce444fbefd Mon Sep 17 00:00:00 2001 From: erichkeane <[email protected]> Date: Mon, 29 Jun 2026 11:55:21 -0700 Subject: [PATCH 2/2] Clang-format' --- clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 1d348d2026b54..1b1b03ab479f1 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -613,7 +613,6 @@ adjustGlobalTypeForFlexibleArrayInit(mlir::Type llvmType, mlir::Attribute init, if (cast<cir::ArrayType>(lastInitType).getSize() == 0) return llvmType; - llvm::SmallVector<mlir::Type> newBody{structTy.getBody()}; newBody[newBody.size() - 1] = converter.convertType(lastInitType); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
