https://github.com/erichkeane updated https://github.com/llvm/llvm-project/pull/199292
>From 57563b11a263a01daaf4410203a40a9027a76906 Mon Sep 17 00:00:00 2001 From: erichkeane <[email protected]> Date: Fri, 22 May 2026 14:41:02 -0700 Subject: [PATCH 1/2] [CIR] Fix union layout calculations- remove getLargestMember Our union layout calculations were quite a bit wrong, `getLargestMember` actually got the largest aligned type, which is not the same thing. This showed up in quite a few places. #198361 was JUST submitted, and seems to have fixed the SYMPTOM, but not the actual size calculation in a way that is robust in the future. This patch removes `getLargestMember` and renames it to do what it actually does: gets the storage type of the union. It also corrects the alignment and size calculations (and as a drive-by, fixes the calculation for the size of an empty union). This patch keeps the test changes from the previous fix, to make sure we get that correct. --- .../include/clang/CIR/Dialect/IR/CIRTypes.td | 7 +- clang/lib/CIR/Dialect/IR/CIRTypes.cpp | 88 +++++++++---------- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 8 +- clang/test/CIR/CodeGen/union-layout.cpp | 50 +++++++++++ clang/unittests/CIR/UnionTypeSizeTest.cpp | 2 +- 5 files changed, 97 insertions(+), 58 deletions(-) create mode 100644 clang/test/CIR/CodeGen/union-layout.cpp diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td index 9e639df13de70..64a1a542ec740 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td @@ -737,12 +737,7 @@ def CIR_RecordType : CIR_Type<"Record", "record", [ bool isComplete() const { return !isIncomplete(); }; bool isIncomplete() const; - mlir::Type getLargestMember(const mlir::DataLayout &dataLayout) const; - - /// Tail-padding member for a padded union (last member appended by - /// lowerUnion). Empty type when the record is not padded. - mlir::Type getPadding() const; - + mlir::Type getUnionStorageType(const mlir::DataLayout &dataLayout) const; size_t getNumElements() const { return getMembers().size(); }; std::string getKindAsStr() { switch (getKind()) { diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp index 23c327e81831b..2f0a11028ea21 100644 --- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp @@ -322,36 +322,26 @@ void RecordType::complete(ArrayRef<Type> members, bool packed, bool padded) { llvm_unreachable("failed to complete record"); } -/// Return the largest member of in the type. -/// -/// Recurses into union members never returning a union as the largest member. -Type RecordType::getLargestMember(const ::mlir::DataLayout &dataLayout) const { - assert(isUnion() && "Only call getLargestMember on unions"); +/// Return the 'storage' type of the union, that is, without padding, +/// the type that is used to convert to the 'storage' for LLVM-IR. +Type RecordType::getUnionStorageType( + const ::mlir::DataLayout &dataLayout) const { + assert(isUnion() && "Only call getUnionStorageType on unions"); llvm::ArrayRef<Type> members = getMembers(); - if (members.empty()) + if (members.empty() || (getPadded() && members.size() == 1)) return {}; // If the union is padded, we need to ignore the last member, // which is the padding. - auto endIt = getPadded() ? std::prev(members.end()) : members.end(); - if (endIt == members.begin()) - return {}; - return *std::max_element(members.begin(), endIt, [&](Type lhs, Type rhs) { - return dataLayout.getTypeABIAlignment(lhs) < - dataLayout.getTypeABIAlignment(rhs) || - (dataLayout.getTypeABIAlignment(lhs) == - dataLayout.getTypeABIAlignment(rhs) && - dataLayout.getTypeSize(lhs) < dataLayout.getTypeSize(rhs)); - }); -} - -mlir::Type RecordType::getPadding() const { - if (!getPadded()) - return {}; - llvm::ArrayRef<mlir::Type> members = getMembers(); - if (members.empty()) - return {}; - return members.back(); + return *std::max_element( + members.begin(), std::prev(members.end(), getPadded()), + [&](Type lhs, Type rhs) { + return dataLayout.getTypeABIAlignment(lhs) < + dataLayout.getTypeABIAlignment(rhs) || + (dataLayout.getTypeABIAlignment(lhs) == + dataLayout.getTypeABIAlignment(rhs) && + dataLayout.getTypeSize(lhs) < dataLayout.getTypeSize(rhs)); + }); } bool RecordType::isLayoutIdentical(const RecordType &other) { @@ -388,26 +378,18 @@ llvm::TypeSize RecordType::getTypeSizeInBits(const mlir::DataLayout &dataLayout, mlir::DataLayoutEntryListRef params) const { if (isUnion()) { - mlir::Type largest = getLargestMember(dataLayout); - if (!largest) - return llvm::TypeSize::getFixed(0); - // `getLargestMember` returns the highest-aligned variant (which dictates - // the union's alignment), not necessarily the largest by size. When the - // union is `padded` -- i.e., its highest-aligned variant is strictly - // smaller than its layout size, as happens for any union containing both - // a small high-alignment scalar and a larger low-alignment array (e.g., - // `union { char[16]; size_t; }`) -- `lowerUnion` appended a trailing - // byte-array member to extend the highest-aligned variant up to the - // layout size, and `LowerToLLVM` mirrors this by emitting the union as - // `{largest, padding}`. Include that padding here so `getTypeSize` - // reports the same size `LowerToLLVM` produces; otherwise a parent - // record containing the union gets a spurious tail-padding member added - // by `insertPadding`, making `sizeof(parent)` and array GEPs off by the - // missing bytes. - llvm::TypeSize size = dataLayout.getTypeSizeInBits(largest); - if (mlir::Type tailPad = getPadding()) - size += dataLayout.getTypeSizeInBits(tailPad); - return size; + // The size of a union is the size of the largest member. + llvm::ArrayRef<Type> members = getMembers(); + if (members.empty() || (getPadded() && members.size() == 1)) + return llvm::TypeSize::getFixed(8); + + mlir::Type largestType = *std::max_element( + members.begin(), std::prev(members.end(), getPadded()), + [&](Type lhs, Type rhs) { + return dataLayout.getTypeSize(lhs) < dataLayout.getTypeSize(rhs); + }); + + return dataLayout.getTypeSizeInBits(largestType); } auto recordSize = static_cast<uint64_t>(computeStructSize(dataLayout)); @@ -418,10 +400,20 @@ uint64_t RecordType::getABIAlignment(const ::mlir::DataLayout &dataLayout, ::mlir::DataLayoutEntryListRef params) const { if (isUnion()) { - mlir::Type largest = getLargestMember(dataLayout); - if (!largest) + // The alignment of a union is the alignment of the member with the largest + // alignment. + llvm::ArrayRef<Type> members = getMembers(); + if (members.empty() || (getPadded() && members.size() == 1)) return 1; - return dataLayout.getTypeABIAlignment(largest); + + mlir::Type largestAlignType = *std::max_element( + members.begin(), std::prev(members.end(), getPadded()), + [&](Type lhs, Type rhs) { + return dataLayout.getTypeABIAlignment(lhs) < + dataLayout.getTypeABIAlignment(rhs); + }); + + return dataLayout.getTypeABIAlignment(largestAlignType); } // Packed structures always have an ABI alignment of 1. diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index c4e98e299dfc1..c7fac702af219 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -3297,13 +3297,15 @@ static void prepareTypeConverter(mlir::LLVMTypeConverter &converter, for (mlir::Type ty : type.getMembers()) llvmMembers.push_back(convertTypeForMemory(converter, dataLayout, ty)); break; - // Unions are lowered as only the largest member. + // Unions are lowered as the 'storage' type. Which is the type with the + // largest alignment (or, if there are multiples, the largest type with the + // largest alignment). case cir::RecordType::Union: if (type.getMembers().empty()) break; - if (auto largestMember = type.getLargestMember(dataLayout)) + if (auto storageType = type.getUnionStorageType(dataLayout)) llvmMembers.push_back( - convertTypeForMemory(converter, dataLayout, largestMember)); + convertTypeForMemory(converter, dataLayout, storageType)); if (type.getPadded()) { auto last = *type.getMembers().rbegin(); llvmMembers.push_back( diff --git a/clang/test/CIR/CodeGen/union-layout.cpp b/clang/test/CIR/CodeGen/union-layout.cpp new file mode 100644 index 0000000000000..85cd8ee2803ce --- /dev/null +++ b/clang/test/CIR/CodeGen/union-layout.cpp @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +struct HasAnonUnion { + union { + int buf[4]; + long long cap; + }; +}; +// CIR-DAG: !rec_HasAnonUnion = !cir.record<struct "HasAnonUnion" {![[ANON_UNION:.*]]}> +// LLVM-DAG: %struct.HasAnonUnion = type { %[[ANON_UNION:.*]] } + +// CIR-DAG: ![[ANON_UNION]] = !cir.record<union {{.*}} padded {!cir.array<!s32i x 4>, !s64i, !cir.array<!u8i x 8>}> +// LLVM-DAG: %[[ANON_UNION]] = type { i64, [8 x i8] } + +struct ContainsHasAnonUnion { + HasAnonUnion hau; + int thing; +}; +// CIR-DAG: !rec_ContainsHasAnonUnion = !cir.record<struct "ContainsHasAnonUnion" {!rec_HasAnonUnion, !s32i}> +// LLVM-DAG: %struct.ContainsHasAnonUnion = type { %struct.HasAnonUnion, i32 } + +struct HasEmptyUnion { + union { + } u; +}; + +struct ContainsHasEmptyUnion { + HasEmptyUnion heu; + int thing; +}; +// CIR-DAG: !rec_ContainsHasEmptyUnion = !cir.record<struct "ContainsHasEmptyUnion" padded {!cir.array<!u8i x 4>, !s32i}> +// LLVM-DAG: %struct.ContainsHasEmptyUnion = type { [4 x i8], i32 } + + +void use() { + ContainsHasAnonUnion u; + ContainsHasEmptyUnion u2; +} +// CIR: cir.func {{.*}}@_Z3usev() +// CIR: cir.alloca !rec_ContainsHasAnonUnion, !cir.ptr<!rec_ContainsHasAnonUnion>, ["u"] {alignment = 8 : i64} +// CIR: cir.alloca !rec_ContainsHasEmptyUnion, !cir.ptr<!rec_ContainsHasEmptyUnion>, ["u2"] {alignment = 4 : i64} + +// LLVM-LABEL: define {{.*}}@_Z3usev() +// LLVM: alloca %struct.ContainsHasAnonUnion, {{.*}}align 8 +// LLVM: alloca %struct.ContainsHasEmptyUnion, {{.*}}align 4 diff --git a/clang/unittests/CIR/UnionTypeSizeTest.cpp b/clang/unittests/CIR/UnionTypeSizeTest.cpp index a5b3f42cc0e99..b4fb3144797c2 100644 --- a/clang/unittests/CIR/UnionTypeSizeTest.cpp +++ b/clang/unittests/CIR/UnionTypeSizeTest.cpp @@ -73,7 +73,7 @@ TEST_F(UnionTypeSizeTest, EmptyUnion) { mlir::DataLayout dl(module); llvm::TypeSize size = dl.getTypeSizeInBits(ty); - EXPECT_EQ(size.getFixedValue(), 0u); + EXPECT_EQ(size.getFixedValue(), 8u); module->erase(); } >From f52ecd8d8c7fa6eda6d23ad15a21e6ecf7620323 Mon Sep 17 00:00:00 2001 From: erichkeane <[email protected]> Date: Fri, 22 May 2026 17:27:34 -0700 Subject: [PATCH 2/2] Fixup things Adam suggested --- .../include/clang/CIR/Dialect/IR/CIRTypes.td | 1 + clang/lib/CIR/Dialect/IR/CIRTypes.cpp | 19 ++++++++++--------- clang/test/CIR/CodeGen/union-layout.cpp | 16 ++++++++++++++++ 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td index 64a1a542ec740..37e35d79c5a46 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td @@ -738,6 +738,7 @@ def CIR_RecordType : CIR_Type<"Record", "record", [ bool isIncomplete() const; mlir::Type getUnionStorageType(const mlir::DataLayout &dataLayout) const; + llvm::ArrayRef<mlir::Type> getMembersWithoutPadding() const; size_t getNumElements() const { return getMembers().size(); }; std::string getKindAsStr() { switch (getKind()) { diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp index 2f0a11028ea21..e71cc4e01a47f 100644 --- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp @@ -322,6 +322,10 @@ void RecordType::complete(ArrayRef<Type> members, bool packed, bool padded) { llvm_unreachable("failed to complete record"); } +ArrayRef<mlir::Type> RecordType::getMembersWithoutPadding() const { + return {getMembers().begin(), std::prev(getMembers().end(), getPadded())}; +} + /// Return the 'storage' type of the union, that is, without padding, /// the type that is used to convert to the 'storage' for LLVM-IR. Type RecordType::getUnionStorageType( @@ -333,9 +337,8 @@ Type RecordType::getUnionStorageType( // If the union is padded, we need to ignore the last member, // which is the padding. - return *std::max_element( - members.begin(), std::prev(members.end(), getPadded()), - [&](Type lhs, Type rhs) { + return *llvm::max_element( + getMembersWithoutPadding(), [&](Type lhs, Type rhs) { return dataLayout.getTypeABIAlignment(lhs) < dataLayout.getTypeABIAlignment(rhs) || (dataLayout.getTypeABIAlignment(lhs) == @@ -383,9 +386,8 @@ RecordType::getTypeSizeInBits(const mlir::DataLayout &dataLayout, if (members.empty() || (getPadded() && members.size() == 1)) return llvm::TypeSize::getFixed(8); - mlir::Type largestType = *std::max_element( - members.begin(), std::prev(members.end(), getPadded()), - [&](Type lhs, Type rhs) { + mlir::Type largestType = + *llvm::max_element(getMembersWithoutPadding(), [&](Type lhs, Type rhs) { return dataLayout.getTypeSize(lhs) < dataLayout.getTypeSize(rhs); }); @@ -406,9 +408,8 @@ RecordType::getABIAlignment(const ::mlir::DataLayout &dataLayout, if (members.empty() || (getPadded() && members.size() == 1)) return 1; - mlir::Type largestAlignType = *std::max_element( - members.begin(), std::prev(members.end(), getPadded()), - [&](Type lhs, Type rhs) { + mlir::Type largestAlignType = + *llvm::max_element(getMembersWithoutPadding(), [&](Type lhs, Type rhs) { return dataLayout.getTypeABIAlignment(lhs) < dataLayout.getTypeABIAlignment(rhs); }); diff --git a/clang/test/CIR/CodeGen/union-layout.cpp b/clang/test/CIR/CodeGen/union-layout.cpp index 85cd8ee2803ce..aedb1ec263e8b 100644 --- a/clang/test/CIR/CodeGen/union-layout.cpp +++ b/clang/test/CIR/CodeGen/union-layout.cpp @@ -36,15 +36,31 @@ struct ContainsHasEmptyUnion { // CIR-DAG: !rec_ContainsHasEmptyUnion = !cir.record<struct "ContainsHasEmptyUnion" padded {!cir.array<!u8i x 4>, !s32i}> // LLVM-DAG: %struct.ContainsHasEmptyUnion = type { [4 x i8], i32 } +struct SizeAlignmentMismatchUnion { + union { char c; int i; }; +}; +// CIR-DAG: !rec_SizeAlignmentMismatchUnion = !cir.record<struct "SizeAlignmentMismatchUnion" {![[ANON_UNION2:.*]]} +// CIR-DAG: ![[ANON_UNION2]] = !cir.record<union {{.*}} {!s8i, !s32i}> +// LLVM-DAG: %struct.SizeAlignmentMismatchUnion = type { %[[ANON_UNION2:.*]] } +// LLVM-DAG: %[[ANON_UNION2]] = type { i32 } +struct ContainsSizeAlignmentMismatchUnion { + SizeAlignmentMismatchUnion u; + int thing; +}; +// CIR-DAG: !rec_ContainsSizeAlignmentMismatchUnion = !cir.record<struct "ContainsSizeAlignmentMismatchUnion" {!rec_SizeAlignmentMismatchUnion, !s32i}> +// LLVM-DAG: %struct.ContainsSizeAlignmentMismatchUnion = type { %struct.SizeAlignmentMismatchUnion, i32 } void use() { ContainsHasAnonUnion u; ContainsHasEmptyUnion u2; + ContainsSizeAlignmentMismatchUnion u3; } // CIR: cir.func {{.*}}@_Z3usev() // CIR: cir.alloca !rec_ContainsHasAnonUnion, !cir.ptr<!rec_ContainsHasAnonUnion>, ["u"] {alignment = 8 : i64} // CIR: cir.alloca !rec_ContainsHasEmptyUnion, !cir.ptr<!rec_ContainsHasEmptyUnion>, ["u2"] {alignment = 4 : i64} +// CIR: cir.alloca !rec_ContainsSizeAlignmentMismatchUnion, !cir.ptr<!rec_ContainsSizeAlignmentMismatchUnion>, ["u3"] {alignment = 4 : i64} // LLVM-LABEL: define {{.*}}@_Z3usev() // LLVM: alloca %struct.ContainsHasAnonUnion, {{.*}}align 8 // LLVM: alloca %struct.ContainsHasEmptyUnion, {{.*}}align 4 +// LLVM: alloca %struct.ContainsSizeAlignmentMismatchUnion, {{.*}}align 4 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
