llvmorg-github-actions[bot] wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Erich Keane (erichkeane)

<details>
<summary>Changes</summary>

Our union layout calculations were quite a bit wrong, `getLargestMember` 
actually got the largest aligned type, which is not the same thing. This showed 
up in quite a few places. #<!-- -->198361 was JUST submitted, and seems to have 
fixed the SYMPTOM, but not the actual size calculation in a way that is robust 
in the future.

This patch removes `getLargestMember` and renames it to do what it actually 
does: gets the storage type of the union.

It also corrects the alignment and size calculations (and as a drive-by,
 fixes the calculation for the size of an empty union).

This patch keeps the test changes from the previous fix, to make sure we get 
that correct.

---
Full diff: https://github.com/llvm/llvm-project/pull/199292.diff


5 Files Affected:

- (modified) clang/include/clang/CIR/Dialect/IR/CIRTypes.td (+1-6) 
- (modified) clang/lib/CIR/Dialect/IR/CIRTypes.cpp (+40-48) 
- (modified) clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp (+5-3) 
- (added) clang/test/CIR/CodeGen/union-layout.cpp (+50) 
- (modified) clang/unittests/CIR/UnionTypeSizeTest.cpp (+1-1) 


``````````diff
diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td 
b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
index 9e639df13de70..64a1a542ec740 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
@@ -737,12 +737,7 @@ def CIR_RecordType : CIR_Type<"Record", "record", [
     bool isComplete() const { return !isIncomplete(); };
     bool isIncomplete() const;
 
-    mlir::Type getLargestMember(const mlir::DataLayout &dataLayout) const;
-
-    /// Tail-padding member for a padded union (last member appended by
-    /// lowerUnion).  Empty type when the record is not padded.
-    mlir::Type getPadding() const;
-
+    mlir::Type getUnionStorageType(const mlir::DataLayout &dataLayout) const;
     size_t getNumElements() const { return getMembers().size(); };
     std::string getKindAsStr() {
       switch (getKind()) {
diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp 
b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
index 23c327e81831b..2f0a11028ea21 100644
--- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
+++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp
@@ -322,36 +322,26 @@ void RecordType::complete(ArrayRef<Type> members, bool 
packed, bool padded) {
     llvm_unreachable("failed to complete record");
 }
 
-/// Return the largest member of in the type.
-///
-/// Recurses into union members never returning a union as the largest member.
-Type RecordType::getLargestMember(const ::mlir::DataLayout &dataLayout) const {
-  assert(isUnion() && "Only call getLargestMember on unions");
+/// Return the 'storage' type of the union, that is, without padding,
+/// the type that is used to convert to the 'storage' for LLVM-IR.
+Type RecordType::getUnionStorageType(
+    const ::mlir::DataLayout &dataLayout) const {
+  assert(isUnion() && "Only call getUnionStorageType on unions");
   llvm::ArrayRef<Type> members = getMembers();
-  if (members.empty())
+  if (members.empty() || (getPadded() && members.size() == 1))
     return {};
 
   // If the union is padded, we need to ignore the last member,
   // which is the padding.
-  auto endIt = getPadded() ? std::prev(members.end()) : members.end();
-  if (endIt == members.begin())
-    return {};
-  return *std::max_element(members.begin(), endIt, [&](Type lhs, Type rhs) {
-    return dataLayout.getTypeABIAlignment(lhs) <
-               dataLayout.getTypeABIAlignment(rhs) ||
-           (dataLayout.getTypeABIAlignment(lhs) ==
-                dataLayout.getTypeABIAlignment(rhs) &&
-            dataLayout.getTypeSize(lhs) < dataLayout.getTypeSize(rhs));
-  });
-}
-
-mlir::Type RecordType::getPadding() const {
-  if (!getPadded())
-    return {};
-  llvm::ArrayRef<mlir::Type> members = getMembers();
-  if (members.empty())
-    return {};
-  return members.back();
+  return *std::max_element(
+      members.begin(), std::prev(members.end(), getPadded()),
+      [&](Type lhs, Type rhs) {
+        return dataLayout.getTypeABIAlignment(lhs) <
+                   dataLayout.getTypeABIAlignment(rhs) ||
+               (dataLayout.getTypeABIAlignment(lhs) ==
+                    dataLayout.getTypeABIAlignment(rhs) &&
+                dataLayout.getTypeSize(lhs) < dataLayout.getTypeSize(rhs));
+      });
 }
 
 bool RecordType::isLayoutIdentical(const RecordType &other) {
@@ -388,26 +378,18 @@ llvm::TypeSize
 RecordType::getTypeSizeInBits(const mlir::DataLayout &dataLayout,
                               mlir::DataLayoutEntryListRef params) const {
   if (isUnion()) {
-    mlir::Type largest = getLargestMember(dataLayout);
-    if (!largest)
-      return llvm::TypeSize::getFixed(0);
-    // `getLargestMember` returns the highest-aligned variant (which dictates
-    // the union's alignment), not necessarily the largest by size.  When the
-    // union is `padded` -- i.e., its highest-aligned variant is strictly
-    // smaller than its layout size, as happens for any union containing both
-    // a small high-alignment scalar and a larger low-alignment array (e.g.,
-    // `union { char[16]; size_t; }`) -- `lowerUnion` appended a trailing
-    // byte-array member to extend the highest-aligned variant up to the
-    // layout size, and `LowerToLLVM` mirrors this by emitting the union as
-    // `{largest, padding}`.  Include that padding here so `getTypeSize`
-    // reports the same size `LowerToLLVM` produces; otherwise a parent
-    // record containing the union gets a spurious tail-padding member added
-    // by `insertPadding`, making `sizeof(parent)` and array GEPs off by the
-    // missing bytes.
-    llvm::TypeSize size = dataLayout.getTypeSizeInBits(largest);
-    if (mlir::Type tailPad = getPadding())
-      size += dataLayout.getTypeSizeInBits(tailPad);
-    return size;
+    // The size of a union is the size of the largest member.
+    llvm::ArrayRef<Type> members = getMembers();
+    if (members.empty() || (getPadded() && members.size() == 1))
+      return llvm::TypeSize::getFixed(8);
+
+    mlir::Type largestType = *std::max_element(
+        members.begin(), std::prev(members.end(), getPadded()),
+        [&](Type lhs, Type rhs) {
+          return dataLayout.getTypeSize(lhs) < dataLayout.getTypeSize(rhs);
+        });
+
+    return dataLayout.getTypeSizeInBits(largestType);
   }
 
   auto recordSize = static_cast<uint64_t>(computeStructSize(dataLayout));
@@ -418,10 +400,20 @@ uint64_t
 RecordType::getABIAlignment(const ::mlir::DataLayout &dataLayout,
                             ::mlir::DataLayoutEntryListRef params) const {
   if (isUnion()) {
-    mlir::Type largest = getLargestMember(dataLayout);
-    if (!largest)
+    // The alignment of a union is the alignment of the member with the largest
+    // alignment.
+    llvm::ArrayRef<Type> members = getMembers();
+    if (members.empty() || (getPadded() && members.size() == 1))
       return 1;
-    return dataLayout.getTypeABIAlignment(largest);
+
+    mlir::Type largestAlignType = *std::max_element(
+        members.begin(), std::prev(members.end(), getPadded()),
+        [&](Type lhs, Type rhs) {
+          return dataLayout.getTypeABIAlignment(lhs) <
+                 dataLayout.getTypeABIAlignment(rhs);
+        });
+
+    return dataLayout.getTypeABIAlignment(largestAlignType);
   }
 
   // Packed structures always have an ABI alignment of 1.
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp 
b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index c4e98e299dfc1..c7fac702af219 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -3297,13 +3297,15 @@ static void 
prepareTypeConverter(mlir::LLVMTypeConverter &converter,
       for (mlir::Type ty : type.getMembers())
         llvmMembers.push_back(convertTypeForMemory(converter, dataLayout, ty));
       break;
-    // Unions are lowered as only the largest member.
+    // Unions are lowered as the 'storage' type. Which is the type with the
+    // largest alignment (or, if there are multiples, the largest type with the
+    // largest alignment).
     case cir::RecordType::Union:
       if (type.getMembers().empty())
         break;
-      if (auto largestMember = type.getLargestMember(dataLayout))
+      if (auto storageType = type.getUnionStorageType(dataLayout))
         llvmMembers.push_back(
-            convertTypeForMemory(converter, dataLayout, largestMember));
+            convertTypeForMemory(converter, dataLayout, storageType));
       if (type.getPadded()) {
         auto last = *type.getMembers().rbegin();
         llvmMembers.push_back(
diff --git a/clang/test/CIR/CodeGen/union-layout.cpp 
b/clang/test/CIR/CodeGen/union-layout.cpp
new file mode 100644
index 0000000000000..85cd8ee2803ce
--- /dev/null
+++ b/clang/test/CIR/CodeGen/union-layout.cpp
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o 
%t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o 
%t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+struct HasAnonUnion {
+  union {
+    int buf[4];
+    long long cap;
+  };
+};
+// CIR-DAG: !rec_HasAnonUnion = !cir.record<struct "HasAnonUnion" 
{![[ANON_UNION:.*]]}>
+// LLVM-DAG: %struct.HasAnonUnion = type { %[[ANON_UNION:.*]] }
+
+// CIR-DAG: ![[ANON_UNION]] = !cir.record<union {{.*}} padded 
{!cir.array<!s32i x 4>, !s64i, !cir.array<!u8i x 8>}>
+// LLVM-DAG: %[[ANON_UNION]] = type { i64, [8 x i8] }
+
+struct ContainsHasAnonUnion {
+  HasAnonUnion hau;
+  int thing;
+};
+// CIR-DAG: !rec_ContainsHasAnonUnion = !cir.record<struct 
"ContainsHasAnonUnion" {!rec_HasAnonUnion, !s32i}>
+// LLVM-DAG: %struct.ContainsHasAnonUnion = type { %struct.HasAnonUnion, i32 }
+
+struct HasEmptyUnion {
+  union {
+  } u;
+};
+
+struct ContainsHasEmptyUnion {
+  HasEmptyUnion heu;
+  int thing;
+};
+// CIR-DAG: !rec_ContainsHasEmptyUnion = !cir.record<struct 
"ContainsHasEmptyUnion" padded {!cir.array<!u8i x 4>, !s32i}>
+// LLVM-DAG: %struct.ContainsHasEmptyUnion = type { [4 x i8], i32 }
+
+
+void use() {
+  ContainsHasAnonUnion u;
+  ContainsHasEmptyUnion u2;
+}
+// CIR: cir.func {{.*}}@_Z3usev()
+// CIR: cir.alloca !rec_ContainsHasAnonUnion, 
!cir.ptr<!rec_ContainsHasAnonUnion>, ["u"] {alignment = 8 : i64}
+// CIR: cir.alloca !rec_ContainsHasEmptyUnion, 
!cir.ptr<!rec_ContainsHasEmptyUnion>, ["u2"] {alignment = 4 : i64}
+
+// LLVM-LABEL: define {{.*}}@_Z3usev()
+// LLVM:   alloca %struct.ContainsHasAnonUnion, {{.*}}align 8
+// LLVM:   alloca %struct.ContainsHasEmptyUnion, {{.*}}align 4
diff --git a/clang/unittests/CIR/UnionTypeSizeTest.cpp 
b/clang/unittests/CIR/UnionTypeSizeTest.cpp
index a5b3f42cc0e99..b4fb3144797c2 100644
--- a/clang/unittests/CIR/UnionTypeSizeTest.cpp
+++ b/clang/unittests/CIR/UnionTypeSizeTest.cpp
@@ -73,7 +73,7 @@ TEST_F(UnionTypeSizeTest, EmptyUnion) {
   mlir::DataLayout dl(module);
 
   llvm::TypeSize size = dl.getTypeSizeInBits(ty);
-  EXPECT_EQ(size.getFixedValue(), 0u);
+  EXPECT_EQ(size.getFixedValue(), 8u);
 
   module->erase();
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/199292
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to