Author: Henrich Lauko
Date: 2026-04-03T19:07:25+02:00
New Revision: dec90ffbc99926a5db8b3fee6d837eb788722153

URL: 
https://github.com/llvm/llvm-project/commit/dec90ffbc99926a5db8b3fee6d837eb788722153
DIFF: 
https://github.com/llvm/llvm-project/commit/dec90ffbc99926a5db8b3fee6d837eb788722153.diff

LOG: [CIR] Fix record layout for [[no_unique_address]] fields (#186701)

Fix two bugs in CIR's handling of `[[no_unique_address]]` fields:

- Record layout: Use the base subobject type (without tail padding)
instead of the complete object type for [[no_unique_address]] fields,
allowing subsequent fields to overlap with tail padding.
- Field access: Insert bitcasts from the base subobject pointer to the
complete object pointer after cir.get_member for potentially-overlapping
fields, so downstream code sees the expected type.
- Zero-sized fields: Handle truly empty [[no_unique_address]] fields by
computing their address via byte offsets rather than cir.get_member,
since they have no entry in the record layout.

A known gap (CIR copies 8 bytes where OG copies 5 via
`ConstructorMemcpyizer`) is noted for follow-up.

Added: 
    clang/test/CIR/CodeGen/no-unique-address.cpp

Modified: 
    clang/lib/CIR/CodeGen/CIRGenExpr.cpp
    clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
    clang/test/CIR/CodeGen/assign-operator.cpp
    clang/test/CIR/CodeGen/dtors.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp 
b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index 060a90ca5fb8c..91de4581cc8a8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -15,6 +15,7 @@
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
 #include "CIRGenValue.h"
+#include "TargetInfo.h"
 #include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h"
 #include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/Value.h"
@@ -35,36 +36,80 @@ using namespace clang;
 using namespace clang::CIRGen;
 using namespace cir;
 
-/// Get the address of a zero-sized field within a record. The resulting 
address
-/// doesn't necessarily have the right type.
+/// Get the address of a zero-sized field within a record. Zero-sized fields
+/// (e.g. empty bases with [[no_unique_address]]) don't appear in the CIR
+/// record layout, so we compute their address using the ASTContext field
+/// offset and byte-level pointer arithmetic instead of cir.get_member.
+static Address emitAddrOfZeroSizeField(CIRGenFunction &cgf, Address base,
+                                       const FieldDecl *field) {
+  CIRGenBuilderTy &builder = cgf.getBuilder();
+  CharUnits offset = cgf.getContext().toCharUnitsFromBits(
+      cgf.getContext().getFieldOffset(field));
+  mlir::Type fieldType = cgf.convertType(field->getType());
+
+  if (offset.isZero()) {
+    return Address(builder.createPtrBitcast(base.getPointer(), fieldType),
+                   base.getAlignment());
+  }
+
+  // Cast to byte pointer, stride by the field offset, then cast to the
+  // field pointer type (CIR pointers are typed, so we need explicit casts
+  // unlike OG's opaque-pointer GEP).
+  mlir::Location loc = cgf.getLoc(field->getLocation());
+  mlir::Value addr =
+      builder.createPtrBitcast(base.getPointer(), builder.getUInt8Ty());
+  addr = builder.createPtrStride(loc, addr,
+                                 builder.getUInt64(offset.getQuantity(), loc));
+  addr = builder.createPtrBitcast(addr, fieldType);
+  return Address(addr, base.getAlignment().alignmentAtOffset(offset));
+}
+
 Address CIRGenFunction::emitAddrOfFieldStorage(Address base,
                                                const FieldDecl *field,
                                                llvm::StringRef fieldName,
                                                unsigned fieldIndex) {
-  if (field->isZeroSize(getContext())) {
-    cgm.errorNYI(field->getSourceRange(),
-                 "emitAddrOfFieldStorage: zero-sized field");
-    return Address::invalid();
-  }
+  if (isEmptyFieldForLayout(getContext(), field))
+    return emitAddrOfZeroSizeField(*this, base, field);
 
   mlir::Location loc = getLoc(field->getLocation());
 
+  // Retrieve layout information for both type resolution and alignment.
+  const RecordDecl *rec = field->getParent();
+  const CIRGenRecordLayout &layout = cgm.getTypes().getCIRGenRecordLayout(rec);
+  unsigned idx = layout.getCIRFieldNo(field);
+
+  // For potentially-overlapping fields (e.g. [[no_unique_address]]), the
+  // record stores the base subobject type (without tail padding) rather than
+  // the complete object type. Use the record's member type for get_member,
+  // then bitcast to the complete type for downstream use.
+  //
+  // For unions, all fields map to index 0, so we use the field's declared type
+  // directly instead of looking up the member type from the layout.
   mlir::Type fieldType = convertType(field->getType());
   auto fieldPtr = cir::PointerType::get(fieldType);
+  bool needsBitcast = false;
+
+  if (!rec->isUnion() && field->isPotentiallyOverlapping()) {
+    mlir::Type memberType = layout.getCIRType().getMembers()[idx];
+    fieldPtr = cir::PointerType::get(memberType);
+    needsBitcast = true;
+  }
+
   // For most cases fieldName is the same as field->getName() but for lambdas,
   // which do not currently carry the name, so it can be passed down from the
   // CaptureStmt.
-  cir::GetMemberOp memberAddr = builder.createGetMember(
-      loc, fieldPtr, base.getPointer(), fieldName, fieldIndex);
+  mlir::Value addr = builder.createGetMember(loc, fieldPtr, base.getPointer(),
+                                             fieldName, fieldIndex);
+
+  // If the field is potentially overlapping, the record member uses the base
+  // subobject type. Cast to the complete object pointer type expected by
+  // callers (analogous to OG's opaque pointer behavior).
+  if (needsBitcast)
+    addr = builder.createPtrBitcast(addr, fieldType);
 
-  // Retrieve layout information, compute alignment and return the final
-  // address.
-  const RecordDecl *rec = field->getParent();
-  const CIRGenRecordLayout &layout = cgm.getTypes().getCIRGenRecordLayout(rec);
-  unsigned idx = layout.getCIRFieldNo(field);
   CharUnits offset = CharUnits::fromQuantity(
       layout.getCIRType().getElementOffset(cgm.getDataLayout().layout, idx));
-  return Address(memberAddr, base.getAlignment().alignmentAtOffset(offset));
+  return Address(addr, base.getAlignment().alignmentAtOffset(offset));
 }
 
 /// Given an expression of pointer type, try to
@@ -477,6 +522,15 @@ LValue CIRGenFunction::emitLValueForField(LValue base, 
const FieldDecl *field) {
   if (cgm.lambdaFieldToName.count(field))
     fieldName = cgm.lambdaFieldToName[field];
 
+  // Empty fields don't have entries in the record layout, so handle them
+  // separately. They just use the base address directly with the right type.
+  if (!rec->isUnion() && isEmptyFieldForLayout(getContext(), field)) {
+    addr = emitAddrOfZeroSizeField(*this, addr, field);
+    LValue lv = makeAddrLValue(addr, fieldType, fieldBaseInfo);
+    lv.getQuals().addCVRQualifiers(recordCVR);
+    return lv;
+  }
+
   if (rec->isUnion())
     fieldIndex = field->getFieldIndex();
   else {
@@ -527,12 +581,15 @@ LValue CIRGenFunction::emitLValueForFieldInitialization(
   if (!fieldType->isReferenceType())
     return emitLValueForField(base, field);
 
-  const CIRGenRecordLayout &layout =
-      cgm.getTypes().getCIRGenRecordLayout(field->getParent());
-  unsigned fieldIndex = layout.getCIRFieldNo(field);
-
-  Address v =
-      emitAddrOfFieldStorage(base.getAddress(), field, fieldName, fieldIndex);
+  Address v = base.getAddress();
+  if (isEmptyFieldForLayout(getContext(), field)) {
+    v = emitAddrOfZeroSizeField(*this, v, field);
+  } else {
+    const CIRGenRecordLayout &layout =
+        cgm.getTypes().getCIRGenRecordLayout(field->getParent());
+    unsigned fieldIndex = layout.getCIRFieldNo(field);
+    v = emitAddrOfFieldStorage(v, field, fieldName, fieldIndex);
+  }
 
   // Make sure that the address is pointing to the right type.
   mlir::Type memTy = convertTypeForMem(fieldType);

diff  --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp 
b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
index 29eda175c208c..7e1f9c6768e61 100644
--- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp
@@ -571,15 +571,21 @@ void CIRRecordLowering::accumulateFields() {
       field = accumulateBitFields(field, fieldEnd);
       assert((field == fieldEnd || !field->isBitField()) &&
              "Failed to accumulate all the bitfields");
-    } else if (!field->isZeroSize(astContext)) {
-      members.push_back(MemberInfo(bitsToCharUnits(getFieldBitOffset(*field)),
-                                   MemberInfo::InfoKind::Field,
-                                   getStorageType(*field), *field));
-      ++field;
-    } else {
+    } else if (isEmptyFieldForLayout(astContext, *field)) {
       // TODO(cir): do we want to do anything special about zero size members?
       assert(!cir::MissingFeatures::zeroSizeRecordMembers());
       ++field;
+    } else {
+      // Use base subobject layout for potentially-overlapping fields,
+      // as it is done in RecordLayoutBuilder.
+      members.push_back(MemberInfo(
+          bitsToCharUnits(getFieldBitOffset(*field)),
+          MemberInfo::InfoKind::Field,
+          field->isPotentiallyOverlapping()
+              ? getStorageType(field->getType()->getAsCXXRecordDecl())
+              : getStorageType(*field),
+          *field));
+      ++field;
     }
   }
 }

diff  --git a/clang/test/CIR/CodeGen/assign-operator.cpp 
b/clang/test/CIR/CodeGen/assign-operator.cpp
index f416bca31387e..e63d179caf4e1 100644
--- a/clang/test/CIR/CodeGen/assign-operator.cpp
+++ b/clang/test/CIR/CodeGen/assign-operator.cpp
@@ -69,9 +69,9 @@ void copy_c(C &c1, C &c2) {
 // CIR:   cir.store %arg0, %[[THIS_ADDR]]
 // CIR:   cir.store %arg1, %[[ARG1_ADDR]]
 // CIR:   %[[THIS:.*]] = cir.load{{.*}} %[[THIS_ADDR]]
-// CIR:   %[[A_MEMBER:.*]] = cir.get_member %[[THIS]][0] {name = "a"}
+// CIR:   %[[A_MEMBER:.*]] = cir.cast bitcast %[[THIS]] : !cir.ptr<!rec_C> -> 
!cir.ptr<!rec_A>
 // CIR:   %[[ARG1_LOAD:.*]] = cir.load{{.*}} %[[ARG1_ADDR]]
-// CIR:   %[[A_MEMBER_2:.*]] = cir.get_member %[[ARG1_LOAD]][0] {name = "a"}
+// CIR:   %[[A_MEMBER_2:.*]] = cir.cast bitcast %[[ARG1_LOAD]] : 
!cir.ptr<!rec_C> -> !cir.ptr<!rec_A>
 // CIR:   %[[C_A:.*]] = cir.call @_ZN1AaSERKS_(%[[A_MEMBER]], %[[A_MEMBER_2]])
 // CIR:   %[[B_MEMBER:.*]] = cir.get_member %[[THIS]][1] {name = "b"}
 // CIR:   %[[B_VOID_PTR:.*]] = cir.cast bitcast %[[B_MEMBER]] : 
!cir.ptr<!cir.array<!rec_B x 16>> -> !cir.ptr<!void>
@@ -84,6 +84,32 @@ void copy_c(C &c1, C &c2) {
 // CIR:   %[[RET_VAL:.*]] = cir.load{{.*}} %[[RET_ADDR]]
 // CIR:   cir.return %[[RET_VAL]]
 
+// LLVM: define {{.*}} ptr @_ZN1CaSERKS_(ptr {{.*}} %[[THIS:.*]], ptr {{.*}} 
%[[ARG:.*]])
+// LLVM:   %[[THIS_ADDR:.*]] = alloca ptr
+// LLVM:   %[[ARG_ADDR:.*]] = alloca ptr
+// LLVM:   store ptr %[[THIS]], ptr %[[THIS_ADDR]]
+// LLVM:   store ptr %[[ARG]], ptr %[[ARG_ADDR]]
+// LLVM:   %[[THIS_LOAD:.*]] = load ptr, ptr %[[THIS_ADDR]]
+// LLVM:   %[[ARG_LOAD:.*]] = load ptr, ptr %[[ARG_ADDR]]
+// LLVM:   %{{.*}} = call {{.*}} ptr @_ZN1AaSERKS_(ptr {{.*}} %[[THIS_LOAD]], 
ptr {{.*}} %[[ARG_LOAD]])
+// LLVM:   %[[B1:.*]] = getelementptr %struct.C, ptr %[[THIS_LOAD]], i32 0, 
i32 1
+// LLVM:   %[[ARG_LOAD2:.*]] = load ptr, ptr %[[ARG_ADDR]]
+// LLVM:   %[[B2:.*]] = getelementptr %struct.C, ptr %[[ARG_LOAD2]], i32 0, 
i32 1
+// LLVM:   %{{.*}} = call ptr @memcpy(ptr {{.*}} %[[B1]], ptr {{.*}} %[[B2]], 
i64 {{.*}} 64)
+
+// OGCG: define {{.*}} ptr @_ZN1CaSERKS_(ptr {{.*}} %[[THIS:.*]], ptr {{.*}} 
%[[ARG:.*]])
+// OGCG:   %[[THIS_ADDR:.*]] = alloca ptr
+// OGCG:   %[[ARG_ADDR:.*]] = alloca ptr
+// OGCG:   store ptr %[[THIS]], ptr %[[THIS_ADDR]]
+// OGCG:   store ptr %[[ARG]], ptr %[[ARG_ADDR]]
+// OGCG:   %[[THIS_LOAD:.*]] = load ptr, ptr %[[THIS_ADDR]]
+// OGCG:   %[[ARG_LOAD:.*]] = load ptr, ptr %[[ARG_ADDR]]
+// OGCG:   %{{.*}} = call {{.*}} ptr @_ZN1AaSERKS_(ptr {{.*}} %[[THIS_LOAD]], 
ptr {{.*}} %[[ARG_LOAD]])
+// OGCG:   %[[B1:.*]] = getelementptr inbounds {{.*}} %struct.C, ptr 
%[[THIS_LOAD]], i32 0, i32 1
+// OGCG:   %[[ARG_LOAD2:.*]] = load ptr, ptr %[[ARG_ADDR]]
+// OGCG:   %[[B2:.*]] = getelementptr inbounds {{.*}} %struct.C, ptr 
%[[ARG_LOAD2]], i32 0, i32 1
+// OGCG:   call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %[[B1]], ptr {{.*}} 
%[[B2]], i64 64, i1 false)
+
 // CIR: cir.func{{.*}} @_Z6copy_cR1CS0_(%arg0: !cir.ptr<!rec_C> {{.*}}, %arg1: 
!cir.ptr<!rec_C> {{.*}})
 // CIR:   %[[C1_ADDR:.*]] = cir.alloca !cir.ptr<!rec_C>, 
!cir.ptr<!cir.ptr<!rec_C>>, ["c1", init, const]
 // CIR:   %[[C2_ADDR:.*]] = cir.alloca !cir.ptr<!rec_C>, 
!cir.ptr<!cir.ptr<!rec_C>>, ["c2", init, const]

diff  --git a/clang/test/CIR/CodeGen/dtors.cpp 
b/clang/test/CIR/CodeGen/dtors.cpp
index 37cb766b1be62..fc433a1cd3fdb 100644
--- a/clang/test/CIR/CodeGen/dtors.cpp
+++ b/clang/test/CIR/CodeGen/dtors.cpp
@@ -264,11 +264,14 @@ struct D {
 };
 
 // CIR: cir.func {{.*}} @_ZN1DD2Ev
-// CIR:   %[[C:.*]] = cir.get_member %{{.*}}[1] {name = "c"}
+// CIR:   %[[BASE:.*]] = cir.cast bitcast %{{.*}} : !cir.ptr<!rec_D> -> 
!cir.ptr<!u8i>
+// CIR:   %[[OFFSET:.*]] = cir.const #cir.int<4> : !u64i
+// CIR:   %[[PTR:.*]] = cir.ptr_stride %[[BASE]], %[[OFFSET]] : 
(!cir.ptr<!u8i>, !u64i) -> !cir.ptr<!u8i>
+// CIR:   %[[C:.*]] = cir.cast bitcast %[[PTR]] : !cir.ptr<!u8i> -> 
!cir.ptr<!rec_C>
 // CIR:   cir.call @_ZN1CD1Ev(%[[C]])
 
 // LLVM: define {{.*}} void @_ZN1DD2Ev
-// LLVM:   %[[C:.*]] = getelementptr %struct.D, ptr %{{.*}}, i32 0, i32 1
+// LLVM:   %[[C:.*]] = getelementptr i8, ptr %{{.*}}, i64 4
 // LLVM:   call void @_ZN1CD1Ev(ptr {{.*}} %[[C]])
 
 // This destructor is defined after the calling function in OGCG.

diff  --git a/clang/test/CIR/CodeGen/no-unique-address.cpp 
b/clang/test/CIR/CodeGen/no-unique-address.cpp
new file mode 100644
index 0000000000000..4f81e194783d3
--- /dev/null
+++ b/clang/test/CIR/CodeGen/no-unique-address.cpp
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \
+// RUN:   -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \
+// RUN:   -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \
+// RUN:   -emit-llvm %s -o %t.og.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.og.ll %s
+
+struct Base {
+  int x;
+};
+
+struct Middle : Base {
+  char c;
+  // sizeof(Middle) = 8 (4 for x, 1 for c, 3 tail padding)
+  // data size = 5
+};
+
+struct Outer {
+  [[no_unique_address]] Middle m;
+  char extra;
+  Outer(const Middle &m, char e) : m(m), extra(e) {}
+};
+
+// The record layout should use the base subobject type for the
+// [[no_unique_address]] field, allowing 'extra' to overlap with
+// Middle's tail padding.
+
+// CIR: !rec_Middle2Ebase = !cir.record<struct "Middle.base" packed 
{!rec_Base, !s8i}>
+// CIR: !rec_Outer = !cir.record<struct "Outer" padded {!rec_Middle2Ebase, 
!s8i,
+
+// CIR-LABEL: cir.func {{.*}} @_ZN5OuterC2ERK6Middlec(
+// CIR:         %[[THIS:.*]] = cir.load %{{.+}} : 
!cir.ptr<!cir.ptr<!rec_Outer>>, !cir.ptr<!rec_Outer>
+// CIR:         %[[M_BASE:.*]] = cir.get_member %[[THIS]][0] {name = "m"} : 
!cir.ptr<!rec_Outer> -> !cir.ptr<!rec_Middle2Ebase>
+// CIR-NEXT:    %[[M_COMPLETE:.*]] = cir.cast bitcast %[[M_BASE]] : 
!cir.ptr<!rec_Middle2Ebase> -> !cir.ptr<!rec_Middle>
+// CIR:         cir.copy %{{.+}} to %[[M_COMPLETE]] : !cir.ptr<!rec_Middle>
+// CIR:         %[[EXTRA:.*]] = cir.get_member %[[THIS]][1] {name = "extra"} : 
!cir.ptr<!rec_Outer> -> !cir.ptr<!s8i>
+
+// LLVM-LABEL: define {{.*}} void @_ZN5OuterC2ERK6Middlec(
+// LLVM:         %[[GEP:.*]] = getelementptr %struct.Outer, ptr %{{.+}}, i32 
0, i32 0
+// LLVM:         call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, 
i64 8, i1 false)
+
+// OGCG-LABEL: define {{.*}} void @_ZN5OuterC2ERK6Middlec(
+// OGCG:         %[[GEP:.*]] = getelementptr inbounds nuw %struct.Outer, ptr 
%{{.+}}, i32 0, i32 0
+// TODO(CIR): OG emits i64 5 here via ConstructorMemcpyizer, which CIR
+// doesn't have yet. CIR copies the full 8-byte type instead.
+// OGCG:         call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %[[GEP]], ptr 
{{.*}} %{{.+}}, i64 5, i1 false)
+
+void test(const Middle &m) {
+  Outer o(m, 'x');
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to