Author: Henrich Lauko Date: 2026-04-03T19:07:25+02:00 New Revision: dec90ffbc99926a5db8b3fee6d837eb788722153
URL: https://github.com/llvm/llvm-project/commit/dec90ffbc99926a5db8b3fee6d837eb788722153 DIFF: https://github.com/llvm/llvm-project/commit/dec90ffbc99926a5db8b3fee6d837eb788722153.diff LOG: [CIR] Fix record layout for [[no_unique_address]] fields (#186701) Fix two bugs in CIR's handling of `[[no_unique_address]]` fields: - Record layout: Use the base subobject type (without tail padding) instead of the complete object type for [[no_unique_address]] fields, allowing subsequent fields to overlap with tail padding. - Field access: Insert bitcasts from the base subobject pointer to the complete object pointer after cir.get_member for potentially-overlapping fields, so downstream code sees the expected type. - Zero-sized fields: Handle truly empty [[no_unique_address]] fields by computing their address via byte offsets rather than cir.get_member, since they have no entry in the record layout. A known gap (CIR copies 8 bytes where OG copies 5 via `ConstructorMemcpyizer`) is noted for follow-up. Added: clang/test/CIR/CodeGen/no-unique-address.cpp Modified: clang/lib/CIR/CodeGen/CIRGenExpr.cpp clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp clang/test/CIR/CodeGen/assign-operator.cpp clang/test/CIR/CodeGen/dtors.cpp Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 060a90ca5fb8c..91de4581cc8a8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -15,6 +15,7 @@ #include "CIRGenFunction.h" #include "CIRGenModule.h" #include "CIRGenValue.h" +#include "TargetInfo.h" #include "mlir/Dialect/Ptr/IR/MemorySpaceInterfaces.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/Value.h" @@ -35,36 +36,80 @@ using namespace clang; using namespace clang::CIRGen; using namespace cir; -/// Get the address of a zero-sized field within a record. The resulting address -/// doesn't necessarily have the right type. +/// Get the address of a zero-sized field within a record. Zero-sized fields +/// (e.g. empty bases with [[no_unique_address]]) don't appear in the CIR +/// record layout, so we compute their address using the ASTContext field +/// offset and byte-level pointer arithmetic instead of cir.get_member. +static Address emitAddrOfZeroSizeField(CIRGenFunction &cgf, Address base, + const FieldDecl *field) { + CIRGenBuilderTy &builder = cgf.getBuilder(); + CharUnits offset = cgf.getContext().toCharUnitsFromBits( + cgf.getContext().getFieldOffset(field)); + mlir::Type fieldType = cgf.convertType(field->getType()); + + if (offset.isZero()) { + return Address(builder.createPtrBitcast(base.getPointer(), fieldType), + base.getAlignment()); + } + + // Cast to byte pointer, stride by the field offset, then cast to the + // field pointer type (CIR pointers are typed, so we need explicit casts + // unlike OG's opaque-pointer GEP). + mlir::Location loc = cgf.getLoc(field->getLocation()); + mlir::Value addr = + builder.createPtrBitcast(base.getPointer(), builder.getUInt8Ty()); + addr = builder.createPtrStride(loc, addr, + builder.getUInt64(offset.getQuantity(), loc)); + addr = builder.createPtrBitcast(addr, fieldType); + return Address(addr, base.getAlignment().alignmentAtOffset(offset)); +} + Address CIRGenFunction::emitAddrOfFieldStorage(Address base, const FieldDecl *field, llvm::StringRef fieldName, unsigned fieldIndex) { - if (field->isZeroSize(getContext())) { - cgm.errorNYI(field->getSourceRange(), - "emitAddrOfFieldStorage: zero-sized field"); - return Address::invalid(); - } + if (isEmptyFieldForLayout(getContext(), field)) + return emitAddrOfZeroSizeField(*this, base, field); mlir::Location loc = getLoc(field->getLocation()); + // Retrieve layout information for both type resolution and alignment. + const RecordDecl *rec = field->getParent(); + const CIRGenRecordLayout &layout = cgm.getTypes().getCIRGenRecordLayout(rec); + unsigned idx = layout.getCIRFieldNo(field); + + // For potentially-overlapping fields (e.g. [[no_unique_address]]), the + // record stores the base subobject type (without tail padding) rather than + // the complete object type. Use the record's member type for get_member, + // then bitcast to the complete type for downstream use. + // + // For unions, all fields map to index 0, so we use the field's declared type + // directly instead of looking up the member type from the layout. mlir::Type fieldType = convertType(field->getType()); auto fieldPtr = cir::PointerType::get(fieldType); + bool needsBitcast = false; + + if (!rec->isUnion() && field->isPotentiallyOverlapping()) { + mlir::Type memberType = layout.getCIRType().getMembers()[idx]; + fieldPtr = cir::PointerType::get(memberType); + needsBitcast = true; + } + // For most cases fieldName is the same as field->getName() but for lambdas, // which do not currently carry the name, so it can be passed down from the // CaptureStmt. - cir::GetMemberOp memberAddr = builder.createGetMember( - loc, fieldPtr, base.getPointer(), fieldName, fieldIndex); + mlir::Value addr = builder.createGetMember(loc, fieldPtr, base.getPointer(), + fieldName, fieldIndex); + + // If the field is potentially overlapping, the record member uses the base + // subobject type. Cast to the complete object pointer type expected by + // callers (analogous to OG's opaque pointer behavior). + if (needsBitcast) + addr = builder.createPtrBitcast(addr, fieldType); - // Retrieve layout information, compute alignment and return the final - // address. - const RecordDecl *rec = field->getParent(); - const CIRGenRecordLayout &layout = cgm.getTypes().getCIRGenRecordLayout(rec); - unsigned idx = layout.getCIRFieldNo(field); CharUnits offset = CharUnits::fromQuantity( layout.getCIRType().getElementOffset(cgm.getDataLayout().layout, idx)); - return Address(memberAddr, base.getAlignment().alignmentAtOffset(offset)); + return Address(addr, base.getAlignment().alignmentAtOffset(offset)); } /// Given an expression of pointer type, try to @@ -477,6 +522,15 @@ LValue CIRGenFunction::emitLValueForField(LValue base, const FieldDecl *field) { if (cgm.lambdaFieldToName.count(field)) fieldName = cgm.lambdaFieldToName[field]; + // Empty fields don't have entries in the record layout, so handle them + // separately. They just use the base address directly with the right type. + if (!rec->isUnion() && isEmptyFieldForLayout(getContext(), field)) { + addr = emitAddrOfZeroSizeField(*this, addr, field); + LValue lv = makeAddrLValue(addr, fieldType, fieldBaseInfo); + lv.getQuals().addCVRQualifiers(recordCVR); + return lv; + } + if (rec->isUnion()) fieldIndex = field->getFieldIndex(); else { @@ -527,12 +581,15 @@ LValue CIRGenFunction::emitLValueForFieldInitialization( if (!fieldType->isReferenceType()) return emitLValueForField(base, field); - const CIRGenRecordLayout &layout = - cgm.getTypes().getCIRGenRecordLayout(field->getParent()); - unsigned fieldIndex = layout.getCIRFieldNo(field); - - Address v = - emitAddrOfFieldStorage(base.getAddress(), field, fieldName, fieldIndex); + Address v = base.getAddress(); + if (isEmptyFieldForLayout(getContext(), field)) { + v = emitAddrOfZeroSizeField(*this, v, field); + } else { + const CIRGenRecordLayout &layout = + cgm.getTypes().getCIRGenRecordLayout(field->getParent()); + unsigned fieldIndex = layout.getCIRFieldNo(field); + v = emitAddrOfFieldStorage(v, field, fieldName, fieldIndex); + } // Make sure that the address is pointing to the right type. mlir::Type memTy = convertTypeForMem(fieldType); diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp index 29eda175c208c..7e1f9c6768e61 100644 --- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp @@ -571,15 +571,21 @@ void CIRRecordLowering::accumulateFields() { field = accumulateBitFields(field, fieldEnd); assert((field == fieldEnd || !field->isBitField()) && "Failed to accumulate all the bitfields"); - } else if (!field->isZeroSize(astContext)) { - members.push_back(MemberInfo(bitsToCharUnits(getFieldBitOffset(*field)), - MemberInfo::InfoKind::Field, - getStorageType(*field), *field)); - ++field; - } else { + } else if (isEmptyFieldForLayout(astContext, *field)) { // TODO(cir): do we want to do anything special about zero size members? assert(!cir::MissingFeatures::zeroSizeRecordMembers()); ++field; + } else { + // Use base subobject layout for potentially-overlapping fields, + // as it is done in RecordLayoutBuilder. + members.push_back(MemberInfo( + bitsToCharUnits(getFieldBitOffset(*field)), + MemberInfo::InfoKind::Field, + field->isPotentiallyOverlapping() + ? getStorageType(field->getType()->getAsCXXRecordDecl()) + : getStorageType(*field), + *field)); + ++field; } } } diff --git a/clang/test/CIR/CodeGen/assign-operator.cpp b/clang/test/CIR/CodeGen/assign-operator.cpp index f416bca31387e..e63d179caf4e1 100644 --- a/clang/test/CIR/CodeGen/assign-operator.cpp +++ b/clang/test/CIR/CodeGen/assign-operator.cpp @@ -69,9 +69,9 @@ void copy_c(C &c1, C &c2) { // CIR: cir.store %arg0, %[[THIS_ADDR]] // CIR: cir.store %arg1, %[[ARG1_ADDR]] // CIR: %[[THIS:.*]] = cir.load{{.*}} %[[THIS_ADDR]] -// CIR: %[[A_MEMBER:.*]] = cir.get_member %[[THIS]][0] {name = "a"} +// CIR: %[[A_MEMBER:.*]] = cir.cast bitcast %[[THIS]] : !cir.ptr<!rec_C> -> !cir.ptr<!rec_A> // CIR: %[[ARG1_LOAD:.*]] = cir.load{{.*}} %[[ARG1_ADDR]] -// CIR: %[[A_MEMBER_2:.*]] = cir.get_member %[[ARG1_LOAD]][0] {name = "a"} +// CIR: %[[A_MEMBER_2:.*]] = cir.cast bitcast %[[ARG1_LOAD]] : !cir.ptr<!rec_C> -> !cir.ptr<!rec_A> // CIR: %[[C_A:.*]] = cir.call @_ZN1AaSERKS_(%[[A_MEMBER]], %[[A_MEMBER_2]]) // CIR: %[[B_MEMBER:.*]] = cir.get_member %[[THIS]][1] {name = "b"} // CIR: %[[B_VOID_PTR:.*]] = cir.cast bitcast %[[B_MEMBER]] : !cir.ptr<!cir.array<!rec_B x 16>> -> !cir.ptr<!void> @@ -84,6 +84,32 @@ void copy_c(C &c1, C &c2) { // CIR: %[[RET_VAL:.*]] = cir.load{{.*}} %[[RET_ADDR]] // CIR: cir.return %[[RET_VAL]] +// LLVM: define {{.*}} ptr @_ZN1CaSERKS_(ptr {{.*}} %[[THIS:.*]], ptr {{.*}} %[[ARG:.*]]) +// LLVM: %[[THIS_ADDR:.*]] = alloca ptr +// LLVM: %[[ARG_ADDR:.*]] = alloca ptr +// LLVM: store ptr %[[THIS]], ptr %[[THIS_ADDR]] +// LLVM: store ptr %[[ARG]], ptr %[[ARG_ADDR]] +// LLVM: %[[THIS_LOAD:.*]] = load ptr, ptr %[[THIS_ADDR]] +// LLVM: %[[ARG_LOAD:.*]] = load ptr, ptr %[[ARG_ADDR]] +// LLVM: %{{.*}} = call {{.*}} ptr @_ZN1AaSERKS_(ptr {{.*}} %[[THIS_LOAD]], ptr {{.*}} %[[ARG_LOAD]]) +// LLVM: %[[B1:.*]] = getelementptr %struct.C, ptr %[[THIS_LOAD]], i32 0, i32 1 +// LLVM: %[[ARG_LOAD2:.*]] = load ptr, ptr %[[ARG_ADDR]] +// LLVM: %[[B2:.*]] = getelementptr %struct.C, ptr %[[ARG_LOAD2]], i32 0, i32 1 +// LLVM: %{{.*}} = call ptr @memcpy(ptr {{.*}} %[[B1]], ptr {{.*}} %[[B2]], i64 {{.*}} 64) + +// OGCG: define {{.*}} ptr @_ZN1CaSERKS_(ptr {{.*}} %[[THIS:.*]], ptr {{.*}} %[[ARG:.*]]) +// OGCG: %[[THIS_ADDR:.*]] = alloca ptr +// OGCG: %[[ARG_ADDR:.*]] = alloca ptr +// OGCG: store ptr %[[THIS]], ptr %[[THIS_ADDR]] +// OGCG: store ptr %[[ARG]], ptr %[[ARG_ADDR]] +// OGCG: %[[THIS_LOAD:.*]] = load ptr, ptr %[[THIS_ADDR]] +// OGCG: %[[ARG_LOAD:.*]] = load ptr, ptr %[[ARG_ADDR]] +// OGCG: %{{.*}} = call {{.*}} ptr @_ZN1AaSERKS_(ptr {{.*}} %[[THIS_LOAD]], ptr {{.*}} %[[ARG_LOAD]]) +// OGCG: %[[B1:.*]] = getelementptr inbounds {{.*}} %struct.C, ptr %[[THIS_LOAD]], i32 0, i32 1 +// OGCG: %[[ARG_LOAD2:.*]] = load ptr, ptr %[[ARG_ADDR]] +// OGCG: %[[B2:.*]] = getelementptr inbounds {{.*}} %struct.C, ptr %[[ARG_LOAD2]], i32 0, i32 1 +// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %[[B1]], ptr {{.*}} %[[B2]], i64 64, i1 false) + // CIR: cir.func{{.*}} @_Z6copy_cR1CS0_(%arg0: !cir.ptr<!rec_C> {{.*}}, %arg1: !cir.ptr<!rec_C> {{.*}}) // CIR: %[[C1_ADDR:.*]] = cir.alloca !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>, ["c1", init, const] // CIR: %[[C2_ADDR:.*]] = cir.alloca !cir.ptr<!rec_C>, !cir.ptr<!cir.ptr<!rec_C>>, ["c2", init, const] diff --git a/clang/test/CIR/CodeGen/dtors.cpp b/clang/test/CIR/CodeGen/dtors.cpp index 37cb766b1be62..fc433a1cd3fdb 100644 --- a/clang/test/CIR/CodeGen/dtors.cpp +++ b/clang/test/CIR/CodeGen/dtors.cpp @@ -264,11 +264,14 @@ struct D { }; // CIR: cir.func {{.*}} @_ZN1DD2Ev -// CIR: %[[C:.*]] = cir.get_member %{{.*}}[1] {name = "c"} +// CIR: %[[BASE:.*]] = cir.cast bitcast %{{.*}} : !cir.ptr<!rec_D> -> !cir.ptr<!u8i> +// CIR: %[[OFFSET:.*]] = cir.const #cir.int<4> : !u64i +// CIR: %[[PTR:.*]] = cir.ptr_stride %[[BASE]], %[[OFFSET]] : (!cir.ptr<!u8i>, !u64i) -> !cir.ptr<!u8i> +// CIR: %[[C:.*]] = cir.cast bitcast %[[PTR]] : !cir.ptr<!u8i> -> !cir.ptr<!rec_C> // CIR: cir.call @_ZN1CD1Ev(%[[C]]) // LLVM: define {{.*}} void @_ZN1DD2Ev -// LLVM: %[[C:.*]] = getelementptr %struct.D, ptr %{{.*}}, i32 0, i32 1 +// LLVM: %[[C:.*]] = getelementptr i8, ptr %{{.*}}, i64 4 // LLVM: call void @_ZN1CD1Ev(ptr {{.*}} %[[C]]) // This destructor is defined after the calling function in OGCG. diff --git a/clang/test/CIR/CodeGen/no-unique-address.cpp b/clang/test/CIR/CodeGen/no-unique-address.cpp new file mode 100644 index 0000000000000..4f81e194783d3 --- /dev/null +++ b/clang/test/CIR/CodeGen/no-unique-address.cpp @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \ +// RUN: -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \ +// RUN: -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \ +// RUN: -emit-llvm %s -o %t.og.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.og.ll %s + +struct Base { + int x; +}; + +struct Middle : Base { + char c; + // sizeof(Middle) = 8 (4 for x, 1 for c, 3 tail padding) + // data size = 5 +}; + +struct Outer { + [[no_unique_address]] Middle m; + char extra; + Outer(const Middle &m, char e) : m(m), extra(e) {} +}; + +// The record layout should use the base subobject type for the +// [[no_unique_address]] field, allowing 'extra' to overlap with +// Middle's tail padding. + +// CIR: !rec_Middle2Ebase = !cir.record<struct "Middle.base" packed {!rec_Base, !s8i}> +// CIR: !rec_Outer = !cir.record<struct "Outer" padded {!rec_Middle2Ebase, !s8i, + +// CIR-LABEL: cir.func {{.*}} @_ZN5OuterC2ERK6Middlec( +// CIR: %[[THIS:.*]] = cir.load %{{.+}} : !cir.ptr<!cir.ptr<!rec_Outer>>, !cir.ptr<!rec_Outer> +// CIR: %[[M_BASE:.*]] = cir.get_member %[[THIS]][0] {name = "m"} : !cir.ptr<!rec_Outer> -> !cir.ptr<!rec_Middle2Ebase> +// CIR-NEXT: %[[M_COMPLETE:.*]] = cir.cast bitcast %[[M_BASE]] : !cir.ptr<!rec_Middle2Ebase> -> !cir.ptr<!rec_Middle> +// CIR: cir.copy %{{.+}} to %[[M_COMPLETE]] : !cir.ptr<!rec_Middle> +// CIR: %[[EXTRA:.*]] = cir.get_member %[[THIS]][1] {name = "extra"} : !cir.ptr<!rec_Outer> -> !cir.ptr<!s8i> + +// LLVM-LABEL: define {{.*}} void @_ZN5OuterC2ERK6Middlec( +// LLVM: %[[GEP:.*]] = getelementptr %struct.Outer, ptr %{{.+}}, i32 0, i32 0 +// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, i64 8, i1 false) + +// OGCG-LABEL: define {{.*}} void @_ZN5OuterC2ERK6Middlec( +// OGCG: %[[GEP:.*]] = getelementptr inbounds nuw %struct.Outer, ptr %{{.+}}, i32 0, i32 0 +// TODO(CIR): OG emits i64 5 here via ConstructorMemcpyizer, which CIR +// doesn't have yet. CIR copies the full 8-byte type instead. +// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %[[GEP]], ptr {{.*}} %{{.+}}, i64 5, i1 false) + +void test(const Middle &m) { + Outer o(m, 'x'); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
