https://github.com/xlauko updated https://github.com/llvm/llvm-project/pull/186702
>From 45f1692c5096d8219c344e7b9ed7ffa71693da2a Mon Sep 17 00:00:00 2001 From: xlauko <[email protected]> Date: Sun, 5 Apr 2026 12:34:07 +0200 Subject: [PATCH] [CIR] Use data size in emitAggregateCopy for overlapping copies Add skip_tail_padding property to cir.copy to handle potentially-overlapping subobject copies directly, instead of falling back to cir.libc.memcpy. When set, the lowering uses the record's data size (excluding tail padding) for the memcpy length. This keeps typed semantics and promotability of cir.copy. Also fix CXXABILowering to preserve op properties when recreating operations, and expose RecordType::computeStructDataSize() for computing data size of padded record types. --- .../CIR/Dialect/Builder/CIRBaseBuilder.h | 6 +- clang/include/clang/CIR/Dialect/IR/CIROps.td | 26 +++++-- .../include/clang/CIR/Dialect/IR/CIRTypes.td | 3 + clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp | 14 ++-- clang/lib/CIR/CodeGen/CIRGenValue.h | 2 +- clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 5 ++ clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp | 3 +- clang/lib/CIR/Dialect/IR/CIRTypes.cpp | 23 ++++++ .../CIR/Dialect/Transforms/CXXABILowering.cpp | 4 + .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 3 +- .../CIR/CodeGen/aggregate-copy-overlap.cpp | 73 +++++++++++++++++++ clang/test/CIR/CodeGen/no-unique-address.cpp | 6 +- clang/test/CIR/IR/copy.cir | 6 ++ clang/test/CIR/IR/invalid-copy.cir | 11 +++ 14 files changed, 165 insertions(+), 20 deletions(-) create mode 100644 clang/test/CIR/CodeGen/aggregate-copy-overlap.cpp diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index a6ddfd5ca4d4f..a370622ec0022 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -366,8 +366,10 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { /// Create a copy with inferred length. cir::CopyOp createCopy(mlir::Value dst, mlir::Value src, - bool isVolatile = false) { - return cir::CopyOp::create(*this, dst.getLoc(), dst, src, isVolatile); + bool isVolatile = false, + bool skipTailPadding = false) { + return cir::CopyOp::create(*this, dst.getLoc(), dst, src, isVolatile, + skipTailPadding); } cir::StoreOp createStore(mlir::Location loc, mlir::Value val, mlir::Value dst, diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index f72d891ecd941..1e189d89eb8fb 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -4149,21 +4149,32 @@ def CIR_CopyOp : CIR_Op<"copy",[ The `volatile` keyword indicates that the operation is volatile. + The `skip_tail_padding` keyword indicates that only the data bytes should + be copied, excluding any tail padding. This is used when copying + potentially-overlapping subobjects where the tail padding might be occupied + by other objects (e.g. fields marked with `[[no_unique_address]]`). This + is only valid when the pointee type is a record type. + Examples: ```mlir // Copying contents from one record to another: cir.copy %0 to %1 : !cir.ptr<!record_ty> + + // Copying without tail padding (for overlapping subobjects): + cir.copy %0 to %1 skip_tail_padding : !cir.ptr<!record_ty> ``` }]; let arguments = (ins Arg<CIR_PointerType, "", [MemWrite]>:$dst, Arg<CIR_PointerType, "", [MemRead]>:$src, - UnitAttr:$is_volatile + UnitAttr:$is_volatile, + UnitProp:$skip_tail_padding ); let assemblyFormat = [{$src `to` $dst (`volatile` $is_volatile^)? + (`skip_tail_padding` $skip_tail_padding^)? attr-dict `:` qualified(type($dst)) }]; let hasVerifier = 1; @@ -4172,10 +4183,15 @@ def CIR_CopyOp : CIR_Op<"copy",[ /// Returns the pointer type being copied. cir::PointerType getType() { return getSrc().getType(); } - /// Returns the number of bytes to be copied. - unsigned getLength(const mlir::DataLayout &dt) { - return dt.getTypeSize(getType().getPointee()); - } + /// Returns the number of bytes to be copied. When skip_tail_padding is + /// set, returns the data size (excluding tail padding) of the pointee + /// record type. Otherwise returns the full type size. + unsigned getCopySizeInBytes(const mlir::DataLayout &dl) { + if (getSkipTailPadding()) + return mlir::cast<cir::RecordType>(getType().getPointee()) + .computeStructDataSize(dl); + return dl.getTypeSize(getType().getPointee()); + } }]; } diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td index f3ee572251bbf..5433d4fa7fd76 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td @@ -766,6 +766,9 @@ def CIR_RecordType : CIR_Type<"Record", "record", [ return isComplete(); } + /// Returns the data size (excluding tail padding) for struct types. + unsigned computeStructDataSize(const mlir::DataLayout &dataLayout) const; + private: static constexpr char abi_conversion_prefix[] = "__post_abi_"; diff --git a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp index 8e63a64b3e664..dc6540bd9f334 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprAggregate.cpp @@ -1244,16 +1244,18 @@ void CIRGenFunction::emitAggregateCopy(LValue dest, LValue src, QualType ty, assert(!cir::MissingFeatures::aggValueSlotVolatile()); - // NOTE(cir): original codegen would normally convert destPtr and srcPtr to - // i8* since memcpy operates on bytes. We don't need that in CIR because - // cir.copy will operate on any CIR pointer that points to a sized type. - // Don't do any of the memmove_collectable tests if GC isn't set. if (cgm.getLangOpts().getGC() != LangOptions::NonGC) cgm.errorNYI("emitAggregateCopy: GC"); - [[maybe_unused]] cir::CopyOp copyOp = - builder.createCopy(destPtr.getPointer(), srcPtr.getPointer(), isVolatile); + // If the data size (excluding tail padding) differs from the full type size, + // use skip_tail_padding to avoid clobbering tail padding that may be occupied + // by other objects (e.g. fields marked with [[no_unique_address]]). + CharUnits dataSize = typeInfo.Width; + bool skipTailPadding = + mayOverlap && dataSize != getContext().getTypeSizeInChars(ty); + builder.createCopy(destPtr.getPointer(), srcPtr.getPointer(), isVolatile, + skipTailPadding); assert(!cir::MissingFeatures::opTBAA()); } diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h index e5b925f82a635..e1bbc54cfb9b1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenValue.h +++ b/clang/lib/CIR/CodeGen/CIRGenValue.h @@ -377,7 +377,7 @@ class AggValueSlot { enum IsDestructed_t { IsNotDestructed, IsDestructed }; enum IsZeroed_t { IsNotZeroed, IsZeroed }; enum IsAliased_t { IsNotAliased, IsAliased }; - enum Overlap_t { MayOverlap, DoesNotOverlap }; + enum Overlap_t { DoesNotOverlap, MayOverlap }; /// Returns an aggregate value slot indicating that the aggregate /// value is being ignored. diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 8ccc83a25537b..a0b11f7f46b97 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -2856,6 +2856,11 @@ LogicalResult cir::CopyOp::verify() { if (getSrc() == getDst()) return emitError() << "source and destination are the same"; + if (getSkipTailPadding() && + !mlir::isa<cir::RecordType>(getType().getPointee())) + return emitError() + << "skip_tail_padding is only valid for record pointee types"; + return mlir::success(); } diff --git a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp index f79a52e2fb9b3..8a82bcb19454e 100644 --- a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp @@ -155,7 +155,8 @@ bool cir::CopyOp::canUsesBeRemoved( if (getDst() == getSrc()) return false; - return getLength(dataLayout) == dataLayout.getTypeSize(slot.elemType); + return getCopySizeInBytes(dataLayout) == + dataLayout.getTypeSize(slot.elemType); } //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp index bbdd9ea2731f7..b637484ac93d1 100644 --- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp @@ -427,6 +427,29 @@ RecordType::computeStructSize(const mlir::DataLayout &dataLayout) const { return recordSize; } +unsigned +RecordType::computeStructDataSize(const mlir::DataLayout &dataLayout) const { + assert(isComplete() && "Cannot get layout of incomplete records"); + + // Compute the data size (excluding tail padding) for this record type. For + // padded records, the last member is the tail padding array added by + // CIRGenRecordLayoutBuilder::appendPaddingBytes, so we exclude it. For + // non-padded records, data size equals the full struct size without + // alignment. + auto members = getMembers(); + unsigned numMembers = + getPadded() && members.size() > 1 ? members.size() - 1 : members.size(); + unsigned recordSize = 0; + for (unsigned i = 0; i < numMembers; ++i) { + mlir::Type ty = members[i]; + const uint64_t tyAlign = + (getPacked() ? 1 : dataLayout.getTypeABIAlignment(ty)); + recordSize = llvm::alignTo(recordSize, tyAlign); + recordSize += dataLayout.getTypeSize(ty); + } + return recordSize; +} + // We also compute the alignment as part of computeStructSize, but this is more // efficient. Ideally, we'd like to compute both at once and cache the result, // but that's implemented yet. diff --git a/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp b/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp index 968b0d7c56164..5d8192dd79bd4 100644 --- a/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp +++ b/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp @@ -294,6 +294,10 @@ class CIRGenericCXXABILoweringPattern : public mlir::ConversionPattern { loweredOpState.addOperands(operands); loweredOpState.addSuccessors(op->getSuccessors()); + // Copy properties from the original operation. + if (op->getPropertiesStorageSize()) + loweredOpState.propertiesAttr = op->getPropertiesAsAttribute(); + // Lower all attributes. llvm::SmallVector<mlir::NamedAttribute> attrs; for (const mlir::NamedAttribute &na : op->getAttrs()) diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 119269447ec37..1efaf4190b8da 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -188,7 +188,8 @@ mlir::LogicalResult CIRToLLVMCopyOpLowering::matchAndRewrite( mlir::ConversionPatternRewriter &rewriter) const { mlir::DataLayout layout(op->getParentOfType<mlir::ModuleOp>()); const mlir::Value length = mlir::LLVM::ConstantOp::create( - rewriter, op.getLoc(), rewriter.getI64Type(), op.getLength(layout)); + rewriter, op.getLoc(), rewriter.getI64Type(), + op.getCopySizeInBytes(layout)); assert(!cir::MissingFeatures::aggValueSlotVolatile()); rewriter.replaceOpWithNewOp<mlir::LLVM::MemcpyOp>( op, adaptor.getDst(), adaptor.getSrc(), length, op.getIsVolatile()); diff --git a/clang/test/CIR/CodeGen/aggregate-copy-overlap.cpp b/clang/test/CIR/CodeGen/aggregate-copy-overlap.cpp new file mode 100644 index 0000000000000..0e323546b4431 --- /dev/null +++ b/clang/test/CIR/CodeGen/aggregate-copy-overlap.cpp @@ -0,0 +1,73 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \ +// RUN: -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \ +// RUN: -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu \ +// RUN: -emit-llvm %s -o %t.og.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.og.ll %s + +// Test that emitAggregateCopy uses the data size (excluding tail padding) +// when copying potentially-overlapping subobjects, and uses full type size +// otherwise. + +struct Base { int x; }; + +struct HasPadding : Base { + char c; + // sizeof(HasPadding) = 8 (4 for x, 1 for c, 3 tail padding) + // data size = 5 +}; + +struct VBase { int v; }; + +// Outer has a virtual base, so its nvsize (14) is smaller than its full +// sizeof (24). Because [[no_unique_address]] HasPadding extends beyond +// nvsize (offset 8 + sizeof 8 = 16 > 14), getOverlapForFieldInit returns +// MayOverlap, and emitAggregateCopy must use the data size (5) instead of +// the full sizeof (8). +struct Outer : virtual VBase { + [[no_unique_address]] HasPadding hp; + char extra; + Outer(const HasPadding &hp, char e) : hp(hp), extra(e) {} +}; + +// With virtual bases, only the C1 (complete) constructor is emitted. +// CIR-LABEL: cir.func {{.*}} @_ZN5OuterC1ERK10HasPaddingc( +// CIR: cir.copy %{{.+}} to %{{.+}} skip_tail_padding : !cir.ptr<!rec_HasPadding> + +// LLVM-LABEL: define {{.*}} void @_ZN5OuterC1ERK10HasPaddingc( +// LLVM: %[[GEP:.*]] = getelementptr %struct.Outer, ptr %{{.+}}, i32 0, i32 1 +// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, i64 5, i1 false) + +// OGCG-LABEL: define {{.*}} void @_ZN5OuterC1ERK10HasPaddingc( +// OGCG: %[[GEP:.*]] = getelementptr inbounds nuw %struct.Outer, ptr %{{.+}}, i32 0, i32 1 +// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %[[GEP]], ptr {{.*}} %{{.+}}, i64 5, i1 false) + +void test_overlap(const HasPadding &hp) { + Outer o(hp, 'x'); +} + +// NonOverlapping does NOT have [[no_unique_address]], so the copy uses +// cir.copy (full type size) rather than cir.libc.memcpy. +struct NonOverlapping { + HasPadding hp; + char extra; + NonOverlapping(const HasPadding &hp, char e) : hp(hp), extra(e) {} +}; + +// CIR-LABEL: cir.func {{.*}} @_ZN14NonOverlappingC2ERK10HasPaddingc( +// CIR: cir.copy %{{.+}} to %{{.+}} : !cir.ptr<!rec_HasPadding> + +// LLVM-LABEL: define {{.*}} void @_ZN14NonOverlappingC2ERK10HasPaddingc( +// LLVM: %[[GEP:.*]] = getelementptr %struct.NonOverlapping, ptr %{{.+}}, i32 0, i32 0 +// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, i64 8, i1 false) + +// OGCG-LABEL: define {{.*}} void @_ZN14NonOverlappingC2ERK10HasPaddingc( +// OGCG: %[[GEP:.*]] = getelementptr inbounds nuw %struct.NonOverlapping, ptr %{{.+}}, i32 0, i32 0 +// OGCG: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %[[GEP]], ptr {{.*}} %{{.+}}, i64 8, i1 false) + +void test_no_overlap(const HasPadding &hp) { + NonOverlapping o(hp, 'x'); +} diff --git a/clang/test/CIR/CodeGen/no-unique-address.cpp b/clang/test/CIR/CodeGen/no-unique-address.cpp index 4f81e194783d3..fb9386bce56ef 100644 --- a/clang/test/CIR/CodeGen/no-unique-address.cpp +++ b/clang/test/CIR/CodeGen/no-unique-address.cpp @@ -35,17 +35,15 @@ struct Outer { // CIR: %[[THIS:.*]] = cir.load %{{.+}} : !cir.ptr<!cir.ptr<!rec_Outer>>, !cir.ptr<!rec_Outer> // CIR: %[[M_BASE:.*]] = cir.get_member %[[THIS]][0] {name = "m"} : !cir.ptr<!rec_Outer> -> !cir.ptr<!rec_Middle2Ebase> // CIR-NEXT: %[[M_COMPLETE:.*]] = cir.cast bitcast %[[M_BASE]] : !cir.ptr<!rec_Middle2Ebase> -> !cir.ptr<!rec_Middle> -// CIR: cir.copy %{{.+}} to %[[M_COMPLETE]] : !cir.ptr<!rec_Middle> +// CIR: cir.copy %{{.+}} to %[[M_COMPLETE]] skip_tail_padding : !cir.ptr<!rec_Middle> // CIR: %[[EXTRA:.*]] = cir.get_member %[[THIS]][1] {name = "extra"} : !cir.ptr<!rec_Outer> -> !cir.ptr<!s8i> // LLVM-LABEL: define {{.*}} void @_ZN5OuterC2ERK6Middlec( // LLVM: %[[GEP:.*]] = getelementptr %struct.Outer, ptr %{{.+}}, i32 0, i32 0 -// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, i64 8, i1 false) +// LLVM: call void @llvm.memcpy.p0.p0.i64(ptr %[[GEP]], ptr %{{.+}}, i64 5, i1 false) // OGCG-LABEL: define {{.*}} void @_ZN5OuterC2ERK6Middlec( // OGCG: %[[GEP:.*]] = getelementptr inbounds nuw %struct.Outer, ptr %{{.+}}, i32 0, i32 0 -// TODO(CIR): OG emits i64 5 here via ConstructorMemcpyizer, which CIR -// doesn't have yet. CIR copies the full 8-byte type instead. // OGCG: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}} %[[GEP]], ptr {{.*}} %{{.+}}, i64 5, i1 false) void test(const Middle &m) { diff --git a/clang/test/CIR/IR/copy.cir b/clang/test/CIR/IR/copy.cir index f9db29aa0e01f..1d6c8161943c6 100644 --- a/clang/test/CIR/IR/copy.cir +++ b/clang/test/CIR/IR/copy.cir @@ -1,10 +1,16 @@ // RUN: cir-opt %s --verify-roundtrip | FileCheck %s !s32i = !cir.int<s, 32> +!rec = !cir.record<struct "S" {!s32i, !cir.int<s, 8>}> module { cir.func @shouldParseCopyOp(%arg0 : !cir.ptr<!s32i>, %arg1 : !cir.ptr<!s32i>) { cir.copy %arg0 to %arg1 : !cir.ptr<!s32i> // CHECK: cir.copy %arg0 to %arg1 : !cir.ptr<!s32i> cir.return } + cir.func @shouldParseCopyOpSkipTailPadding(%arg0 : !cir.ptr<!rec>, %arg1 : !cir.ptr<!rec>) { + cir.copy %arg0 to %arg1 skip_tail_padding : !cir.ptr<!rec> + // CHECK: cir.copy %arg0 to %arg1 skip_tail_padding : !cir.ptr<!rec_S> + cir.return + } } diff --git a/clang/test/CIR/IR/invalid-copy.cir b/clang/test/CIR/IR/invalid-copy.cir index 5801b7401acd7..14b7ebccee2ed 100644 --- a/clang/test/CIR/IR/invalid-copy.cir +++ b/clang/test/CIR/IR/invalid-copy.cir @@ -19,3 +19,14 @@ module { cir.return } } + +// ----- + +module { + // skip_tail_padding is only valid for record pointee types. + cir.func @invalid_skip_tail_padding(%arg0 : !cir.ptr<!cir.int<s, 32>>, %arg1 : !cir.ptr<!cir.int<s, 32>>) { + // expected-error@+1 {{skip_tail_padding is only valid for record pointee types}} + cir.copy %arg0 to %arg1 skip_tail_padding : !cir.ptr<!cir.int<s, 32>> + cir.return + } +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
