https://github.com/kumarak updated https://github.com/llvm/llvm-project/pull/204360
>From 81e726b77b53e89ff209a70deccb24a32dff5b57 Mon Sep 17 00:00:00 2001 From: AkshayK <[email protected]> Date: Wed, 17 Jun 2026 10:16:06 -0400 Subject: [PATCH 1/3] [CIR] Use the AST result type for sizeof/alignof constants Port of llvm/llvm-project#203942. Form the IntAttr for sizeof / __alignof / __builtin_vectorelements from size_t (cgm.sizeTy) instead of a hardcoded 64-bit type, so the constant width matches the APInt returned by EvaluateKnownConstInt on targets where size_t is narrower than 64 bits. --- clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 8 ++-- .../unary-expr-or-type-trait-32bit.cpp | 38 +++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 clang/test/CIR/CodeGen/unary-expr-or-type-trait-32bit.cpp diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index c7e19c38dbba1..5e8bb9df83ab3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -2773,16 +2773,18 @@ mlir::Value ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr( e->getSourceRange(), "VisitUnaryExprOrTypeTraitExpr: sizeOf scalable vector"); return builder.getConstant( - loc, cir::IntAttr::get(cgf.cgm.uInt64Ty, + loc, cir::IntAttr::get(cgf.cgm.sizeTy, e->EvaluateKnownConstInt(cgf.getContext()))); } return builder.getConstant( - loc, cir::IntAttr::get(cgf.cgm.uInt64Ty, vecTy.getSize())); + loc, cir::IntAttr::get(cgf.cgm.sizeTy, vecTy.getSize())); } + // The result type is size_t (target-dependent width); use it so the IntAttr + // width matches the APInt from EvaluateKnownConstInt. 
return builder.getConstant( - loc, cir::IntAttr::get(cgf.cgm.uInt64Ty, + loc, cir::IntAttr::get(cgf.cgm.sizeTy, e->EvaluateKnownConstInt(cgf.getContext()))); } diff --git a/clang/test/CIR/CodeGen/unary-expr-or-type-trait-32bit.cpp b/clang/test/CIR/CodeGen/unary-expr-or-type-trait-32bit.cpp new file mode 100644 index 0000000000000..e24278c09e622 --- /dev/null +++ b/clang/test/CIR/CodeGen/unary-expr-or-type-trait-32bit.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -std=c++20 -triple i686-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -std=c++20 -triple i686-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM +// RUN: %clang_cc1 -std=c++20 -triple i686-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM + +// The result of sizeof/alignof/__builtin_vectorelements is size_t, which is +// 32 bits wide on this target. The emitted constants must use that width +// rather than a hardcoded 64-bit type. 
+ +using size_t = decltype(sizeof(int)); + +size_t size_of_int() { return sizeof(int); } + +// CIR-LABEL: cir.func {{.*}}@_Z11size_of_intv() -> {{.*}}!u32i +// CIR: cir.const #cir.int<4> : !u32i + +// LLVM-LABEL: define {{.*}}i32 @_Z11size_of_intv() +// LLVM: {{.*}}i32 4 + +size_t align_of_double() { return alignof(double); } + +// CIR-LABEL: cir.func {{.*}}@_Z15align_of_doublev() -> {{.*}}!u32i +// CIR: cir.const #cir.int<4> : !u32i + +// LLVM-LABEL: define {{.*}}i32 @_Z15align_of_doublev() +// LLVM: {{.*}}i32 4 + +typedef int vi4 __attribute__((vector_size(16))); + +size_t vector_elements(vi4 v) { return __builtin_vectorelements(v); } + +// CIR-LABEL: cir.func {{.*}}@_Z15vector_elementsDv4_i({{.*}}) -> {{.*}}!u32i +// CIR: cir.const #cir.int<4> : !u32i + +// LLVM-LABEL: define {{.*}}i32 @_Z15vector_elementsDv4_i +// LLVM: {{.*}}i32 4 >From 5b7cee08a0d669fe8e894e3226f24ddb00f1743d Mon Sep 17 00:00:00 2001 From: AkshayK <[email protected]> Date: Wed, 17 Jun 2026 10:16:06 -0400 Subject: [PATCH 2/3] [CIR] Pointer and vptr width from a CIR-native data-layout entry Port of llvm/llvm-project#204185. Attach a cir.ptr data-layout entry at module setup storing {size-in-bits, abi-align-in-bits} read from the target DataLayout; PointerType and VPtrType read their size and ABI alignment from it (falling back to a 64-bit size and an 8-byte alignment when the entry is absent), and the CIR->LLVM lowering strips the entry. Fixes record-layout crashes on targets with 32-bit pointers (e.g. nvptx, spirv32). 
--- clang/lib/CIR/CodeGen/CIRGenerator.cpp | 25 ++++++++- clang/lib/CIR/Dialect/IR/CIRTypes.cpp | 56 +++++++++++++++++-- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 17 ++++++ .../test/CIR/CodeGen/pointer-width-32bit.cpp | 44 +++++++++++++++ 4 files changed, 135 insertions(+), 7 deletions(-) create mode 100644 clang/test/CIR/CodeGen/pointer-width-32bit.cpp diff --git a/clang/lib/CIR/CodeGen/CIRGenerator.cpp b/clang/lib/CIR/CodeGen/CIRGenerator.cpp index d4fcbb6e42f3e..61efaebad9b82 100644 --- a/clang/lib/CIR/CodeGen/CIRGenerator.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenerator.cpp @@ -21,6 +21,7 @@ #include "clang/AST/DeclGroup.h" #include "clang/CIR/CIRGenerator.h" #include "clang/CIR/InitAllDialects.h" +#include "clang/CIR/MissingFeatures.h" #include "llvm/IR/DataLayout.h" using namespace cir; @@ -42,7 +43,29 @@ static void setMLIRDataLayout(mlir::ModuleOp &mod, const llvm::DataLayout &dl) { mlir::MLIRContext *mlirContext = mod.getContext(); mlir::DataLayoutSpecInterface dlSpec = mlir::translateDataLayout(dl, mlirContext); - mod->setAttr(mlir::DLTIDialect::kDataLayoutAttrName, dlSpec); + + // Add a CIR-native pointer data-layout entry so cir.ptr / cir.vptr size and + // alignment are driven by the data layout rather than hardcoded. + // The value stores {size-in-bits, abi-align-in-bits} keyed on cir.ptr. + // + // TODO(cir): Only the default address space is recorded and + // address-space-dependent pointer sizes are not modeled yet. Emit + // per-address-space entries. 
+ assert(!cir::MissingFeatures::dataLayoutPtrHandlingBasedOnLangAS()); + constexpr unsigned kBitsInByte = 8; + unsigned ptrSizeBits = dl.getPointerSizeInBits(/*AS=*/0); + unsigned ptrAlignBits = + dl.getPointerABIAlignment(/*AS=*/0).value() * kBitsInByte; + auto ptrKey = cir::PointerType::get(cir::VoidType::get(mlirContext)); + auto ptrVal = mlir::DenseI32ArrayAttr::get( + mlirContext, + {static_cast<int32_t>(ptrSizeBits), static_cast<int32_t>(ptrAlignBits)}); + llvm::SmallVector<mlir::DataLayoutEntryInterface> entries( + dlSpec.getEntries().begin(), dlSpec.getEntries().end()); + entries.push_back(mlir::DataLayoutEntryAttr::get(ptrKey, ptrVal)); + + mod->setAttr(mlir::DLTIDialect::kDataLayoutAttrName, + mlir::DataLayoutSpecAttr::get(mlirContext, entries)); } void CIRGenerator::Initialize(ASTContext &astContext) { diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp index 9c2a40e3681aa..04da6c85bbbb1 100644 --- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp @@ -580,20 +580,62 @@ void RecordType::removeABIConversionNamePrefix() { // Data Layout information for types //===----------------------------------------------------------------------===// +// A CIR-native pointer data-layout entry stores {size-in-bits, +// abi-align-in-bits} as a dense i32 array keyed on a cir.ptr type (see +// setMLIRDataLayout in CIRGenerator). +namespace { +constexpr static uint64_t kBitsInByte = 8; + +// Defaults used only when the module carries no cir.ptr data-layout entry +// (e.g. CIR parsed from text without a data layout). These mirror the MLIR LLVM +// dialect's pointer defaults. +constexpr static uint64_t kDefaultPointerSizeBits = 64; +constexpr static uint64_t kDefaultPointerAlignment = 8; + +enum class CIRPtrDLPos { Size = 0, AbiAlign = 1 }; + +// Returns the requested field of the cir.ptr data-layout entry. 
+std::optional<uint64_t> getPointerSpecValue(mlir::DataLayoutEntryListRef params, + CIRPtrDLPos pos) { + for (mlir::DataLayoutEntryInterface entry : params) { + if (!entry.isTypeEntry()) + continue; + auto spec = mlir::dyn_cast<mlir::DenseI32ArrayAttr>(entry.getValue()); + assert(spec && spec.size() == 2 && + "malformed cir.ptr data layout entry: expected a pair of i32 " + "{size-in-bits, abi-align-in-bits}"); + return static_cast<uint64_t>(spec[static_cast<int>(pos)]); + } + return std::nullopt; +} +} // namespace + llvm::TypeSize PointerType::getTypeSizeInBits(const ::mlir::DataLayout &dataLayout, ::mlir::DataLayoutEntryListRef params) const { + // The pointer width comes from the CIR-native data-layout entry keyed on + // cir.ptr, which records the width for the default address space; fall back + // to 64 bits if the module carries no such entry. // FIXME: improve this in face of address spaces assert(!cir::MissingFeatures::dataLayoutPtrHandlingBasedOnLangAS()); - return llvm::TypeSize::getFixed(64); + if (std::optional<uint64_t> size = + getPointerSpecValue(params, CIRPtrDLPos::Size)) + return llvm::TypeSize::getFixed(*size); + return llvm::TypeSize::getFixed(kDefaultPointerSizeBits); } uint64_t PointerType::getABIAlignment(const ::mlir::DataLayout &dataLayout, ::mlir::DataLayoutEntryListRef params) const { + // As with the size, the alignment is taken from the default-address-space + // cir.ptr data-layout entry. Address-space-dependent alignments are not yet + // modeled. 
// FIXME: improve this in face of address spaces assert(!cir::MissingFeatures::dataLayoutPtrHandlingBasedOnLangAS()); - return 8; + if (std::optional<uint64_t> align = + getPointerSpecValue(params, CIRPtrDLPos::AbiAlign)) + return *align / kBitsInByte; + return kDefaultPointerAlignment; } llvm::TypeSize @@ -1112,14 +1154,16 @@ DataMemberType::getABIAlignment(const ::mlir::DataLayout &dataLayout, llvm::TypeSize VPtrType::getTypeSizeInBits(const mlir::DataLayout &dataLayout, mlir::DataLayoutEntryListRef params) const { - // FIXME: consider size differences under different ABIs - return llvm::TypeSize::getFixed(64); + // The vtable pointer is an ordinary data pointer; route the query through a + // cir.ptr so it picks up the same data-layout-driven width. + return dataLayout.getTypeSizeInBits( + cir::PointerType::get(cir::VoidType::get(getContext()))); } uint64_t VPtrType::getABIAlignment(const mlir::DataLayout &dataLayout, mlir::DataLayoutEntryListRef params) const { - // FIXME: consider alignment differences under different ABIs - return 8; + return dataLayout.getTypeABIAlignment( + cir::PointerType::get(cir::VoidType::get(getContext()))); } //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index c844375a000e0..1cc7f986de1c9 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -3703,6 +3703,23 @@ void ConvertCIRToLLVMPass::runOnOperation() { if (failed(applyPartialConversion(ops, target, std::move(patterns)))) signalPassFailure(); + // The CIR-native pointer data-layout entry (keyed on cir.ptr) drives pointer + // widths during CIR codegen and lowering, but cir.ptr has no meaning once the + // module is translated to LLVM IR. Drop it so the resulting data layout only + // references LLVM types. 
+ if (auto dlSpec = mlir::dyn_cast_or_null<mlir::DataLayoutSpecAttr>( + module->getAttr(mlir::DLTIDialect::kDataLayoutAttrName))) { + llvm::SmallVector<mlir::DataLayoutEntryInterface> kept; + for (mlir::DataLayoutEntryInterface entry : dlSpec.getEntries()) { + if (entry.isTypeEntry() && + mlir::isa<cir::PointerType>(mlir::cast<mlir::Type>(entry.getKey()))) + continue; + kept.push_back(entry); + } + module->setAttr(mlir::DLTIDialect::kDataLayoutAttrName, + mlir::DataLayoutSpecAttr::get(module.getContext(), kept)); + } + // Emit the llvm.global_ctors array. buildCtorDtorList(module, cir::CIRDialect::getGlobalCtorsAttrName(), "llvm.global_ctors", [](mlir::Attribute attr) { diff --git a/clang/test/CIR/CodeGen/pointer-width-32bit.cpp b/clang/test/CIR/CodeGen/pointer-width-32bit.cpp new file mode 100644 index 0000000000000..15f3ec92d7e16 --- /dev/null +++ b/clang/test/CIR/CodeGen/pointer-width-32bit.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -std=c++20 -triple nvptx-nvidia-cuda -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -std=c++20 -triple nvptx-nvidia-cuda -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -std=c++20 -triple nvptx-nvidia-cuda -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// On a target with 32-bit pointers (e.g. nvptx) both a data pointer (!cir.ptr) +// and the vtable pointer (!cir.vptr) are 4 bytes wide. The pointer width is +// carried by a CIR-native data-layout entry keyed on cir.ptr, so the field +// following a pointer lands at the AST-mandated offset. Sizing pointers as a +// hardcoded 64 bits previously tripped the record layout builder (insertPadding: +// assertion `offset >= size`) on every record containing a pointer. 
+ +struct S { + int *p; + int x; +}; + +S s; + +class A { +public: + virtual void f(); + int x; +}; + +void A::f() {} + +// The module carries a CIR-native pointer data-layout entry ({size, abi-align} +// in bits) that drives both cir.ptr and cir.vptr widths. The 4-byte pointer is +// immediately followed by 'x' at offset 4 with no padding, and each record is +// 4-byte aligned. +// CIR-DAG: !rec_S = !cir.struct<"S" {!cir.ptr<!s32i>, !s32i}> +// CIR-DAG: !rec_A = !cir.struct<class "A" {!cir.vptr, !s32i}> +// CIR-DAG: !cir.ptr<!cir.void> = array<i32: 32, 32> +// CIR: cir.global external @s = #cir.zero : !rec_S {alignment = 4 : i64} +// CIR: cir.global{{.*}}@_ZTV1A = #cir.vtable<{{.*}}{alignment = 4 : i64} + +// LLVM: @s = global %struct.S zeroinitializer, align 4 +// LLVM: @_ZTV1A = global { [3 x ptr] } {{.*}}, align 4 + +// OGCG: @s = global %struct.S zeroinitializer, align 4 +// OGCG: @_ZTV1A = {{.*}}constant { [3 x ptr] } {{.*}}, align 4 >From c7f7bec0222c869218cb61ddb4d107582169e5a1 Mon Sep 17 00:00:00 2001 From: AkshayK <[email protected]> Date: Wed, 17 Jun 2026 10:21:59 -0400 Subject: [PATCH 3/3] [CIR] Add 32-bit ARM (GenericARM) codegen and lowering support - Handle GenericARM in the transform-pass CXXABI dispatch, selecting the ARM method-pointer ABI and matching non-virtual member-pointer layout. - Use the two-word array cookie for new[]/delete[] on 32-bit ARM. - Return 'this' from constructors and non-deleting destructors, and add the 'returned' attribute on the 'this' argument, mirroring classic CodeGen. - Fix a verification crash when a static-storage object has a non-trivial destructor by building the destructor call against the structor's real ('this'-returning) signature. - Drive the size_t width of __cxa_allocate_exception and the cir.copy memcpy length from the data layout instead of hardcoding i64. - Implement the NEON vget_lane/vgetq_lane intrinsics as a vector element extraction; other ARM builtins still report NYI. 
Adds CIR/CodeGen tests for the array cookie, this-return, global destructor, exception allocation size, aggregate copy size, NEON lane reads and 32-bit ARM record layout. --- clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 37 ++++++++++++- clang/lib/CIR/CodeGen/CIRGenCXX.cpp | 9 ++- clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp | 4 ++ clang/lib/CIR/CodeGen/CIRGenCXXABI.h | 4 ++ clang/lib/CIR/CodeGen/CIRGenCall.cpp | 10 +--- clang/lib/CIR/CodeGen/CIRGenFunction.cpp | 8 ++- clang/lib/CIR/CodeGen/CIRGenFunction.h | 4 ++ clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp | 55 ++++++++++++++++--- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 12 ++++ .../CIR/Dialect/Transforms/CXXABILowering.cpp | 11 +++- .../TargetLowering/LowerItaniumCXXABI.cpp | 28 ++++++++-- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 26 ++++++--- .../CIR/CodeGen/arm-aggregate-copy-size.cpp | 16 ++++++ clang/test/CIR/CodeGen/arm-array-cookie.cpp | 32 +++++++++++ clang/test/CIR/CodeGen/arm-global-dtor.cpp | 29 ++++++++++ clang/test/CIR/CodeGen/arm-neon-vget-lane.c | 20 +++++++ clang/test/CIR/CodeGen/arm-record-layout.cpp | 39 +++++++++++++ clang/test/CIR/CodeGen/arm-this-return.cpp | 28 ++++++++++ .../test/CIR/CodeGen/arm-throw-alloc-size.cpp | 16 ++++++ 19 files changed, 354 insertions(+), 34 deletions(-) create mode 100644 clang/test/CIR/CodeGen/arm-aggregate-copy-size.cpp create mode 100644 clang/test/CIR/CodeGen/arm-array-cookie.cpp create mode 100644 clang/test/CIR/CodeGen/arm-global-dtor.cpp create mode 100644 clang/test/CIR/CodeGen/arm-neon-vget-lane.c create mode 100644 clang/test/CIR/CodeGen/arm-record-layout.cpp create mode 100644 clang/test/CIR/CodeGen/arm-this-return.cpp create mode 100644 clang/test/CIR/CodeGen/arm-throw-alloc-size.cpp diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index a483eb635f0e2..8a9c0c7edd088 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -24,6 +24,7 @@ #include 
"clang/Basic/Builtins.h" #include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/OperatorKinds.h" +#include "clang/Basic/TargetBuiltins.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" #include "clang/CIR/MissingFeatures.h" #include "llvm/IR/Intrinsics.h" @@ -2647,6 +2648,38 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, return getUndefRValue(e->getType()); } +std::optional<mlir::Value> +CIRGenFunction::emitARMBuiltinExpr(unsigned builtinID, const CallExpr *expr, + ReturnValueSlot returnValue, + llvm::Triple::ArchType arch) { + // Only the NEON lane-read intrinsics are implemented for 32-bit ARM so far; + // they lower to a vector element extraction, matching classic CodeGen + // (clang/lib/CodeGen/TargetBuiltins/ARM.cpp) and the AArch64 path. + switch (builtinID) { + default: + return std::nullopt; + case NEON::BI__builtin_neon_vget_lane_i8: + case NEON::BI__builtin_neon_vget_lane_i16: + case NEON::BI__builtin_neon_vget_lane_i32: + case NEON::BI__builtin_neon_vget_lane_i64: + case NEON::BI__builtin_neon_vget_lane_bf16: + case NEON::BI__builtin_neon_vget_lane_f32: + case NEON::BI__builtin_neon_vgetq_lane_i8: + case NEON::BI__builtin_neon_vgetq_lane_i16: + case NEON::BI__builtin_neon_vgetq_lane_i32: + case NEON::BI__builtin_neon_vgetq_lane_i64: + case NEON::BI__builtin_neon_vgetq_lane_bf16: + case NEON::BI__builtin_neon_vgetq_lane_f32: + case NEON::BI__builtin_neon_vduph_lane_bf16: + case NEON::BI__builtin_neon_vduph_laneq_bf16: { + mlir::Location loc = getLoc(expr->getExprLoc()); + mlir::Value vec = emitScalarExpr(expr->getArg(0)); + mlir::Value index = emitScalarExpr(expr->getArg(1)); + return cir::VecExtractOp::create(builder, loc, vec, index); + } + } +} + static std::optional<mlir::Value> emitTargetArchBuiltinExpr(CIRGenFunction *cgf, unsigned builtinID, const CallExpr *e, ReturnValueSlot &returnValue, @@ -2666,9 +2699,7 @@ emitTargetArchBuiltinExpr(CIRGenFunction *cgf, unsigned builtinID, case llvm::Triple::armeb: 
case llvm::Triple::thumb: case llvm::Triple::thumbeb: - // These are actually NYI, but that will be reported by emitBuiltinExpr. - // At this point, we don't even know that the builtin is target-specific. - return std::nullopt; + return cgf->emitARMBuiltinExpr(builtinID, e, returnValue, arch); case llvm::Triple::aarch64: case llvm::Triple::aarch64_32: case llvm::Triple::aarch64_be: diff --git a/clang/lib/CIR/CodeGen/CIRGenCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenCXX.cpp index 9c008acbc6f68..54659c03da092 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCXX.cpp @@ -176,8 +176,13 @@ static void emitDeclDestroy(CIRGenFunction &cgf, const VarDecl *vd, mlir::cast<cir::PointerType>(thisAddr.getType()).getAddrSpace()); if (realPtrTy != thisAddr.getType()) thisAddr = builder.createBitcast(thisAddr.getLoc(), thisAddr, realPtrTy); - builder.createCallOp(cgf.getLoc(vd->getSourceRange()), - mlir::FlatSymbolRefAttr::get(fnOp.getSymNameAttr()), + // Build the call against the destructor's actual signature rather than + // forcing a void result. Under the ARM C++ ABI structors return `this`, + // so a void-typed call would have fewer results than the callee and fail + // verification. The FuncOp overload derives the result type from the + // destructor itself; the returned `this` is unused here and is discarded + // when LoweringPrepare registers the destructor with __cxa_atexit. + builder.createCallOp(cgf.getLoc(vd->getSourceRange()), fnOp, mlir::ValueRange{thisAddr}); assert(fnOp && "expected cir.func"); // TODO(cir): This doesn't do anything but check for unhandled conditions. 
diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp index 83062c3906edf..ad493668ab06d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp @@ -80,6 +80,10 @@ void CIRGenCXXABI::setCXXABIThisValue(CIRGenFunction &cgf, cgf.cxxabiThisValue = thisPtr; } +mlir::Value CIRGenCXXABI::getThisValue(CIRGenFunction &cgf) { + return cgf.cxxabiThisValue; +} + CharUnits CIRGenCXXABI::getArrayCookieSize(const CXXNewExpr *e) { if (!requiresArrayCookie(e)) return CharUnits::Zero(); diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h index bb100f7afd929..c36fc4517f961 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h +++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h @@ -166,6 +166,10 @@ class CIRGenCXXABI { /// Loads the incoming C++ this pointer as it was passed by the caller. mlir::Value loadIncomingCXXThis(CIRGenFunction &cgf); + /// Returns the C++ this pointer as set by setCXXABIThisValue, before any + /// adjustment. + mlir::Value getThisValue(CIRGenFunction &cgf); + virtual CatchTypeInfo getAddrOfCXXCatchHandlerType(mlir::Location loc, QualType ty, QualType catchHandlerType) = 0; diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp index f648eff375a77..83665b5ec3947 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp @@ -839,9 +839,6 @@ CIRGenTypes::arrangeCXXStructorDeclaration(GlobalDecl gd) { ? 
astContext.VoidPtrTy : astContext.VoidTy; - assert(!theCXXABI.hasThisReturn(gd) && - "Please send PR with a test and remove this"); - assert(!cir::MissingFeatures::opCallCIRGenFuncInfoExtParamInfo()); assert(!cir::MissingFeatures::opCallFnInfoOpts()); @@ -973,13 +970,12 @@ const CIRGenFunctionInfo &CIRGenTypes::arrangeCXXConstructorCall( : RequiredArgs::All; GlobalDecl gd(d, ctorKind); - if (theCXXABI.hasThisReturn(gd)) - cgm.errorNYI(d->getSourceRange(), - "arrangeCXXConstructorCall: hasThisReturn"); if (theCXXABI.hasMostDerivedReturn(gd)) cgm.errorNYI(d->getSourceRange(), "arrangeCXXConstructorCall: hasMostDerivedReturn"); - CanQualType resultType = astContext.VoidTy; + // args[0] is the implicit 'this'; ABIs that return 'this' use its type. + CanQualType resultType = + theCXXABI.hasThisReturn(gd) ? argTypes.front() : astContext.VoidTy; assert(!cir::MissingFeatures::opCallFnInfoOpts()); assert(!cir::MissingFeatures::opCallCIRGenFuncInfoExtParamInfo()); diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index 4b020c96964a7..856bcb530974c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -1031,11 +1031,13 @@ clang::QualType CIRGenFunction::buildFunctionArgList(clang::GlobalDecl gd, // object as a regular parameter that fd->parameters() already enumerates. const auto *md = dyn_cast<CXXMethodDecl>(fd); if (md && md->isImplicitObjectMemberFunction()) { - if (cgm.getCXXABI().hasThisReturn(gd)) - cgm.errorNYI(fd->getSourceRange(), "this return"); - else if (cgm.getCXXABI().hasMostDerivedReturn(gd)) + if (cgm.getCXXABI().hasMostDerivedReturn(gd)) cgm.errorNYI(fd->getSourceRange(), "most derived return"); cgm.getCXXABI().buildThisParam(*this, args); + // ABIs that return 'this' make the function's return type the 'this' + // pointer, so a return slot is allocated and the prolog can store into it. 
+ if (cgm.getCXXABI().hasThisReturn(gd)) + retTy = args.front()->getType(); } bool passedParams = true; diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 317151c8d61c6..d3853067af5b0 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1537,6 +1537,10 @@ class CIRGenFunction : public CIRGenTypeCache { emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, ReturnValueSlot returnValue, llvm::Triple::ArchType arch); + std::optional<mlir::Value> emitARMBuiltinExpr(unsigned builtinID, + const CallExpr *expr, + ReturnValueSlot returnValue, + llvm::Triple::ArchType arch); std::optional<mlir::Value> emitAArch64SMEBuiltinExpr(unsigned builtinID, const CallExpr *expr); std::optional<mlir::Value> emitAArch64SVEBuiltinExpr(unsigned builtinID, diff --git a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp index 552d73966e97b..3953e9badf604 100644 --- a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp @@ -37,12 +37,31 @@ class CIRGenItaniumCXXABI : public CIRGenCXXABI { /// All the vtables which have been defined. llvm::DenseMap<const CXXRecordDecl *, cir::GlobalOp> vtables; + /// 32-bit ARM uses a two-word array cookie ({element_size, element_count}) + /// rather than the generic single-size_t cookie. + bool useARMArrayCookieABI; + + /// 32-bit ARM returns 'this' from constructors and non-deleting destructors. 
+ bool useARMThisReturnABI; + public: - CIRGenItaniumCXXABI(CIRGenModule &cgm) : CIRGenCXXABI(cgm) { + CIRGenItaniumCXXABI(CIRGenModule &cgm, bool useARMArrayCookieABI = false, + bool useARMThisReturnABI = false) + : CIRGenCXXABI(cgm), useARMArrayCookieABI(useARMArrayCookieABI), + useARMThisReturnABI(useARMThisReturnABI) { assert(!cir::MissingFeatures::cxxabiUseARMMethodPtrABI()); assert(!cir::MissingFeatures::cxxabiUseARMGuardVarABI()); } + bool hasThisReturn(clang::GlobalDecl gd) const override { + if (!useARMThisReturnABI) + return false; + // Constructors and non-deleting destructors return 'this'. + return isa<CXXConstructorDecl>(gd.getDecl()) || + (isa<CXXDestructorDecl>(gd.getDecl()) && + gd.getDtorType() != Dtor_Deleting); + } + AddedStructorArgs getImplicitConstructorArgs(CIRGenFunction &cgf, const CXXConstructorDecl *d, CXXCtorType type, @@ -262,8 +281,10 @@ void CIRGenItaniumCXXABI::emitInstanceFunctionProlog(SourceLocation loc, /// 2) in theory, an ABI could implement 'this' returns some other way; /// HasThisReturn only specifies a contract, not the implementation if (hasThisReturn(cgf.curGD)) { - cgf.cgm.errorNYI(cgf.curFuncDecl->getLocation(), - "emitInstanceFunctionProlog: hasThisReturn"); + // Store 'this' into the return slot at function entry; the epilogue + // returns whatever is in that slot. + cgf.getBuilder().createStore(cgf.getLoc(loc), getThisValue(cgf), + cgf.returnValue); } } @@ -1872,9 +1893,14 @@ CIRGenCXXABI *clang::CIRGen::CreateCIRGenItaniumCXXABI(CIRGenModule &cgm) { switch (cgm.getASTContext().getCXXABIKind()) { case TargetCXXABI::GenericItanium: case TargetCXXABI::GenericAArch64: - case TargetCXXABI::GenericARM: return new CIRGenItaniumCXXABI(cgm); + case TargetCXXABI::GenericARM: + // 32-bit ARM uses the two-word array cookie and returns 'this' from + // constructors and non-deleting destructors. 
+ return new CIRGenItaniumCXXABI(cgm, /*useARMArrayCookieABI=*/true, + /*useARMThisReturnABI=*/true); + case TargetCXXABI::AppleARM64: // The general Itanium ABI will do until we implement something that // requires special handling. @@ -2420,6 +2446,12 @@ void CIRGenItaniumCXXABI::emitVirtualObjectDelete( /************************** Array allocation cookies **************************/ CharUnits CIRGenItaniumCXXABI::getArrayCookieSizeImpl(QualType elementType) { + if (useARMArrayCookieABI) { + // On 32-bit ARM the cookie is always two size_t words: + // struct array_cookie { size_t element_size; size_t element_count; }; + return cgm.getSizeSize() * 2; + } + // The array cookie is a size_t; pad that up to the element alignment. // The cookie is actually right-justified in that space. return std::max( @@ -2444,9 +2476,7 @@ Address CIRGenItaniumCXXABI::initializeArrayCookie(CIRGenFunction &cgf, mlir::Location loc = cgf.getLoc(e->getSourceRange()); // The size of the cookie. - CharUnits cookieSize = - std::max(sizeSize, ctx.getPreferredTypeAlignInChars(elementType)); - assert(cookieSize == getArrayCookieSizeImpl(elementType)); + CharUnits cookieSize = getArrayCookieSizeImpl(elementType); mlir::Type u8Ty = cgf.getBuilder().getUInt8Ty(); cir::PointerType u8PtrTy = cgf.getBuilder().getUInt8PtrTy(); @@ -2472,6 +2502,17 @@ Address CIRGenItaniumCXXABI::initializeArrayCookie(CIRGenFunction &cgf, cookiePtr.withElementType(cgf.getBuilder(), cgf.sizeTy); cgf.getBuilder().createStore(loc, numElements, numElementsPtr); + // On 32-bit ARM the cookie's first word holds the element size. 
+ if (useARMArrayCookieABI) { + CharUnits eltSize = ctx.getTypeSizeInChars(elementType); + Address eltSizePtr(baseBytePtr, u8Ty, baseAlignment); + Address eltSizeSlot = + eltSizePtr.withElementType(cgf.getBuilder(), cgf.sizeTy); + mlir::Value eltSizeVal = + cgf.getBuilder().getConstInt(loc, cgf.sizeTy, eltSize.getQuantity()); + cgf.getBuilder().createStore(loc, eltSizeVal, eltSizeSlot); + } + // Finally, compute a pointer to the actual data buffer by skipping // over the cookie completely. mlir::Value dataOffset = diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index b377f84e8d370..c01c1a46d29b0 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -16,6 +16,7 @@ #include "CIRGenConstantEmitter.h" #include "CIRGenFunction.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/OpenMP/OpenMPOffloadUtils.h" #include "mlir/IR/SymbolTable.h" #include "clang/AST/ASTContext.h" @@ -3016,6 +3017,17 @@ void CIRGenModule::setFunctionAttributes(GlobalDecl globalDecl, getTypes().arrangeGlobalDeclaration(globalDecl), func, isThunk); + // Add the `returned` attribute for "this" on constructors/destructors that + // return it (e.g. under the ARM C++ ABI), except for iOS 5 and earlier where + // substantial code, including the libstdc++ dylib, was compiled with GCC and + // does not actually return "this". 
+ if (!isThunk && getCXXABI().hasThisReturn(globalDecl) && + !(getTriple().isiOS() && getTriple().isOSVersionLT(6))) { + assert(func.getNumArguments() != 0 && "unexpected this return"); + func.setArgAttr(0, mlir::LLVM::LLVMDialect::getReturnedAttrName(), + mlir::UnitAttr::get(&getMLIRContext())); + } + if (!isIncompleteFunction && func.isDeclaration()) getTargetCIRGenInfo().setTargetAttributes(funcDecl, func, *this); diff --git a/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp b/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp index 0bcfe124723e6..d288da4619ea8 100644 --- a/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp +++ b/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp @@ -671,13 +671,22 @@ mlir::LogicalResult CIRDeleteArrayOpABILowering::matchAndRewrite( [&](mlir::OpBuilder &b, mlir::Location l) { if (dtorFn) { auto eltPtrTy = cir::PointerType::get(ptrTy.getPointee()); + // The element destructor returns void on most targets, but returns + // 'this' under ABIs with ctor/dtor this-return (e.g. 32-bit ARM). + // Match the callee's result type so the call verifies. 
+ mlir::Type dtorResultTy = cir::VoidType::get(rewriter.getContext()); + if (auto dtorFunc = + mlir::SymbolTable::lookupNearestSymbolFrom<cir::FuncOp>( + op, dtorFn)) + if (!dtorFunc.getFunctionType().hasVoidReturn()) + dtorResultTy = dtorFunc.getFunctionType().getReturnType(); auto arrayDtor = cir::ArrayDtor::create( b, l, loweredAddress, numElements, [&](mlir::OpBuilder &bb, mlir::Location ll) { mlir::Value arg = bb.getInsertionBlock()->addArgument(eltPtrTy, ll); auto dtorCall = cir::CallOp::create( - bb, ll, dtorFn, cir::VoidType(), mlir::ValueRange{arg}); + bb, ll, dtorFn, dtorResultTy, mlir::ValueRange{arg}); if (!op.getDtorMayThrow()) dtorCall.setNothrowAttr(bb.getUnitAttr()); cir::YieldOp::create(bb, ll); diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp index 0e246c8612f25..1ed83e0c6148a 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp @@ -34,12 +34,15 @@ class LowerItaniumCXXABI : public CIRCXXABI { protected: bool useARMMethodPtrABI; bool use32BitVTableOffsetABI; + bool useARMArrayCookieABI; public: LowerItaniumCXXABI(LowerModule &lm, bool useARMMethodPtrABI = false, - bool use32BitVTableOffsetABI = false) + bool use32BitVTableOffsetABI = false, + bool useARMArrayCookieABI = false) : CIRCXXABI(lm), useARMMethodPtrABI(useARMMethodPtrABI), - use32BitVTableOffsetABI(use32BitVTableOffsetABI) {} + use32BitVTableOffsetABI(use32BitVTableOffsetABI), + useARMArrayCookieABI(useARMArrayCookieABI) {} /// Lower the given data member pointer type to its ABI type. The returned /// type is also a CIR type. 
@@ -144,6 +147,16 @@ std::unique_ptr<CIRCXXABI> createItaniumCXXABI(LowerModule &lm) { /*useARMMethodPtrABI=*/true, /*use32BitVTableOffsetABI=*/true); + case clang::TargetCXXABI::GenericARM: + // 32-bit ARM uses the ARM method-pointer encoding and the two-word array + // cookie but, unlike AppleARM64, does not use 32-bit vtable offsets. The + // non-trivial constructor/destructor return values are not yet modeled. + return std::make_unique<LowerItaniumCXXABI>( + lm, + /*useARMMethodPtrABI=*/true, + /*use32BitVTableOffsetABI=*/false, + /*useARMArrayCookieABI=*/true); + case clang::TargetCXXABI::GenericItanium: return std::make_unique<LowerItaniumCXXABI>(lm); @@ -861,10 +874,17 @@ LowerItaniumCXXABI::lowerVTableGetTypeInfo(cir::VTableGetTypeInfoOp op, clang::CharUnits LowerItaniumCXXABI::getArrayCookieSizeImpl( mlir::Type elementType, const mlir::DataLayout &dataLayout) const { - // The array cookie is a size_t; pad that up to the element alignment. - // The cookie is actually right-justified in that space. clang::CharUnits sizeOfSizeT = clang::CharUnits::fromQuantity(getPtrSizeInBits() / 8); + + // On 32-bit ARM the cookie is always two size_t words: {element_size, + // element_count}. The element count stays right-justified, so the read + // logic below is unchanged. + if (useARMArrayCookieABI) + return sizeOfSizeT * 2; + + // The array cookie is a size_t; pad that up to the element alignment. + // The cookie is actually right-justified in that space. 
clang::CharUnits eltAlign = clang::CharUnits::fromQuantity( dataLayout.getTypePreferredAlignment(elementType)); return std::max(sizeOfSizeT, eltAlign); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 1cc7f986de1c9..e3bd925b384c3 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -212,9 +212,14 @@ mlir::LogicalResult CIRToLLVMCopyOpLowering::matchAndRewrite( cir::CopyOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { mlir::DataLayout layout(op->getParentOfType<mlir::ModuleOp>()); + // The llvm.memcpy length is size_t-wide; its width is target-dependent (e.g. + // 32 bits on 32-bit ARM), so drive it from the data layout rather than + // hardcoding i64. + auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext()); + mlir::Type lenTy = + rewriter.getIntegerType(layout.getTypeSizeInBits(llvmPtrTy)); const mlir::Value length = mlir::LLVM::ConstantOp::create( - rewriter, op.getLoc(), rewriter.getI64Type(), - op.getCopySizeInBytes(layout)); + rewriter, op.getLoc(), lenTy, op.getCopySizeInBytes(layout)); assert(!cir::MissingFeatures::aggValueSlotVolatile()); rewriter.replaceOpWithNewOp<mlir::LLVM::MemcpyOp>( op, adaptor.getDst(), adaptor.getSrc(), length, op.getIsVolatile()); @@ -3880,15 +3885,22 @@ mlir::LogicalResult CIRToLLVMThrowOpLowering::matchAndRewrite( mlir::LogicalResult CIRToLLVMAllocExceptionOpLowering::matchAndRewrite( cir::AllocExceptionOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { - // Get or create `declare ptr @__cxa_allocate_exception(i64)` + // Get or create `declare ptr @__cxa_allocate_exception(size_t)`. The + // thrown_size parameter is size_t, whose width is target-dependent (e.g. 
32 + // bits on 32-bit ARM), so drive it from the data layout rather than + // hardcoding i64; otherwise the call mismatches the runtime on 32-bit + // targets. StringRef fnName = "__cxa_allocate_exception"; auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(rewriter.getContext()); - auto int64Ty = mlir::IntegerType::get(rewriter.getContext(), 64); - auto fnTy = mlir::LLVM::LLVMFunctionType::get(llvmPtrTy, {int64Ty}); + mlir::DataLayout layout(op->getParentOfType<mlir::ModuleOp>()); + auto sizeTTy = mlir::IntegerType::get(rewriter.getContext(), + layout.getTypeSizeInBits(llvmPtrTy)); + auto fnTy = mlir::LLVM::LLVMFunctionType::get(llvmPtrTy, {sizeTTy}); createLLVMFuncOpIfNotExist(rewriter, op, fnName, fnTy); - auto exceptionSize = mlir::LLVM::ConstantOp::create(rewriter, op.getLoc(), - adaptor.getSizeAttr()); + auto exceptionSize = mlir::LLVM::ConstantOp::create( + rewriter, op.getLoc(), sizeTTy, + rewriter.getIntegerAttr(sizeTTy, op.getSize())); auto allocaExceptionCall = mlir::LLVM::CallOp::create( rewriter, op.getLoc(), mlir::TypeRange{llvmPtrTy}, fnName, diff --git a/clang/test/CIR/CodeGen/arm-aggregate-copy-size.cpp b/clang/test/CIR/CodeGen/arm-aggregate-copy-size.cpp new file mode 100644 index 0000000000000..0b070c66cddfa --- /dev/null +++ b/clang/test/CIR/CodeGen/arm-aggregate-copy-size.cpp @@ -0,0 +1,16 @@ +// The length operand of the llvm.memcpy emitted for a cir.copy (e.g. passing an +// aggregate by value) is size_t-wide, whose width is target-dependent. On +// 32-bit ARM it must be i32, not the hardcoded i64 used by 64-bit targets. 
+// +// RUN: %clang_cc1 -std=c++20 -triple arm-linux-gnueabihf -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=ARM --input-file=%t.ll %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-x86.ll +// RUN: FileCheck --check-prefix=X86 --input-file=%t-x86.ll %s + +struct P { int x; int y; }; +int sum(P p); +int use() { P p; p.x = 1; p.y = 2; return sum(p); } + +// ARM: call void @llvm.memcpy.p0.p0.i32(ptr {{.*}}, ptr {{.*}}, i32 8, i1 false) + +// X86: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}, ptr {{.*}}, i64 8, i1 false) diff --git a/clang/test/CIR/CodeGen/arm-array-cookie.cpp b/clang/test/CIR/CodeGen/arm-array-cookie.cpp new file mode 100644 index 0000000000000..dc80a95264a20 --- /dev/null +++ b/clang/test/CIR/CodeGen/arm-array-cookie.cpp @@ -0,0 +1,32 @@ +// 32-bit ARM uses a two-word array cookie { element_size, element_count }, so a +// new[] allocation is two size_t words larger than the element data: the data +// starts 8 bytes in, and delete[] recovers the allocation base 8 bytes before +// the data. (The generic Itanium cookie is a single size_t.) +// +// RUN: %clang_cc1 -std=c++20 -triple arm-linux-gnueabihf -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -std=c++20 -triple arm-linux-gnueabihf -emit-llvm %s -o %t-ogcg.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t-ogcg.ll %s + +struct D { + ~D(); + int v; +}; + +void make() { + D *p = new D[10]; + delete[] p; +} + +// 10 elements * 4 bytes + 8-byte cookie = 48. 
+// LLVM: call {{.*}}@_Znaj(i32 noundef 48) +// LLVM-DAG: store i32 4, ptr %{{[0-9]+}} +// LLVM-DAG: store i32 10, ptr %{{[0-9]+}} +// LLVM: getelementptr i8, ptr %{{[0-9]+}}, i32 8 +// LLVM: getelementptr i8, ptr %{{[0-9]+}}, i32 -8 + +// OGCG: call {{.*}}@_Znaj(i32 noundef 48) +// OGCG-DAG: store i32 4, ptr +// OGCG-DAG: store i32 10, ptr +// OGCG: getelementptr inbounds i8, ptr %{{.*}}, i32 8 +// OGCG: getelementptr inbounds i8, ptr %{{.*}}, i32 -8 diff --git a/clang/test/CIR/CodeGen/arm-global-dtor.cpp b/clang/test/CIR/CodeGen/arm-global-dtor.cpp new file mode 100644 index 0000000000000..1e29beb350a66 --- /dev/null +++ b/clang/test/CIR/CodeGen/arm-global-dtor.cpp @@ -0,0 +1,29 @@ +// A static-storage object whose type has a non-trivial destructor must compile +// on 32-bit ARM. There, structors return 'this', so the implicit destructor +// call emitted for the global must be typed against the destructor's real +// (pointer-returning) signature; a void-typed call used to fail verification +// with "'cir.call' op incorrect number of results for callee". +// +// RUN: %clang_cc1 -std=c++20 -triple arm-linux-gnueabihf -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=ARM --input-file=%t.ll %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-x86.ll +// RUN: FileCheck --check-prefix=X86 --input-file=%t-x86.ll %s + +struct S { + S(); + ~S(); + int x; +}; + +S g; + +// On ARM the constructor returns 'this' and the non-trivial destructor is +// registered with __cxa_atexit for the global. +// ARM-LABEL: define internal void @__cxx_global_var_init() +// ARM: call noundef ptr @_ZN1SC1Ev(ptr {{.*}} @g) +// ARM: call void @__cxa_atexit(ptr @_ZN1SD1Ev, ptr @g, ptr @__dso_handle) + +// On x86_64 the structors return void; registration is otherwise identical. 
+// X86-LABEL: define internal void @__cxx_global_var_init() +// X86: call void @_ZN1SC1Ev(ptr {{.*}} @g) +// X86: call void @__cxa_atexit(ptr @_ZN1SD1Ev, ptr @g, ptr @__dso_handle) diff --git a/clang/test/CIR/CodeGen/arm-neon-vget-lane.c b/clang/test/CIR/CodeGen/arm-neon-vget-lane.c new file mode 100644 index 0000000000000..9f35e0c16fa98 --- /dev/null +++ b/clang/test/CIR/CodeGen/arm-neon-vget-lane.c @@ -0,0 +1,20 @@ +// The NEON lane-read intrinsics (vget_lane/vgetq_lane) lower to __builtin_neon_* +// builtins on 32-bit ARM (unlike AArch64, where arm_neon.h uses generic vector +// subscripting). Make sure CIR codegen lowers them to a vector element +// extraction instead of reporting them as unimplemented. + +// RUN: %clang_cc1 -triple armv7-unknown-linux-gnueabihf -target-feature +neon -ffreestanding -fclangir -emit-llvm %s -o - | FileCheck %s + +#include <arm_neon.h> + +// CHECK-LABEL: define dso_local i32 @get_s32( +// CHECK: extractelement <4 x i32> %{{.*}}, i32 2 +int get_s32(int32x4_t v) { return vgetq_lane_s32(v, 2); } + +// CHECK-LABEL: define dso_local float @get_f32( +// CHECK: extractelement <4 x float> %{{.*}}, i32 1 +float get_f32(float32x4_t v) { return vgetq_lane_f32(v, 1); } + +// CHECK-LABEL: define dso_local i16 @get_s16( +// CHECK: extractelement <4 x i16> %{{.*}}, i32 3 +short get_s16(int16x4_t v) { return vget_lane_s16(v, 3); } diff --git a/clang/test/CIR/CodeGen/arm-record-layout.cpp b/clang/test/CIR/CodeGen/arm-record-layout.cpp new file mode 100644 index 0000000000000..53a8d6024f0f5 --- /dev/null +++ b/clang/test/CIR/CodeGen/arm-record-layout.cpp @@ -0,0 +1,39 @@ +// 32-bit ARM lowers end-to-end through CIR, including the GenericARM CXXABI +// path (virtual classes / vtables). A record containing a pointer lays out with +// a 4-byte pointer followed by the next field with no padding, and both records +// are 4-byte aligned. 
+// +// +// RUN: %clang_cc1 -std=c++20 -triple arm-linux-gnueabihf -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -std=c++20 -triple arm-linux-gnueabihf -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -std=c++20 -triple arm-linux-gnueabihf -emit-llvm %s -o %t-ogcg.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t-ogcg.ll %s + +struct S { + int *p; + int x; +}; + +S s; + +class A { +public: + virtual void f(); + int x; +}; + +void A::f() {} + +// CIR-DAG: !rec_S = !cir.struct<"S" {!cir.ptr<!s32i>, !s32i}> +// CIR-DAG: !rec_A = !cir.struct<class "A" {!cir.vptr, !s32i}> +// CIR-DAG: !cir.ptr<!cir.void> = array<i32: 32, 32> +// CIR: cir.global external @s = #cir.zero : !rec_S {alignment = 4 : i64} +// CIR: cir.global {{.*}}@_ZTV1A = #cir.vtable<{{.*}}{alignment = 4 : i64} + +// LLVM: @s = global %struct.S zeroinitializer, align 4 +// LLVM: @_ZTV1A = global { [3 x ptr] } {{.*}}, align 4 + +// OGCG: @s = global %struct.S zeroinitializer, align 4 +// OGCG: @_ZTV1A = {{.*}}constant { [3 x ptr] } {{.*}}, align 4 diff --git a/clang/test/CIR/CodeGen/arm-this-return.cpp b/clang/test/CIR/CodeGen/arm-this-return.cpp new file mode 100644 index 0000000000000..9d111a86dda52 --- /dev/null +++ b/clang/test/CIR/CodeGen/arm-this-return.cpp @@ -0,0 +1,28 @@ +// 32-bit ARM returns 'this' from constructors and non-deleting destructors; +// other targets return void. 
+// +// RUN: %clang_cc1 -std=c++20 -triple arm-linux-gnueabihf -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=ARM --input-file=%t.ll %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-x86.ll +// RUN: FileCheck --check-prefix=X86 --input-file=%t-x86.ll %s + +struct S { + S(); + ~S(); + int x; +}; + +S::S() { x = 1; } +S::~S() {} + +// On ARM the constructor and destructor return the 'this' pointer, and the +// 'this' argument carries the 'returned' attribute. +// ARM: define{{.*}} ptr @_ZN1SC2Ev(ptr {{.*}} returned {{.*}}) +// ARM: ret ptr +// ARM: define{{.*}} ptr @_ZN1SD2Ev(ptr {{.*}} returned {{.*}}) +// ARM: ret ptr + +// On x86_64 they return void and there is no 'returned' attribute. +// X86: define{{.*}} void @_ZN1SC2Ev(ptr +// X86-NOT: returned +// X86: define{{.*}} void @_ZN1SD2Ev(ptr diff --git a/clang/test/CIR/CodeGen/arm-throw-alloc-size.cpp b/clang/test/CIR/CodeGen/arm-throw-alloc-size.cpp new file mode 100644 index 0000000000000..b74a52eceaa25 --- /dev/null +++ b/clang/test/CIR/CodeGen/arm-throw-alloc-size.cpp @@ -0,0 +1,16 @@ +// The thrown_size argument to __cxa_allocate_exception is size_t, whose width +// is target-dependent. On 32-bit ARM it must be i32, not the hardcoded i64 +// used by 64-bit targets. 
+// +// RUN: %clang_cc1 -std=c++20 -triple arm-linux-gnueabihf -fcxx-exceptions -fexceptions -fclangir -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=ARM --input-file=%t.ll %s +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fcxx-exceptions -fexceptions -fclangir -emit-llvm %s -o %t-x86.ll +// RUN: FileCheck --check-prefix=X86 --input-file=%t-x86.ll %s + +void f() { throw 42; } + +// ARM: declare ptr @__cxa_allocate_exception(i32) +// ARM: call ptr @__cxa_allocate_exception(i32 4) + +// X86: declare ptr @__cxa_allocate_exception(i64) +// X86: call ptr @__cxa_allocate_exception(i64 4) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
