https://github.com/topperc created https://github.com/llvm/llvm-project/pull/203601
The ABI for this isn't documented. The riscv_rvv_vector_bits feature was copied from arm_sve_vector_bits, which passes the types in vector registers. At the time I implemented this for RISC-V, I didn't think about struct arguments, so no ABI support was added. I think it makes sense to pass structs of these types in vector registers, similar to the vls_cc ABI. We already use vector registers for these types when the vls_cc ABI is enabled for a function (unclear if that was intentional), but we should do it unconditionally. I will work with GCC maintainers to see if they can do the same. In practice, there probably isn't much mixing of compilers with these types. This patch was heavily assisted by AI, but I've reviewed it and the changes seem reasonable to me. >From 27db028e805766649aa8127b86460d18f88a50ea Mon Sep 17 00:00:00 2001 From: Craig Topper <[email protected]> Date: Fri, 12 Jun 2026 10:49:19 -0700 Subject: [PATCH] [RISCV] Pass structs containing riscv_rvv_vector_bits types using vector registers. The ABI for this isn't documented. The riscv_rvv_vector_bits feature was copied from arm_sve_vector_bits, which passes the types in vector registers. At the time I implemented this for RISC-V, I didn't think about struct arguments, so no ABI support was added. I think it makes sense to pass structs of these types in vector registers, similar to the vls_cc ABI. We already use vector registers for these types when the vls_cc ABI is enabled for a function (unclear if that was intentional), but we should do it unconditionally. I will work with GCC maintainers to see if they can do the same. In practice, there probably isn't much mixing of compilers with these types. This patch was heavily assisted by AI, but I've reviewed it and the changes seem reasonable to me. 
--- clang/lib/CodeGen/Targets/RISCV.cpp | 104 +++++++ .../RISCV/attr-rvv-vector-bits-struct-call.c | 260 ++++++++++++++++++ .../riscv-rvv-fixed-length-struct-call.cpp | 63 +++++ 3 files changed, 427 insertions(+) create mode 100644 clang/test/CodeGen/RISCV/attr-rvv-vector-bits-struct-call.c create mode 100644 clang/test/CodeGenCXX/riscv-rvv-fixed-length-struct-call.cpp diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp index ffe1cc6086215..dd2a1196a5433 100644 --- a/clang/lib/CodeGen/Targets/RISCV.cpp +++ b/clang/lib/CodeGen/Targets/RISCV.cpp @@ -38,6 +38,8 @@ class RISCVABIInfo : public DefaultABIInfo { llvm::Type *detectVLSCCEligibleStruct(QualType Ty, unsigned ABIVLen) const; + llvm::Type *detectHomogeneousRVVFixedLengthStruct(QualType Ty) const; + public: RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen, bool EABI) @@ -520,6 +522,105 @@ llvm::Type *RISCVABIInfo::detectVLSCCEligibleStruct(QualType Ty, NumElts); } +llvm::Type *RISCVABIInfo::detectHomogeneousRVVFixedLengthStruct( + QualType Ty) const { + const auto *RT = Ty->getAsCanonical<RecordType>(); + if (!RT) + return nullptr; + + const RecordDecl *RD = RT->getDecl()->getDefinitionOrSelf(); + if (RD->isUnion()) + return nullptr; + if (getRecordArgABI(Ty, getCXXABI())) + return nullptr; + + // Reject C++ types with base classes. 
+ if (const auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) + if (CXXRD->getNumBases() != 0) + return nullptr; + + SmallVector<const FieldDecl *, 8> Fields(RD->fields()); + + if (Fields.empty()) + return nullptr; + + auto IsFixedLengthRVVVector = [](const VectorType *VT) { + switch (VT->getVectorKind()) { + case VectorKind::RVVFixedLengthData: + case VectorKind::RVVFixedLengthMask: + case VectorKind::RVVFixedLengthMask_1: + case VectorKind::RVVFixedLengthMask_2: + case VectorKind::RVVFixedLengthMask_4: + return true; + default: + return false; + } + }; + + QualType CommonTy; + unsigned Count = 0; + + // Single array field: struct { fixed-length RVV T a[N]; } + if (Fields.size() == 1) { + QualType FieldTy = Fields[0]->getType().getCanonicalType(); + if (const ConstantArrayType *AT = + getContext().getAsConstantArrayType(FieldTy)) { + QualType EltTy = AT->getElementType().getCanonicalType(); + if (const auto *VT = EltTy->getAs<VectorType>(); + VT && IsFixedLengthRVVVector(VT)) { + CommonTy = EltTy; + Count = AT->getZExtSize(); + } + } + } + + // All fields are the same fixed-length RVV vector type (data or mask). + if (CommonTy.isNull()) { + if (Fields.size() > 8) + return nullptr; + for (const FieldDecl *FD : Fields) { + QualType FieldTy = FD->getType().getCanonicalType(); + const auto *VT = FieldTy->getAs<VectorType>(); + if (!VT || !IsFixedLengthRVVVector(VT)) + return nullptr; + if (CommonTy.isNull()) + CommonTy = FieldTy; + else if (!getContext().hasSameType(CommonTy, FieldTy)) + return nullptr; + } + Count = Fields.size(); + } + + if (Count == 0 || Count > 8) + return nullptr; + + const auto *VT = CommonTy->castAs<VectorType>(); + llvm::Type *EltType = CGT.ConvertType(VT->getElementType()); + auto VScale = getContext().getTargetInfo().getVScaleRange( + getContext().getLangOpts(), TargetInfo::ArmStreamingKind::NotStreaming); + + // Ensure total register usage does not exceed 8. 
+ if (Count > 1 && + Count * llvm::divideCeil((uint64_t)VT->getNumElements() * + EltType->getScalarSizeInBits(), + VScale->first * llvm::RISCV::RVVBitsPerBlock) > + 8) + return nullptr; + + unsigned MinElts = llvm::divideCeil(VT->getNumElements(), VScale->first); + if (Count == 1) + return llvm::ScalableVectorType::get(EltType, MinElts); + + unsigned I8EltCount = + llvm::divideCeil((uint64_t)VT->getNumElements() * + EltType->getScalarSizeInBits(), + VScale->first * 8); + auto *I8Vec = llvm::ScalableVectorType::get( + llvm::Type::getInt8Ty(getVMContext()), I8EltCount); + return llvm::TargetExtType::get(getVMContext(), "riscv.vector.tuple", I8Vec, + Count); +} + // Fixed-length RVV vectors are represented as scalable vectors in function // args/return and must be coerced from fixed vectors. ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty, unsigned ABIVLen) const { @@ -656,6 +757,9 @@ ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, } if (IsFixed && Ty->isStructureOrClassType()) { + if (llvm::Type *CoerceTy = detectHomogeneousRVVFixedLengthStruct(Ty)) + return ABIArgInfo::getTargetSpecific(CoerceTy); + if (llvm::Type *VLSType = detectVLSCCEligibleStruct(Ty, ABIVLen)) return ABIArgInfo::getTargetSpecific(VLSType); } diff --git a/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-struct-call.c b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-struct-call.c new file mode 100644 index 0000000000000..1324e519cf729 --- /dev/null +++ b/clang/test/CodeGen/RISCV/attr-rvv-vector-bits-struct-call.c @@ -0,0 +1,260 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +v \ +// RUN: -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | FileCheck %s + +#include <riscv_vector.h> + +typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); +typedef vint32m4_t fixed_int32m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4))); +typedef vint32mf2_t 
fixed_int32mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2))); +typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); +// Mask types. getRVVEltType returns UnsignedCharTy, so LLVM type is <N x i8>. +// vbool1_t = nxv64i1; VScale=4 -> ExpectedSize=256 bits -> Clang <32 x i8> +// vbool4_t = nxv16i1; VScale=4 -> ExpectedSize=64 bits -> Clang <8 x i8> +// vbool64_t = nxv1i1; VScale=4 -> ExpectedSize=4 bits -> Clang <1 x i8> (sub-byte) +typedef vbool1_t fixed_bool1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); +typedef vbool4_t fixed_bool4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 4))); +typedef vbool64_t fixed_bool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 64))); + +//===----------------------------------------------------------------------===// +// Eligible: single field → coerced to scalable vector +//===----------------------------------------------------------------------===// + +struct st_1field { fixed_int32m1_t x; }; +// CHECK-LABEL: @test_1field( +// CHECK-SAME: <vscale x 2 x i32> +void test_1field(struct st_1field s) {} + +// Return type also coerced +// CHECK: define{{.*}} <vscale x 2 x i32> @test_return_1field( +struct st_1field test_return_1field(struct st_1field s) { return s; } + +// double element type +struct st_f64_1field { fixed_float64m1_t x; }; +// CHECK-LABEL: @test_f64_1field( +// CHECK-SAME: <vscale x 1 x double> +void test_f64_1field(struct st_f64_1field s) {} + +//===----------------------------------------------------------------------===// +// Eligible: single array[1] field → coerced to scalable vector +//===----------------------------------------------------------------------===// + +struct st_arr1 { fixed_int32m1_t x[1]; }; +// CHECK-LABEL: @test_arr1( +// CHECK-SAME: <vscale x 2 x i32> +void test_arr1(struct st_arr1 s) {} + +//===----------------------------------------------------------------------===// +// 
Eligible: multiple same-type fields → coerced to vector tuple +//===----------------------------------------------------------------------===// + +// 2 fields: 2 * LMUL1 = 2 registers +struct st_2field { fixed_int32m1_t x; fixed_int32m1_t y; }; +// CHECK-LABEL: @test_2field( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 8 x i8>, 2) +void test_2field(struct st_2field s) {} + +// Return type also coerced to tuple +// CHECK: define{{.*}} target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_return_2field( +struct st_2field test_return_2field(struct st_2field s) { return s; } + +// 3 fields: 3 * LMUL1 = 3 registers +struct st_3field { fixed_int32m1_t a; fixed_int32m1_t b; fixed_int32m1_t c; }; +// CHECK-LABEL: @test_3field( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 8 x i8>, 3) +void test_3field(struct st_3field s) {} + +// 8 fields: 8 * LMUL1 = 8 registers (at the limit) +struct st_8field { + fixed_int32m1_t a, b, c, d, e, f, g, h; +}; +// CHECK-LABEL: @test_8field( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 8 x i8>, 8) +void test_8field(struct st_8field s) {} + +// double element type, 2 fields +struct st_f64_2field { fixed_float64m1_t x; fixed_float64m1_t y; }; +// CHECK-LABEL: @test_f64_2field( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 8 x i8>, 2) +void test_f64_2field(struct st_f64_2field s) {} + +//===----------------------------------------------------------------------===// +// Eligible: single array[N] field → coerced to vector tuple +//===----------------------------------------------------------------------===// + +// array[4]: 4 * LMUL1 = 4 registers +struct st_arr4 { fixed_int32m1_t x[4]; }; +// CHECK-LABEL: @test_arr4( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 8 x i8>, 4) +void test_arr4(struct st_arr4 s) {} + +// array[8]: 8 * LMUL1 = 8 registers (at the limit) +struct st_arr8 { fixed_int32m1_t x[8]; }; +// CHECK-LABEL: @test_arr8( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 8 x 
i8>, 8) +void test_arr8(struct st_arr8 s) {} + +//===----------------------------------------------------------------------===// +// Eligible: high-LMUL fields +//===----------------------------------------------------------------------===// + +// 2 * LMUL4 = 8 registers (at the limit) +struct st_m4_2field { fixed_int32m4_t x; fixed_int32m4_t y; }; +// CHECK-LABEL: @test_m4_2field( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 32 x i8>, 2) +void test_m4_2field(struct st_m4_2field s) {} + +//===----------------------------------------------------------------------===// +// Fractional LMUL +//===----------------------------------------------------------------------===// + +struct st_mf2_1field { fixed_int32mf2_t x; }; +// CHECK-LABEL: @test_mf2_1field( +// CHECK-SAME: <vscale x 1 x i32> +void test_mf2_1field(struct st_mf2_1field s) {} + +struct st_mf2_2field { fixed_int32mf2_t x; fixed_int32mf2_t y; }; +// CHECK-LABEL: @test_mf2_2field( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 4 x i8>, 2) +void test_mf2_2field(struct st_mf2_2field s) {} + +struct st_mf2_8field { fixed_int32mf2_t a, b, c, d, e, f, g, h; }; +// CHECK-LABEL: @test_mf2_8field( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 4 x i8>, 8) +void test_mf2_8field(struct st_mf2_8field s) {} + +//===----------------------------------------------------------------------===// +// Ineligible: too many fields or array elements → passed indirectly +//===----------------------------------------------------------------------===// + +// 9 fields: exceeds max count of 8 +struct st_9field { + fixed_int32m1_t a, b, c, d, e, f, g, h, i; +}; +// CHECK-LABEL: @test_9field( +// CHECK-SAME: ptr +void test_9field(struct st_9field s) {} + +// array[9]: exceeds max count of 8 +struct st_arr9 { fixed_int32m1_t x[9]; }; +// CHECK-LABEL: @test_arr9( +// CHECK-SAME: ptr +void test_arr9(struct st_arr9 s) {} + +//===----------------------------------------------------------------------===// +// Ineligible: 
register limit exceeded → passed indirectly +//===----------------------------------------------------------------------===// + +// 3 * LMUL4 = 12 registers > 8 +struct st_m4_3field { fixed_int32m4_t x; fixed_int32m4_t y; fixed_int32m4_t z; }; +// CHECK-LABEL: @test_m4_3field( +// CHECK-SAME: ptr +void test_m4_3field(struct st_m4_3field s) {} + +//===----------------------------------------------------------------------===// +// Ineligible: heterogeneous or non-vector fields → passed indirectly +//===----------------------------------------------------------------------===// + +// Mixed vector element types +struct st_hetero { fixed_int32m1_t x; fixed_float64m1_t y; }; +// CHECK-LABEL: @test_hetero( +// CHECK-SAME: ptr +void test_hetero(struct st_hetero s) {} + +// Non-vector field +struct st_mixed { fixed_int32m1_t x; int y; }; +// CHECK-LABEL: @test_mixed( +// CHECK-SAME: ptr +void test_mixed(struct st_mixed s) {} + +//===----------------------------------------------------------------------===// +// Ineligible: union → passed indirectly +//===----------------------------------------------------------------------===// + +union u_vecs { fixed_int32m1_t x; fixed_int32m1_t y; }; +// CHECK-LABEL: @test_union( +// CHECK-SAME: ptr +void test_union(union u_vecs u) {} + +//===----------------------------------------------------------------------===// +// Ineligible: multiple array fields → passed indirectly +//===----------------------------------------------------------------------===// + +struct st_two_arrays { fixed_int32m1_t x[2]; fixed_int32m1_t y[2]; }; +// CHECK-LABEL: @test_two_arrays( +// CHECK-SAME: ptr +void test_two_arrays(struct st_two_arrays s) {} + +//===----------------------------------------------------------------------===// +// Mask (bool) types: single field → scalable vector +// MinElts = divideCeil(NumElts, VScale) with i8 element type. 
+//===----------------------------------------------------------------------===// + +// fixed_bool1_t: <32 x i8>, MinElts = divideCeil(32,4) = 8 -> <vscale x 8 x i8> +struct st_bool1_1field { fixed_bool1_t x; }; +// CHECK-LABEL: @test_bool1_1field( +// CHECK-SAME: <vscale x 8 x i8> +void test_bool1_1field(struct st_bool1_1field s) {} + +// fixed_bool4_t: <8 x i8>, MinElts = divideCeil(8,4) = 2 -> <vscale x 2 x i8> +struct st_bool4_1field { fixed_bool4_t x; }; +// CHECK-LABEL: @test_bool4_1field( +// CHECK-SAME: <vscale x 2 x i8> +void test_bool4_1field(struct st_bool4_1field s) {} + +// fixed_bool64_t: sub-byte <1 x i8>, MinElts = divideCeil(1,4) = 1 -> <vscale x 1 x i8> +struct st_bool64_1field { fixed_bool64_t x; }; +// CHECK-LABEL: @test_bool64_1field( +// CHECK-SAME: <vscale x 1 x i8> +void test_bool64_1field(struct st_bool64_1field s) {} + +//===----------------------------------------------------------------------===// +// Mask types: multiple fields → vector tuple +// I8EltCount = divideCeil(NumElts * 8, VScale * 8) +//===----------------------------------------------------------------------===// + +// 2 * fixed_bool1_t: I8EltCount = divideCeil(32*8, 4*8) = 8 +struct st_bool1_2field { fixed_bool1_t x; fixed_bool1_t y; }; +// CHECK-LABEL: @test_bool1_2field( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 8 x i8>, 2) +void test_bool1_2field(struct st_bool1_2field s) {} + +// 2 * fixed_bool4_t: I8EltCount = divideCeil(8*8, 4*8) = 2 +struct st_bool4_2field { fixed_bool4_t x; fixed_bool4_t y; }; +// CHECK-LABEL: @test_bool4_2field( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 2 x i8>, 2) +void test_bool4_2field(struct st_bool4_2field s) {} + +// 2 * fixed_bool64_t (sub-byte): I8EltCount = divideCeil(1*8, 4*8) = 1 +struct st_bool64_2field { fixed_bool64_t x; fixed_bool64_t y; }; +// CHECK-LABEL: @test_bool64_2field( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 1 x i8>, 2) +void test_bool64_2field(struct st_bool64_2field s) {} + +// 8 * 
fixed_bool1_t: 8 * divideCeil(32*8, 4*64) = 8*1 = 8 registers (at limit) +struct st_bool1_8field { fixed_bool1_t a, b, c, d, e, f, g, h; }; +// CHECK-LABEL: @test_bool1_8field( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 8 x i8>, 8) +void test_bool1_8field(struct st_bool1_8field s) {} + +//===----------------------------------------------------------------------===// +// Mask types: ineligible cases +//===----------------------------------------------------------------------===// + +// 9 fields: exceeds count limit of 8 +struct st_bool1_9field { fixed_bool1_t a, b, c, d, e, f, g, h, i; }; +// CHECK-LABEL: @test_bool1_9field( +// CHECK-SAME: ptr +void test_bool1_9field(struct st_bool1_9field s) {} + +// Mixed mask kinds: different canonical types → ineligible +struct st_bool_hetero { fixed_bool1_t x; fixed_bool4_t y; }; +// CHECK-LABEL: @test_bool_hetero( +// CHECK-SAME: ptr +void test_bool_hetero(struct st_bool_hetero s) {} + +// Mixed mask and data: different kinds → ineligible +struct st_bool_data_mix { fixed_bool1_t x; fixed_int32m1_t y; }; +// CHECK-LABEL: @test_bool_data_mix( +// CHECK-SAME: ptr +void test_bool_data_mix(struct st_bool_data_mix s) {} diff --git a/clang/test/CodeGenCXX/riscv-rvv-fixed-length-struct-call.cpp b/clang/test/CodeGenCXX/riscv-rvv-fixed-length-struct-call.cpp new file mode 100644 index 0000000000000..b2862a900d4f0 --- /dev/null +++ b/clang/test/CodeGenCXX/riscv-rvv-fixed-length-struct-call.cpp @@ -0,0 +1,63 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -std=c++11 -triple riscv64-none-linux-gnu -target-feature +v \ +// RUN: -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | FileCheck %s + +#include <riscv_vector.h> + +typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); + +//===----------------------------------------------------------------------===// +// Eligible cases work in C++ the same as C 
+//===----------------------------------------------------------------------===// + +struct st_1field { fixed_int32m1_t x; }; +// CHECK-LABEL: @test_1field( +// CHECK-SAME: <vscale x 2 x i32> +extern "C" void test_1field(st_1field s) {} + +struct st_2field { fixed_int32m1_t x; fixed_int32m1_t y; }; +// CHECK-LABEL: @test_2field( +// CHECK-SAME: target("riscv.vector.tuple", <vscale x 8 x i8>, 2) +extern "C" void test_2field(st_2field s) {} + +//===----------------------------------------------------------------------===// +// Ineligible: struct with base class → passed indirectly +// +// detectHomogeneousRVVFixedLengthStruct rejects any struct with base classes +// to avoid having to reason about their layout contribution. +//===----------------------------------------------------------------------===// + +struct empty_base {}; +struct derived_1field : empty_base { fixed_int32m1_t x; }; +// CHECK-LABEL: @test_base_class( +// CHECK-SAME: ptr +extern "C" void test_base_class(derived_1field s) {} + +//===----------------------------------------------------------------------===// +// Ineligible: non-trivial copy constructor → passed indirectly +// +// Handled by getRecordArgABI before detectHomogeneousRVVFixedLengthStruct +// is ever consulted. +//===----------------------------------------------------------------------===// + +struct nontrivial_copy { + fixed_int32m1_t x; + nontrivial_copy(const nontrivial_copy &) {} +}; +// CHECK-LABEL: @test_nontrivial_copy( +// CHECK-SAME: ptr +extern "C" void test_nontrivial_copy(nontrivial_copy s) {} + +//===----------------------------------------------------------------------===// +// Ineligible: non-trivial destructor → passed indirectly +// +// Also handled by getRecordArgABI before reaching our function. 
+//===----------------------------------------------------------------------===// + +struct nontrivial_dtor { + fixed_int32m1_t x; + ~nontrivial_dtor() {} +}; +// CHECK-LABEL: @test_nontrivial_dtor( +// CHECK-SAME: ptr +extern "C" void test_nontrivial_dtor(nontrivial_dtor s) {} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
