https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/169865
>From 829faa3884be286fe97ca07a2e3ab8d76b6a91fc Mon Sep 17 00:00:00 2001 From: Alex Voicu <[email protected]> Date: Fri, 28 Nov 2025 01:26:40 +0000 Subject: [PATCH 1/2] Use AMDGPU ABI for AMDGCNSPIRV; add lowering for `byref`. --- clang/lib/CodeGen/Targets/SPIR.cpp | 292 +++++++++++++--- .../amdgpu-kernel-arg-pointer-type.cu | 144 ++++---- clang/test/CodeGenCUDA/kernel-args.cu | 8 +- .../amdgcnspirv-uses-amdgpu-abi.cpp | 321 ++++++++++++++++++ llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 5 +- .../ptr-argument-byref-amdgcnspirv.ll | 24 ++ 6 files changed, 669 insertions(+), 125 deletions(-) create mode 100644 clang/test/CodeGenHIP/amdgcnspirv-uses-amdgpu-abi.cpp create mode 100644 llvm/test/CodeGen/SPIRV/pointers/ptr-argument-byref-amdgcnspirv.ll diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 1a8c85d8871ec..3540093074bfe 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -9,6 +9,11 @@ #include "ABIInfoImpl.h" #include "HLSLBufferLayoutBuilder.h" #include "TargetInfo.h" +#include "clang/Basic/LangOptions.h" +#include "llvm/IR/DerivedTypes.h" + +#include <stdint.h> +#include <utility> using namespace clang; using namespace clang::CodeGen; @@ -33,9 +38,41 @@ class SPIRVABIInfo : public CommonSPIRABIInfo { void computeInfo(CGFunctionInfo &FI) const override; private: + ABIArgInfo classifyKernelArgumentType(QualType Ty) const; +}; + +class AMDGCNSPIRVABIInfo : public SPIRVABIInfo { + // TODO: this should be unified / shared with AMDGPU, ideally we'd like to + // re-use AMDGPUABIInfo eventually, rather than duplicate. 
+ static constexpr unsigned MaxNumRegsForArgsRet = 16; // 16 32-bit registers + mutable unsigned NumRegsLeft = 0; + + unsigned numRegsForType(QualType Ty) const; + + bool isHomogeneousAggregateBaseType(QualType Ty) const override { + return true; + } + bool isHomogeneousAggregateSmallEnough(const Type *Base, + uint64_t Members) const override { + uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32; + + // Homogeneous Aggregates may occupy at most 16 registers. + return Members * NumRegs <= MaxNumRegsForArgsRet; + } + + // Coerce HIP scalar pointer arguments from generic pointers to global ones. + llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS, + unsigned ToAS) const; + ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyKernelArgumentType(QualType Ty) const; ABIArgInfo classifyArgumentType(QualType Ty) const; +public: + AMDGCNSPIRVABIInfo(CodeGenTypes &CGT) : SPIRVABIInfo(CGT) {} + void computeInfo(CGFunctionInfo &FI) const override; + + llvm::FixedVectorType *getOptimalVectorMemoryType( + llvm::FixedVectorType *Ty, const LangOptions &LangOpt) const override; }; } // end anonymous namespace namespace { @@ -83,7 +120,10 @@ class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo { class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo { public: SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT) - : CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {} + : CommonSPIRTargetCodeGenInfo( + (CGT.getTarget().getTriple().getVendor() == llvm::Triple::AMD) + ? 
std::make_unique<AMDGCNSPIRVABIInfo>(CGT) + : std::make_unique<SPIRVABIInfo>(CGT)) {} void setCUDAKernelCallingConvention(const FunctionType *&FT) const override; LangAS getGlobalVarAddressSpace(CodeGenModule &CGM, const VarDecl *D) const override; @@ -132,25 +172,6 @@ void CommonSPIRABIInfo::setCCs() { RuntimeCC = llvm::CallingConv::SPIR_FUNC; } -ABIArgInfo SPIRVABIInfo::classifyReturnType(QualType RetTy) const { - if (getTarget().getTriple().getVendor() != llvm::Triple::AMD) - return DefaultABIInfo::classifyReturnType(RetTy); - if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI())) - return DefaultABIInfo::classifyReturnType(RetTy); - - if (const auto *RD = RetTy->getAsRecordDecl(); - RD && RD->hasFlexibleArrayMember()) - return DefaultABIInfo::classifyReturnType(RetTy); - - // TODO: The AMDGPU ABI is non-trivial to represent in SPIR-V; in order to - // avoid encoding various architecture specific bits here we return everything - // as direct to retain type info for things like aggregates, for later perusal - // when translating back to LLVM/lowering in the BE. This is also why we - // disable flattening as the outcomes can mismatch between SPIR-V and AMDGPU. - // This will be revisited / optimised in the future. - return ABIArgInfo::getDirect(CGT.ConvertType(RetTy), 0u, nullptr, false); -} - ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const { if (getContext().getLangOpts().isTargetDevice()) { // Coerce pointer arguments with default address space to CrossWorkGroup @@ -167,18 +188,6 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const { } if (isAggregateTypeForABI(Ty)) { - if (getTarget().getTriple().getVendor() == llvm::Triple::AMD) - // TODO: The AMDGPU kernel ABI passes aggregates byref, which is not - // currently expressible in SPIR-V; SPIR-V passes aggregates byval, - // which the AMDGPU kernel ABI does not allow. 
Passing aggregates as - // direct works around this impedance mismatch, as it retains type info - // and can be correctly handled, post reverse-translation, by the AMDGPU - // BE, which has to support this CC for legacy OpenCL purposes. It can - // be brittle and does lead to performance degradation in certain - // pathological cases. This will be revisited / optimised in the future, - // once a way to deal with the byref/byval impedance mismatch is - // identified. - return ABIArgInfo::getDirect(LTy, 0, nullptr, false); // Force copying aggregate type in kernel arguments by value when // compiling CUDA targeting SPIR-V. This is required for the object // copied to be valid on the device. @@ -193,11 +202,150 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const { return classifyArgumentType(Ty); } -ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const { - if (getTarget().getTriple().getVendor() != llvm::Triple::AMD) - return DefaultABIInfo::classifyArgumentType(Ty); - if (!isAggregateTypeForABI(Ty)) - return DefaultABIInfo::classifyArgumentType(Ty); +void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const { + // The logic is same as in DefaultABIInfo with an exception on the kernel + // arguments handling. + llvm::CallingConv::ID CC = FI.getCallingConvention(); + + if (!getCXXABI().classifyReturnType(FI)) + FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + + for (auto &I : FI.arguments()) { + if (CC == llvm::CallingConv::SPIR_KERNEL) { + I.info = classifyKernelArgumentType(I.type); + } else { + I.info = classifyArgumentType(I.type); + } + } +} + +unsigned AMDGCNSPIRVABIInfo::numRegsForType(QualType Ty) const { + // This duplicates the AMDGPUABI computation. + unsigned NumRegs = 0; + + if (const VectorType *VT = Ty->getAs<VectorType>()) { + // Compute from the number of elements. The reported size is based on the + // in-memory size, which includes the padding 4th element for 3-vectors. 
+ QualType EltTy = VT->getElementType(); + unsigned EltSize = getContext().getTypeSize(EltTy); + + // 16-bit element vectors should be passed as packed. + if (EltSize == 16) + return (VT->getNumElements() + 1) / 2; + + unsigned EltNumRegs = (EltSize + 31) / 32; + return EltNumRegs * VT->getNumElements(); + } + + if (const auto *RD = Ty->getAsRecordDecl()) { + assert(!RD->hasFlexibleArrayMember()); + + for (const FieldDecl *Field : RD->fields()) { + QualType FieldTy = Field->getType(); + NumRegs += numRegsForType(FieldTy); + } + + return NumRegs; + } + + return (getContext().getTypeSize(Ty) + 31) / 32; +} + +llvm::Type * +AMDGCNSPIRVABIInfo::coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS, + unsigned ToAS) const { + // Single value types. + auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty); + if (PtrTy && PtrTy->getAddressSpace() == FromAS) + return llvm::PointerType::get(Ty->getContext(), ToAS); + return Ty; +} + +ABIArgInfo AMDGCNSPIRVABIInfo::classifyReturnType(QualType RetTy) const { + if (!isAggregateTypeForABI(RetTy) || getRecordArgABI(RetTy, getCXXABI())) + return DefaultABIInfo::classifyReturnType(RetTy); + + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), RetTy, true)) + return ABIArgInfo::getIgnore(); + + // Lower single-element structs to just return a regular value. + if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + + if (const auto *RD = RetTy->getAsRecordDecl(); + RD && RD->hasFlexibleArrayMember()) + return DefaultABIInfo::classifyReturnType(RetTy); + + // Pack aggregates <= 4 bytes into single VGPR or pair. 
+ uint64_t Size = getContext().getTypeSize(RetTy); + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + // TODO: This is carried over from an AMDGPU oddity, we retain it to + // ensure consistency, but it might be reasonable to return Int64. + if (Size <= 64) { + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + + if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet) + return ABIArgInfo::getDirect(); + return DefaultABIInfo::classifyReturnType(RetTy); +} + +/// For kernels all parameters are really passed in a special buffer. It doesn't +/// make sense to pass anything byval, so everything must be direct. +ABIArgInfo AMDGCNSPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const { + Ty = useFirstFieldIfTransparentUnion(Ty); + + // TODO: Can we omit empty structs? + + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + Ty = QualType(SeltTy, 0); + + llvm::Type *OrigLTy = CGT.ConvertType(Ty); + llvm::Type *LTy = OrigLTy; + if (getContext().getLangOpts().isTargetDevice()) { + LTy = coerceKernelArgumentType( + OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default), + /*ToAS=*/getContext().getTargetAddressSpace(LangAS::opencl_global)); + } + + // FIXME: This doesn't apply the optimization of coercing pointers in structs + // to global address space when using byref. This would require implementing a + // new kind of coercion of the in-memory type for indirect arguments. 
+ if (LTy == OrigLTy && isAggregateTypeForABI(Ty)) { + return ABIArgInfo::getIndirectAliased( + getContext().getTypeAlignInChars(Ty), + getContext().getTargetAddressSpace(LangAS::opencl_constant), + false /*Realign*/, nullptr /*Padding*/); + } + + // TODO: inhibiting flattening is an AMDGPU workaround for Clover, which might + // be vestigial and should be revisited. + return ABIArgInfo::getDirect(LTy, 0, nullptr, false); +} + +ABIArgInfo AMDGCNSPIRVABIInfo::classifyArgumentType(QualType Ty) const { + assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow"); + + Ty = useFirstFieldIfTransparentUnion(Ty); + + // TODO: support for variadics. + + if (!isAggregateTypeForABI(Ty)) { + ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty); + if (!ArgInfo.isIndirect()) { + unsigned NumRegs = numRegsForType(Ty); + NumRegsLeft -= std::min(NumRegs, NumRegsLeft); + } + + return ArgInfo; + } // Records with non-trivial destructors/copy-constructors should not be // passed by value. @@ -205,37 +353,87 @@ ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const { return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(), RAA == CGCXXABI::RAA_DirectInMemory); + // Ignore empty structs/unions. + if (isEmptyRecord(getContext(), Ty, true)) + return ABIArgInfo::getIgnore(); + + // Lower single-element structs to just pass a regular value. TODO: We + // could do reasonable-size multiple-element structs too, using getExpand(), + // though watch out for things like bitfields. + if (const Type *SeltTy = isSingleElementStruct(Ty, getContext())) + return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0))); + if (const auto *RD = Ty->getAsRecordDecl(); RD && RD->hasFlexibleArrayMember()) return DefaultABIInfo::classifyArgumentType(Ty); - return ABIArgInfo::getDirect(CGT.ConvertType(Ty), 0u, nullptr, false); + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 64) { + // Pack aggregates <= 8 bytes into single VGPR or pair. 
+ unsigned NumRegs = (Size + 31) / 32; + NumRegsLeft -= std::min(NumRegsLeft, NumRegs); + + if (Size <= 16) + return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); + + if (Size <= 32) + return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); + + // TODO: This is an AMDGPU oddity, and might be vestigial, we retain it to + // ensure consistency, but it should be revisited. + llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2)); + } + + if (NumRegsLeft > 0) { + unsigned NumRegs = numRegsForType(Ty); + if (NumRegsLeft >= NumRegs) { + NumRegsLeft -= NumRegs; + return ABIArgInfo::getDirect(); + } + } + + // Use pass-by-reference instead of pass-by-value for struct arguments in + // function ABI. + return ABIArgInfo::getIndirectAliased( + getContext().getTypeAlignInChars(Ty), + getContext().getTargetAddressSpace(LangAS::opencl_private)); } -void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const { - // The logic is same as in DefaultABIInfo with an exception on the kernel - // arguments handling. +void AMDGCNSPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const { llvm::CallingConv::ID CC = FI.getCallingConvention(); if (!getCXXABI().classifyReturnType(FI)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); + NumRegsLeft = MaxNumRegsForArgsRet; for (auto &I : FI.arguments()) { - if (CC == llvm::CallingConv::SPIR_KERNEL) { + if (CC == llvm::CallingConv::SPIR_KERNEL) I.info = classifyKernelArgumentType(I.type); - } else { + else I.info = classifyArgumentType(I.type); - } } } +llvm::FixedVectorType *AMDGCNSPIRVABIInfo::getOptimalVectorMemoryType( + llvm::FixedVectorType *Ty, const LangOptions &LangOpt) const { + // AMDGPU has legal instructions for 96-bit so 3x32 can be supported. 
+ if (Ty->getNumElements() == 3 && getDataLayout().getTypeSizeInBits(Ty) == 96) + return Ty; + return DefaultABIInfo::getOptimalVectorMemoryType(Ty, LangOpt); +} + namespace clang { namespace CodeGen { void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) { - if (CGM.getTarget().getTriple().isSPIRV()) - SPIRVABIInfo(CGM.getTypes()).computeInfo(FI); - else + if (CGM.getTarget().getTriple().isSPIRV()) { + if (CGM.getTarget().getTriple().getVendor() == llvm::Triple::AMD) + AMDGCNSPIRVABIInfo(CGM.getTypes()).computeInfo(FI); + else + SPIRVABIInfo(CGM.getTypes()).computeInfo(FI); + } else { CommonSPIRABIInfo(CGM.getTypes()).computeInfo(FI); + } } } } diff --git a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu index a48affaec3c8a..bf45a353851b4 100644 --- a/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu +++ b/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu @@ -95,7 +95,7 @@ __global__ void kernel1(int *x) { // CHECK-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr [[X_ASCAST]], align 8 // CHECK-NEXT: [[X1:%.*]] = load ptr, ptr [[X_ASCAST]], align 8 // CHECK-NEXT: store ptr [[X1]], ptr [[X_ADDR_ASCAST]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR_ASCAST]], align 8, !nonnull [[META4:![0-9]+]], !align [[META5:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 @@ -111,7 +111,7 @@ __global__ void kernel1(int *x) { // CHECK-SPIRV-NEXT: store ptr addrspace(1) [[X_COERCE]], ptr addrspace(4) [[X_ASCAST]], align 8 // CHECK-SPIRV-NEXT: [[X1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ASCAST]], align 8 // CHECK-SPIRV-NEXT: store ptr addrspace(4) [[X1]], ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 -// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), 
ptr addrspace(4) [[X_ADDR_ASCAST]], align 8 +// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X_ADDR_ASCAST]], align 8, !align [[META6:![0-9]+]] // CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4 // CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4 @@ -302,28 +302,23 @@ struct S { // CHECK-NEXT: ret void // // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S( -// CHECK-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { +// CHECK-SPIRV-SAME: ptr addrspace(2) noundef byref([[STRUCT_S:%.*]]) align 8 [[TMP0:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { // CHECK-SPIRV-NEXT: [[ENTRY:.*:]] -// CHECK-SPIRV-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 8 -// CHECK-SPIRV-NEXT: [[S1:%.*]] = addrspacecast ptr [[S]] to ptr addrspace(4) -// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0 -// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0 -// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8 -// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1 -// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1 -// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP2]], align 8 -// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 0 -// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8 -// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP4]], i64 0 -// CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4 -// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK-SPIRV-NEXT: 
[[COERCE:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK-SPIRV-NEXT: [[S:%.*]] = addrspacecast ptr [[COERCE]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: call addrspace(4) void @llvm.memcpy.p4.p2.i64(ptr addrspace(4) align 8 [[S]], ptr addrspace(2) align 8 [[TMP0]], i64 16, i1 false) +// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S]], i32 0, i32 0 +// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr addrspace(4) [[TMP1]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[ARRAYIDX]], align 4 +// CHECK-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[ARRAYIDX]], align 4 -// CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S1]], i32 0, i32 1 -// CHECK-SPIRV-NEXT: [[TMP6:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8 -// CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP6]], i64 0 -// CHECK-SPIRV-NEXT: [[TMP7:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4 -// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP7]], 1.000000e+00 -// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4 +// CHECK-SPIRV-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr addrspace(4) [[S]], i32 0, i32 1 +// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[Y]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP3]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX1]], align 4 +// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP4]], 1.000000e+00 +// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX1]], align 4 // CHECK-SPIRV-NEXT: ret void // // OPT-LABEL: define dso_local 
amdgpu_kernel void @_Z7kernel41S( @@ -343,16 +338,17 @@ struct S { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel41S( -// OPT-SPIRV-SAME: [[STRUCT_S:%.*]] [[S_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { +// OPT-SPIRV-SAME: ptr addrspace(2) noundef readonly byref([[STRUCT_S:%.*]]) align 8 captures(none) [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 0 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_S]] [[S_COERCE]], 1 -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4 -// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[TMP0]], align 4 -// OPT-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP1]], align 4 -// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 -// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr addrspace(2) [[TMP0]], align 8 +// OPT-SPIRV-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(2) [[TMP0]], i64 8 +// OPT-SPIRV-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr addrspace(2) [[COERCE_SROA_2_0__SROA_IDX]], align 8 +// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[COERCE_SROA_0_0_COPYLOAD]], align 4 +// OPT-SPIRV-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// OPT-SPIRV-NEXT: store i32 [[INC]], ptr addrspace(4) [[COERCE_SROA_0_0_COPYLOAD]], align 4 +// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[COERCE_SROA_2_0_COPYLOAD]], align 4 +// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP2]], 1.000000e+00 +// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[COERCE_SROA_2_0_COPYLOAD]], align 
4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel41S( @@ -511,27 +507,25 @@ struct T { // CHECK-NEXT: ret void // // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T( -// CHECK-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { +// CHECK-SPIRV-SAME: ptr addrspace(2) noundef byref([[STRUCT_T:%.*]]) align 8 [[TMP0:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { // CHECK-SPIRV-NEXT: [[ENTRY:.*:]] -// CHECK-SPIRV-NEXT: [[T:%.*]] = alloca [[STRUCT_T]], align 8 -// CHECK-SPIRV-NEXT: [[T1:%.*]] = addrspacecast ptr [[T]] to ptr addrspace(4) -// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0 -// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0 -// CHECK-SPIRV-NEXT: store [2 x ptr addrspace(4)] [[TMP1]], ptr addrspace(4) [[TMP0]], align 8 -// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0 +// CHECK-SPIRV-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_T]], align 8 +// CHECK-SPIRV-NEXT: [[T:%.*]] = addrspacecast ptr [[COERCE]] to ptr addrspace(4) +// CHECK-SPIRV-NEXT: call addrspace(4) void @llvm.memcpy.p4.p2.i64(ptr addrspace(4) align 8 [[T]], ptr addrspace(2) align 8 [[TMP0]], i64 16, i1 false) +// CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T]], i32 0, i32 0 // CHECK-SPIRV-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X]], i64 0, i64 0 -// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8 -// CHECK-SPIRV-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP2]], i64 0 -// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX2]], align 4 -// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 1.000000e+00 -// 
CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX2]], align 4 -// CHECK-SPIRV-NEXT: [[X3:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T1]], i32 0, i32 0 -// CHECK-SPIRV-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X3]], i64 0, i64 1 -// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX4]], align 8 -// CHECK-SPIRV-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP4]], i64 0 -// CHECK-SPIRV-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX5]], align 4 -// CHECK-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP5]], 2.000000e+00 -// CHECK-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[ARRAYIDX5]], align 4 +// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP1]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX1]], align 4 +// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP2]], 1.000000e+00 +// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[ARRAYIDX1]], align 4 +// CHECK-SPIRV-NEXT: [[X2:%.*]] = getelementptr inbounds nuw [[STRUCT_T]], ptr addrspace(4) [[T]], i32 0, i32 0 +// CHECK-SPIRV-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x ptr addrspace(4)], ptr addrspace(4) [[X2]], i64 0, i64 1 +// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ARRAYIDX3]], align 8 +// CHECK-SPIRV-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr addrspace(4) [[TMP3]], i64 0 +// CHECK-SPIRV-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(4) [[ARRAYIDX4]], align 4 +// CHECK-SPIRV-NEXT: [[ADD5:%.*]] = fadd contract float [[TMP4]], 2.000000e+00 +// CHECK-SPIRV-NEXT: store float [[ADD5]], ptr addrspace(4) [[ARRAYIDX4]], align 4 // CHECK-SPIRV-NEXT: ret void // // OPT-LABEL: define dso_local 
amdgpu_kernel void @_Z7kernel61T( @@ -551,17 +545,17 @@ struct T { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel61T( -// OPT-SPIRV-SAME: [[STRUCT_T:%.*]] [[T_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { +// OPT-SPIRV-SAME: ptr addrspace(2) noundef readonly byref([[STRUCT_T:%.*]]) align 8 captures(none) [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_T]] [[T_COERCE]], 0 -// OPT-SPIRV-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 0 -// OPT-SPIRV-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x ptr addrspace(4)] [[TMP0]], 1 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4 +// OPT-SPIRV-NEXT: [[COERCE_SROA_0_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr addrspace(2) [[TMP0]], align 8 +// OPT-SPIRV-NEXT: [[COERCE_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(2) [[TMP0]], i64 8 +// OPT-SPIRV-NEXT: [[COERCE_SROA_2_0_COPYLOAD:%.*]] = load ptr addrspace(4), ptr addrspace(2) [[COERCE_SROA_2_0__SROA_IDX]], align 8 +// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[COERCE_SROA_0_0_COPYLOAD]], align 4 // OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 1.000000e+00 -// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[DOTFCA_0_EXTRACT]], align 4 -// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4 -// OPT-SPIRV-NEXT: [[ADD6:%.*]] = fadd contract float [[TMP2]], 2.000000e+00 -// OPT-SPIRV-NEXT: store float [[ADD6]], ptr addrspace(4) [[DOTFCA_1_EXTRACT]], align 4 +// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[COERCE_SROA_0_0_COPYLOAD]], align 4 +// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[COERCE_SROA_2_0_COPYLOAD]], align 4 +// OPT-SPIRV-NEXT: [[ADD5:%.*]] = fadd 
contract float [[TMP2]], 2.000000e+00 +// OPT-SPIRV-NEXT: store float [[ADD5]], ptr addrspace(4) [[COERCE_SROA_2_0_COPYLOAD]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel61T( @@ -677,18 +671,17 @@ struct SS { // CHECK-NEXT: ret void // // CHECK-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS( -// CHECK-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { +// CHECK-SPIRV-SAME: ptr addrspace(1) [[A_COERCE:%.*]]) addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { // CHECK-SPIRV-NEXT: [[ENTRY:.*:]] -// CHECK-SPIRV-NEXT: [[A:%.*]] = alloca [[STRUCT_SS]], align 8 +// CHECK-SPIRV-NEXT: [[A:%.*]] = alloca [[STRUCT_SS:%.*]], align 8 // CHECK-SPIRV-NEXT: [[A1:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) -// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0 -// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0 -// CHECK-SPIRV-NEXT: store ptr addrspace(4) [[TMP1]], ptr addrspace(4) [[TMP0]], align 8 +// CHECK-SPIRV-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0 +// CHECK-SPIRV-NEXT: store ptr addrspace(1) [[A_COERCE]], ptr addrspace(4) [[COERCE_DIVE]], align 8 // CHECK-SPIRV-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_SS]], ptr addrspace(4) [[A1]], i32 0, i32 0 -// CHECK-SPIRV-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8 -// CHECK-SPIRV-NEXT: [[TMP3:%.*]] = load float, ptr addrspace(4) [[TMP2]], align 4 -// CHECK-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP3]], 3.000000e+00 -// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP2]], align 4 +// CHECK-SPIRV-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[X]], align 8 +// CHECK-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4 +// CHECK-SPIRV-NEXT: [[ADD:%.*]] 
= fadd contract float [[TMP1]], 3.000000e+00 +// CHECK-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP0]], align 4 // CHECK-SPIRV-NEXT: ret void // // OPT-LABEL: define dso_local amdgpu_kernel void @_Z7kernel82SS( @@ -700,12 +693,13 @@ struct SS { // OPT-NEXT: ret void // // OPT-SPIRV-LABEL: define spir_kernel void @_Z7kernel82SS( -// OPT-SPIRV-SAME: [[STRUCT_SS:%.*]] [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { +// OPT-SPIRV-SAME: ptr addrspace(1) [[A_COERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META5]] { // OPT-SPIRV-NEXT: [[ENTRY:.*:]] -// OPT-SPIRV-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_SS]] [[A_COERCE]], 0 -// OPT-SPIRV-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(4) [[TMP0]], align 4 -// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP1]], 3.000000e+00 -// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP0]], align 4 +// OPT-SPIRV-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(1) [[A_COERCE]] to i64 +// OPT-SPIRV-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) +// OPT-SPIRV-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[TMP1]], align 4 +// OPT-SPIRV-NEXT: [[ADD:%.*]] = fadd contract float [[TMP2]], 3.000000e+00 +// OPT-SPIRV-NEXT: store float [[ADD]], ptr addrspace(4) [[TMP1]], align 4 // OPT-SPIRV-NEXT: ret void // // HOST-LABEL: define dso_local void @_Z22__device_stub__kernel82SS( @@ -727,7 +721,11 @@ __global__ void kernel8(struct SS a) { *a.x += 3.f; } //. +// CHECK: [[META4]] = !{} +// CHECK: [[META5]] = !{i64 4} +//. // CHECK-SPIRV: [[META5]] = !{i32 1024, i32 1, i32 1} +// CHECK-SPIRV: [[META6]] = !{i64 4} //. // OPT: [[META4]] = !{} //. 
diff --git a/clang/test/CodeGenCUDA/kernel-args.cu b/clang/test/CodeGenCUDA/kernel-args.cu index 8d17d89b315de..386fb8f2bfd11 100644 --- a/clang/test/CodeGenCUDA/kernel-args.cu +++ b/clang/test/CodeGenCUDA/kernel-args.cu @@ -12,7 +12,7 @@ struct A { }; // AMDGCN: define{{.*}} amdgpu_kernel void @_Z6kernel1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}}) -// AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z6kernel1A(%struct.A %{{.+}}) +// AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z6kernel1A(ptr addrspace(2) noundef byref(%struct.A) align 8 %{{.+}}) // NVPTX: define{{.*}} void @_Z6kernel1A(ptr noundef byval(%struct.A) align 8 %x) __global__ void kernel(A x) { } @@ -20,7 +20,7 @@ __global__ void kernel(A x) { class Kernel { public: // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel12memberKernelE1A(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}}) - // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel12memberKernelE1A(%struct.A %{{.+}}) + // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel12memberKernelE1A(ptr addrspace(2) noundef byref(%struct.A) align 8 %{{.+}}) // NVPTX: define{{.*}} void @_ZN6Kernel12memberKernelE1A(ptr noundef byval(%struct.A) align 8 %x) static __global__ void memberKernel(A x){} template<typename T> static __global__ void templateMemberKernel(T x) {} @@ -35,12 +35,12 @@ void launch(void*); void test() { Kernel K; // AMDGCN: define{{.*}} amdgpu_kernel void @_Z14templateKernelI1AEvT_(ptr addrspace(4) noundef byref(%struct.A) align 8 %{{.+}} - // AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z14templateKernelI1AEvT_(%struct.A %{{.+}}) + // AMDGCNSPIRV: define{{.*}} spir_kernel void @_Z14templateKernelI1AEvT_(ptr addrspace(2) noundef byref(%struct.A) align 8 %{{.+}}) // NVPTX: define{{.*}} void @_Z14templateKernelI1AEvT_(ptr noundef byval(%struct.A) align 8 %x) launch((void*)templateKernel<A>); // AMDGCN: define{{.*}} amdgpu_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr addrspace(4) noundef 
byref(%struct.A) align 8 %{{.+}} - // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(%struct.A %{{.+}} + // AMDGCNSPIRV: define{{.*}} spir_kernel void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr addrspace(2) noundef byref(%struct.A) align 8 %{{.+}} // NVPTX: define{{.*}} void @_ZN6Kernel20templateMemberKernelI1AEEvT_(ptr noundef byval(%struct.A) align 8 %x) launch((void*)Kernel::templateMemberKernel<A>); } diff --git a/clang/test/CodeGenHIP/amdgcnspirv-uses-amdgpu-abi.cpp b/clang/test/CodeGenHIP/amdgcnspirv-uses-amdgpu-abi.cpp new file mode 100644 index 0000000000000..8f92d1fed1f9f --- /dev/null +++ b/clang/test/CodeGenHIP/amdgcnspirv-uses-amdgpu-abi.cpp @@ -0,0 +1,321 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -O3 \ +// RUN: -o - %s | FileCheck --check-prefix=AMDGCNSPIRV %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -target-cpu gfx906 -emit-llvm -fcuda-is-device -O3 \ +// RUN: -o - %s | FileCheck --check-prefix=AMDGPU %s + +#define __global__ __attribute__((global)) +#define __device__ __attribute__((device)) + +union Transparent { unsigned x; }; +using V1 = unsigned __attribute__((ext_vector_type(1))); +using V2 = unsigned __attribute__((ext_vector_type(2))); +using V3 = unsigned __attribute__((ext_vector_type(3))); +using V4 = unsigned __attribute__((ext_vector_type(4))); +struct SingleElement { unsigned x; }; +struct ByRef { unsigned x[17]; }; + +// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k0s( +// AMDGCNSPIRV-SAME: i16 noundef [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0:[0-9]+]] !max_work_group_size [[META9:![0-9]+]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k0s( +// AMDGPU-SAME: i16 noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// AMDGPU-NEXT: 
[[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__global__ void k0(short) { } + +// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k1j( +// AMDGCNSPIRV-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k1j( +// AMDGPU-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__global__ void k1(unsigned) { } + +// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k2d( +// AMDGCNSPIRV-SAME: double noundef [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k2d( +// AMDGPU-SAME: double noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__global__ void k2(double) { } + +// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k311Transparent( +// AMDGCNSPIRV-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k311Transparent( +// AMDGPU-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__global__ void k3(Transparent) { } + +// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k413SingleElement( +// AMDGCNSPIRV-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k413SingleElement( +// AMDGPU-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGPU-NEXT: 
[[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__global__ void k4(SingleElement) { } + +// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k55ByRef( +// AMDGCNSPIRV-SAME: ptr addrspace(2) noundef readnone byref([[STRUCT_BYREF:%.*]]) align 4 captures(none) [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k55ByRef( +// AMDGPU-SAME: ptr addrspace(4) noundef readnone byref([[STRUCT_BYREF:%.*]]) align 4 captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__global__ void k5(ByRef) { } + +// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k6Dv1_jDv2_jDv3_jDv4_j( +// AMDGCNSPIRV-SAME: <1 x i32> noundef [[TMP0:%.*]], <2 x i32> noundef [[TMP1:%.*]], <3 x i32> noundef [[TMP2:%.*]], <4 x i32> noundef [[TMP3:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k6Dv1_jDv2_jDv3_jDv4_j( +// AMDGPU-SAME: <1 x i32> noundef [[TMP0:%.*]], <2 x i32> noundef [[TMP1:%.*]], <3 x i32> noundef [[TMP2:%.*]], <4 x i32> noundef [[TMP3:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__global__ void k6(V1, V2, V3, V4) { } + +// AMDGCNSPIRV-LABEL: define spir_kernel void @_Z2k7Pj( +// AMDGCNSPIRV-SAME: ptr addrspace(1) noundef readnone captures(none) [[DOTCOERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR0]] !max_work_group_size [[META9]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local amdgpu_kernel void @_Z2k7Pj( +// AMDGPU-SAME: ptr addrspace(1) noundef readnone captures(none) [[DOTCOERCE:%.*]]) local_unnamed_addr #[[ATTR0]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// 
+__global__ void k7(unsigned*) { } + +// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f0s( +// AMDGCNSPIRV-SAME: i16 noundef signext [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1:[0-9]+]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local void @_Z2f0s( +// AMDGPU-SAME: i16 noundef signext [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__device__ void f0(short) { } + +// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f1j( +// AMDGCNSPIRV-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local void @_Z2f1j( +// AMDGPU-SAME: i32 noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__device__ void f1(unsigned) { } + +// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f2d( +// AMDGCNSPIRV-SAME: double noundef [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local void @_Z2f2d( +// AMDGPU-SAME: double noundef [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__device__ void f2(double) { } + +// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f311Transparent( +// AMDGCNSPIRV-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local void @_Z2f311Transparent( +// AMDGPU-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__device__ void f3(Transparent) { } + +// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f413SingleElement( +// AMDGCNSPIRV-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr addrspace(4) 
#[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local void @_Z2f413SingleElement( +// AMDGPU-SAME: i32 [[DOTCOERCE:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__device__ void f4(SingleElement) { } + +// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f55ByRef( +// AMDGCNSPIRV-SAME: ptr noundef readnone byref([[STRUCT_BYREF:%.*]]) align 4 captures(none) [[TMP0:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local void @_Z2f55ByRef( +// AMDGPU-SAME: ptr addrspace(5) noundef readnone byref([[STRUCT_BYREF:%.*]]) align 4 captures(none) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__device__ void f5(ByRef) { } + +// AMDGCNSPIRV-LABEL: define spir_func void @_Z2f6Dv1_jDv2_jDv3_jDv4_j( +// AMDGCNSPIRV-SAME: <1 x i32> noundef [[TMP0:%.*]], <2 x i32> noundef [[TMP1:%.*]], <3 x i32> noundef [[TMP2:%.*]], <4 x i32> noundef [[TMP3:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local void @_Z2f6Dv1_jDv2_jDv3_jDv4_j( +// AMDGPU-SAME: <1 x i32> noundef [[TMP0:%.*]], <2 x i32> noundef [[TMP1:%.*]], <3 x i32> noundef [[TMP2:%.*]], <4 x i32> noundef [[TMP3:%.*]]) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret void +// +__device__ void f6(V1, V2, V3, V4) { } + +// AMDGCNSPIRV-LABEL: define spir_func noundef signext i16 @_Z2f7v( +// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret i16 0 +// +// AMDGPU-LABEL: define dso_local noundef signext i16 @_Z2f7v( +// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret i16 0 +// +__device__ short f7() { 
return 0; } + +// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @_Z2f8v( +// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret i32 0 +// +// AMDGPU-LABEL: define dso_local noundef i32 @_Z2f8v( +// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret i32 0 +// +__device__ unsigned f8() { return 0; } + +// AMDGCNSPIRV-LABEL: define spir_func noundef double @_Z2f9v( +// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret double 0.000000e+00 +// +// AMDGPU-LABEL: define dso_local noundef double @_Z2f9v( +// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret double 0.000000e+00 +// +__device__ double f9() { return 0.; } + +// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @_Z3f10v( +// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret i32 0 +// +// AMDGPU-LABEL: define dso_local noundef i32 @_Z3f10v( +// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret i32 0 +// +__device__ Transparent f10() { return {}; } + +// AMDGCNSPIRV-LABEL: define spir_func noundef i32 @_Z3f11v( +// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret i32 0 +// +// AMDGPU-LABEL: define dso_local noundef i32 @_Z3f11v( +// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret i32 0 +// +__device__ SingleElement f11() { return {}; } + +// AMDGCNSPIRV-LABEL: define spir_func void @_Z3f12v( +// AMDGCNSPIRV-SAME: ptr dead_on_unwind noalias writable writeonly sret([[STRUCT_BYREF:%.*]]) align 4 captures(none) initializes((0, 68)) [[AGG_RESULT:%.*]]) local_unnamed_addr addrspace(4) #[[ATTR2:[0-9]+]] { +// 
AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: tail call addrspace(4) void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(68) [[AGG_RESULT]], i8 0, i64 68, i1 false) +// AMDGCNSPIRV-NEXT: ret void +// +// AMDGPU-LABEL: define dso_local void @_Z3f12v( +// AMDGPU-SAME: ptr addrspace(5) dead_on_unwind noalias writable writeonly sret([[STRUCT_BYREF:%.*]]) align 4 captures(none) initializes((0, 68)) [[AGG_RESULT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: tail call void @llvm.memset.p5.i64(ptr addrspace(5) noundef align 4 dereferenceable(68) [[AGG_RESULT]], i8 0, i64 68, i1 false) +// AMDGPU-NEXT: ret void +// +__device__ ByRef f12() { return {}; } + +// AMDGCNSPIRV-LABEL: define spir_func noundef <1 x i32> @_Z3f13v( +// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret <1 x i32> zeroinitializer +// +// AMDGPU-LABEL: define dso_local noundef <1 x i32> @_Z3f13v( +// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret <1 x i32> zeroinitializer +// +__device__ V1 f13() { return {}; } + +// AMDGCNSPIRV-LABEL: define spir_func noundef <2 x i32> @_Z3f14v( +// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret <2 x i32> zeroinitializer +// +// AMDGPU-LABEL: define dso_local noundef <2 x i32> @_Z3f14v( +// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret <2 x i32> zeroinitializer +// +__device__ V2 f14() { return {}; } + +// AMDGCNSPIRV-LABEL: define spir_func noundef <3 x i32> @_Z3f15v( +// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret <3 x i32> zeroinitializer +// +// AMDGPU-LABEL: define dso_local noundef <3 x i32> @_Z3f15v( +// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] { 
+// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret <3 x i32> zeroinitializer +// +__device__ V3 f15() { return {}; } + +// AMDGCNSPIRV-LABEL: define spir_func noundef <4 x i32> @_Z3f16v( +// AMDGCNSPIRV-SAME: ) local_unnamed_addr addrspace(4) #[[ATTR1]] { +// AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] +// AMDGCNSPIRV-NEXT: ret <4 x i32> zeroinitializer +// +// AMDGPU-LABEL: define dso_local noundef <4 x i32> @_Z3f16v( +// AMDGPU-SAME: ) local_unnamed_addr #[[ATTR1]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: ret <4 x i32> zeroinitializer +// +__device__ V4 f16() { return {}; } +//. +// AMDGCNSPIRV: [[META9]] = !{i32 1024, i32 1, i32 1} +//. diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index dd57b74d79a5e..c4adba3a137c0 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -374,7 +374,10 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, buildOpDecorate(VRegs[i][0], MIRBuilder, SPIRV::Decoration::FuncParamAttr, {Attr}); } - if (Arg.hasAttribute(Attribute::ByVal)) { + if (Arg.hasAttribute(Attribute::ByVal) || + (Arg.hasAttribute(Attribute::ByRef) && + F.getParent()->getTargetTriple().getVendor() == + Triple::VendorType::AMD)) { auto Attr = static_cast<unsigned>(SPIRV::FunctionParameterAttribute::ByVal); buildOpDecorate(VRegs[i][0], MIRBuilder, diff --git a/llvm/test/CodeGen/SPIRV/pointers/ptr-argument-byref-amdgcnspirv.ll b/llvm/test/CodeGen/SPIRV/pointers/ptr-argument-byref-amdgcnspirv.ll new file mode 100644 index 0000000000000..1712ddbb9bda5 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/ptr-argument-byref-amdgcnspirv.ll @@ -0,0 +1,24 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck --check-prefixes=CHECK,SPIRV %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-amd-amdhsa %s -o - | FileCheck --check-prefixes=CHECK,AMDGCNSPIRV %s +; RUN: %if spirv-tools %{ llc -O0 
-mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-amd-amdhsa %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpName %[[#XKER:]] "x" +; CHECK-DAG: OpName %[[#XFN:]] "x" +; SPIRV-NOT: OpDecorate %[[#XKER]] FuncParamAttr ByVal +; AMDGCNSPIRV: OpDecorate %[[#XKER]] FuncParamAttr ByVal +; SPIRV-NOT: OpDecorate %[[#XFN]] FuncParamAttr ByVal +; AMDGCNSPIRV: OpDecorate %[[#XFN]] FuncParamAttr ByVal + +%struct.S = type { i32 } +%struct.SS = type { [7 x %struct.S] } + +define spir_kernel void @ker(ptr addrspace(2) noundef byref(%struct.SS) %x) { +entry: + ret void +} + +define spir_func void @fn(ptr noundef byref(%struct.SS) %x) { +entry: + ret void +} \ No newline at end of file >From 5b256a5ada23dfbf7e9b17d4ccacc81b334e5de5 Mon Sep 17 00:00:00 2001 From: Alex Voicu <[email protected]> Date: Fri, 28 Nov 2025 03:05:35 +0000 Subject: [PATCH 2/2] Fix formatting. --- clang/lib/CodeGen/Targets/SPIR.cpp | 12 +++++++----- llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index 3540093074bfe..2134e158a663d 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -67,12 +67,14 @@ class AMDGCNSPIRVABIInfo : public SPIRVABIInfo { ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyKernelArgumentType(QualType Ty) const; ABIArgInfo classifyArgumentType(QualType Ty) const; + public: AMDGCNSPIRVABIInfo(CodeGenTypes &CGT) : SPIRVABIInfo(CGT) {} void computeInfo(CGFunctionInfo &FI) const override; - llvm::FixedVectorType *getOptimalVectorMemoryType( - llvm::FixedVectorType *Ty, const LangOptions &LangOpt) const override; + llvm::FixedVectorType * + getOptimalVectorMemoryType(llvm::FixedVectorType *Ty, + const LangOptions &LangOpt) const override; }; } // end anonymous namespace namespace { @@ -251,9 +253,9 @@ unsigned 
AMDGCNSPIRVABIInfo::numRegsForType(QualType Ty) const { return (getContext().getTypeSize(Ty) + 31) / 32; } -llvm::Type * -AMDGCNSPIRVABIInfo::coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS, - unsigned ToAS) const { +llvm::Type *AMDGCNSPIRVABIInfo::coerceKernelArgumentType(llvm::Type *Ty, + unsigned FromAS, + unsigned ToAS) const { // Single value types. auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty); if (PtrTy && PtrTy->getAddressSpace() == FromAS) diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index c4adba3a137c0..bcaa3c4c66d76 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -377,7 +377,7 @@ bool SPIRVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, if (Arg.hasAttribute(Attribute::ByVal) || (Arg.hasAttribute(Attribute::ByRef) && F.getParent()->getTargetTriple().getVendor() == - Triple::VendorType::AMD)) { + Triple::VendorType::AMD)) { auto Attr = static_cast<unsigned>(SPIRV::FunctionParameterAttribute::ByVal); buildOpDecorate(VRegs[i][0], MIRBuilder, _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
