llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Alex Voicu (AlexVlx) <details> <summary>Changes</summary> This exposes Clang builtins corresponding to the `amdgcn.wave.shuffle` and `amdgcn.wave.id` LLVM intrinsics. The shuffle implementation is a bit more verbose, since we're using an overloaded interface as opposed to having multiple suffixed standalone variants, contrary to what wave_reduce chose to do. --- Full diff: https://github.com/llvm/llvm-project/pull/179492.diff 6 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsAMDGPU.td (+9) - (modified) clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp (+4) - (modified) clang/lib/Sema/SemaAMDGPU.cpp (+25) - (modified) clang/lib/Sema/SemaChecking.cpp (+3-1) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (+16) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn.cl (+21) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td index 1950757097fc6..a9acc1544ad53 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td @@ -67,6 +67,8 @@ def __builtin_amdgcn_mbcnt_lo : AMDGPUBuiltin<"unsigned int(unsigned int, unsign def __builtin_amdgcn_s_memtime : AMDGPUBuiltin<"uint64_t()", [], "s-memtime-inst">; +def __builtin_amdgcn_wave_id : AMDGPUBuiltin<"int32_t()", [Const], "architected-sgprs">; + //===----------------------------------------------------------------------===// // Instruction builtins. 
//===----------------------------------------------------------------------===// @@ -413,6 +415,13 @@ def __builtin_amdgcn_wave_reduce_fsub_f64 : AMDGPUBuiltin<"double(double, _Const def __builtin_amdgcn_wave_reduce_fmin_f64 : AMDGPUBuiltin<"double(double, _Constant int32_t)", [Const]>; def __builtin_amdgcn_wave_reduce_fmax_f64 : AMDGPUBuiltin<"double(double, _Constant int32_t)", [Const]>; +//===----------------------------------------------------------------------===// +// Wave Shuffle builtins. +//===----------------------------------------------------------------------===// + +// This is an overloaded builtin modelled after the atomic ones +def __builtin_amdgcn_wave_shuffle : AMDGPUBuiltin<"void(...)", [Const, CustomTypeChecking]>; + //===----------------------------------------------------------------------===// // R600-NI only builtins. //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index a096ed27a788e..619c9b4be9090 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -449,6 +449,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()}); return Builder.CreateCall(F, {Value, Strategy}); } + case AMDGPU::BI__builtin_amdgcn_wave_shuffle: + // TODO: can we unify this with wave_reduce? 
+ return emitBuiltinWithOneOverloadedType<2>(*this, E, + Intrinsic::amdgcn_wave_shuffle); case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index 4261e1849133f..d5403f22eb7bb 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -296,6 +296,31 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, } return false; } + case AMDGPU::BI__builtin_amdgcn_wave_shuffle: { + Expr *Val = TheCall->getArg(0); + QualType ValTy = Val->getType(); + + if ((!ValTy->isIntegerType() && !ValTy->isFloatingType()) || + SemaRef.getASTContext().getTypeSize(ValTy) > 32) + return Diag(Val->getExprLoc(), diag::err_builtin_invalid_arg_type) + << Val << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/2 + << ValTy; + + Expr *Idx = TheCall->getArg(1); + QualType IdxTy = Idx->getType(); + if (!IdxTy->isIntegerType()) + return Diag(Idx->getExprLoc(), diag::err_typecheck_expect_int) << IdxTy; + if (SemaRef.getASTContext().getTypeSize(IdxTy) > 32) + return Diag(Idx->getExprLoc(), diag::err_builtin_invalid_arg_type) + << Idx << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/0 + << IdxTy; + + // Resolve the overload here, now that we know that the invocation is + // correct: the intrinsic returns the type of the value argument. 
+ TheCall->setType(ValTy); + + return false; + } default: return false; } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index e2e1b37572364..9858264aa042d 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2100,8 +2100,10 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case llvm::Triple::spirv: case llvm::Triple::spirv32: case llvm::Triple::spirv64: - if (TI.getTriple().getOS() != llvm::Triple::OSType::AMDHSA) + if (TI.getTriple().getVendor() != llvm::Triple::VendorType::AMD) return SPIRV().CheckSPIRVBuiltinFunctionCall(TI, BuiltinID, TheCall); + else + return AMDGPU().CheckAMDGCNBuiltinFunctionCall(BuiltinID, TheCall); return false; case llvm::Triple::systemz: return SystemZ().CheckSystemZBuiltinFunctionCall(BuiltinID, TheCall); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl index 8c02616780182..d39c4180178ad 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl @@ -317,3 +317,19 @@ void test_ds_bpermute_fi_b32(global int* out, int a, int b) { *out = __builtin_amdgcn_ds_bpermute_fi_b32(a, b); } + +__attribute__((target("architected-sgprs"))) +// CHECK-LABEL: @test_wave_id( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.wave.id() +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[TMP1]], align 4 +// CHECK-NEXT: ret void +// +void test_wave_id(global int* out) +{ + *out = __builtin_amdgcn_wave_id(); +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 376105cb6594c..4755cd32a2e2c 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -937,6 +937,27 @@ void test_wave_reduce_max_u64_dpp(global int* out, long in) *out = __builtin_amdgcn_wave_reduce_max_u64(in, 2); } +// CHECK-LABEL: @test_wave_shuffle_u32 +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.shuffle.i32 +void test_wave_shuffle_u32(global unsigned* out, unsigned in, int idx) +{ + *out = __builtin_amdgcn_wave_shuffle(in, idx); +} + +// CHECK-LABEL: @test_wave_shuffle_i32 +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.shuffle.i32 +void test_wave_shuffle_i32(global int* out, int in, int idx) +{ + *out = __builtin_amdgcn_wave_shuffle(in, idx); +} + +// CHECK-LABEL: @test_wave_shuffle_f32 +// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.shuffle.f32 +void test_wave_shuffle_f32(global float* out, float in, int idx) +{ + *out = __builtin_amdgcn_wave_shuffle(in, idx); +} + // CHECK-LABEL: @test_s_barrier // CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.barrier( void test_s_barrier() `````````` </details> https://github.com/llvm/llvm-project/pull/179492 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
