https://github.com/AlexVlx updated https://github.com/llvm/llvm-project/pull/179492
>From edf668446bdf50c27f8ec01ada9f7ab67157083f Mon Sep 17 00:00:00 2001 From: Alex Voicu <[email protected]> Date: Tue, 3 Feb 2026 16:19:51 +0000 Subject: [PATCH 1/4] Add `wave_id` and `wave_shuffle` Clang builtins. --- clang/include/clang/Basic/BuiltinsAMDGPU.td | 9 +++++++ clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 4 +++ clang/lib/Sema/SemaAMDGPU.cpp | 25 +++++++++++++++++++ clang/lib/Sema/SemaChecking.cpp | 4 ++- .../CodeGenOpenCL/builtins-amdgcn-gfx12.cl | 16 ++++++++++++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 21 ++++++++++++++++ 6 files changed, 78 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td index 1950757097fc6..a9acc1544ad53 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td @@ -67,6 +67,8 @@ def __builtin_amdgcn_mbcnt_lo : AMDGPUBuiltin<"unsigned int(unsigned int, unsign def __builtin_amdgcn_s_memtime : AMDGPUBuiltin<"uint64_t()", [], "s-memtime-inst">; +def __builtin_amdgcn_wave_id : AMDGPUBuiltin<"int32_t()", [Const], "architected-sgprs">; + //===----------------------------------------------------------------------===// // Instruction builtins. //===----------------------------------------------------------------------===// @@ -413,6 +415,13 @@ def __builtin_amdgcn_wave_reduce_fsub_f64 : AMDGPUBuiltin<"double(double, _Const def __builtin_amdgcn_wave_reduce_fmin_f64 : AMDGPUBuiltin<"double(double, _Constant int32_t)", [Const]>; def __builtin_amdgcn_wave_reduce_fmax_f64 : AMDGPUBuiltin<"double(double, _Constant int32_t)", [Const]>; +//===----------------------------------------------------------------------===// +// Wave Shuffle builtins. +//===----------------------------------------------------------------------===// + +// This is an overloaded builtin modelled after the atomic ones +def __builtin_amdgcn_wave_shuffle : AMDGPUBuiltin<"void(...)", [Const, CustomTypeChecking]>; + //===----------------------------------------------------------------------===// // R600-NI only builtins. //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index a096ed27a788e..619c9b4be9090 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -449,6 +449,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()}); return Builder.CreateCall(F, {Value, Strategy}); } + case AMDGPU::BI__builtin_amdgcn_wave_shuffle: + // TODO: can we unify this with wave_reduce? + return emitBuiltinWithOneOverloadedType<2>(*this, E, + Intrinsic::amdgcn_wave_shuffle); case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index 4261e1849133f..d5403f22eb7bb 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -296,6 +296,31 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, } return false; } + case AMDGPU::BI__builtin_amdgcn_wave_shuffle: { + Expr *Val = TheCall->getArg(0); + QualType ValTy = Val->getType(); + + if ((!ValTy->isIntegerType() && !ValTy->isFloatingType()) || + SemaRef.getASTContext().getTypeSize(ValTy) > 32) + return Diag(Val->getExprLoc(), diag::err_builtin_invalid_arg_type) + << Val << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/2 + << ValTy; + + Expr *Idx = TheCall->getArg(1); + QualType IdxTy = Idx->getType(); + if (!IdxTy->isIntegerType()) + return Diag(Idx->getExprLoc(), diag::err_typecheck_expect_int) << IdxTy; + if (SemaRef.getASTContext().getTypeSize(IdxTy) > 32) + return Diag(Idx->getExprLoc(), diag::err_builtin_invalid_arg_type) + << Idx << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/0 + << IdxTy; + + // Resolve the overload here, now that we know that the invocation is + // correct: the intrinsic returns the type of the value argument. + TheCall->setType(ValTy); + + return false; + } default: return false; } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index e2e1b37572364..9858264aa042d 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2100,8 +2100,10 @@ bool Sema::CheckTSBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case llvm::Triple::spirv: case llvm::Triple::spirv32: case llvm::Triple::spirv64: - if (TI.getTriple().getOS() != llvm::Triple::OSType::AMDHSA) + if (TI.getTriple().getVendor() != llvm::Triple::VendorType::AMD) return SPIRV().CheckSPIRVBuiltinFunctionCall(TI, BuiltinID, TheCall); + else + return AMDGPU().CheckAMDGCNBuiltinFunctionCall(BuiltinID, TheCall); return false; case llvm::Triple::systemz: return SystemZ().CheckSystemZBuiltinFunctionCall(BuiltinID, TheCall); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl index 8c02616780182..d39c4180178ad 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl @@ -317,3 +317,19 @@ void test_ds_bpermute_fi_b32(global int* out, int a, int b) { *out = __builtin_amdgcn_ds_bpermute_fi_b32(a, b); } + +__attribute__((target("architected-sgprs"))) +// CHECK-LABEL: @test_wave_id( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.wave.id() +// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP0]], ptr addrspace(1) [[TMP1]], align 4 +// CHECK-NEXT: ret void +// +void test_wave_id(global int* out) +{ + *out = __builtin_amdgcn_wave_id(); +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index 376105cb6594c..4755cd32a2e2c 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -937,6 +937,27 @@ void test_wave_reduce_max_u64_dpp(global int* out, long in) *out = __builtin_amdgcn_wave_reduce_max_u64(in, 2); } +// CHECK-LABEL: @test_wave_shuffle_u32 +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.shuffle.i32 +void test_wave_shuffle_u32(global unsigned* out, unsigned in, int idx) +{ + *out = __builtin_amdgcn_wave_shuffle(in, idx); +} + +// CHECK-LABEL: @test_wave_shuffle_i32 +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.shuffle.i32 +void test_wave_shuffle_i32(global int* out, int in, int idx) +{ + *out = __builtin_amdgcn_wave_shuffle(in, idx); +} + +// CHECK-LABEL: @test_wave_shuffle_f32 +// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.wave.shuffle.f32 +void test_wave_shuffle_f32(global float* out, float in, int idx) +{ + *out = __builtin_amdgcn_wave_shuffle(in, idx); +} + // CHECK-LABEL: @test_s_barrier // CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.barrier( void test_s_barrier() >From deb0a0d3e8f39604aeca1c5fc148b90f6a93d4ba Mon Sep 17 00:00:00 2001 From: Alex Voicu <[email protected]> Date: Tue, 3 Feb 2026 16:25:23 +0000 Subject: [PATCH 2/4] Fix formatting. --- clang/lib/Sema/SemaAMDGPU.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index d5403f22eb7bb..8fce0a56bc4f9 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -303,8 +303,8 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, if ((!ValTy->isIntegerType() && !ValTy->isFloatingType()) || SemaRef.getASTContext().getTypeSize(ValTy) > 32) return Diag(Val->getExprLoc(), diag::err_builtin_invalid_arg_type) - << Val << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/2 - << ValTy; + << Val << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/2 + << ValTy; Expr *Idx = TheCall->getArg(1); QualType IdxTy = Idx->getType(); @@ -312,8 +312,8 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, return Diag(Idx->getExprLoc(), diag::err_typecheck_expect_int) << IdxTy; if (SemaRef.getASTContext().getTypeSize(IdxTy) > 32) return Diag(Idx->getExprLoc(), diag::err_builtin_invalid_arg_type) - << Idx << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/0 - << IdxTy; + << Idx << /*scalar=*/1 << /*'int'=*/4 << /*floating point=*/0 + << IdxTy; // Resolve the overload here, now that we know that the invocation is // correct: the intrinsic returns the type of the value argument. >From 2dc9e3b4f5a5b8209768e60d11f7138956d5c13c Mon Sep 17 00:00:00 2001 From: Alex Voicu <[email protected]> Date: Tue, 3 Feb 2026 17:28:21 +0000 Subject: [PATCH 3/4] Add tests for Sema failure + missing update. --- clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl | 2 ++ clang/test/SemaOpenCL/builtins-amdgcn-error.cl | 7 +++++++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 1 + 3 files changed, 10 insertions(+) diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl index 34887a65021c3..09afb7bc12017 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl @@ -13,4 +13,6 @@ typedef unsigned int uint; void test(global uint* out, uint a, uint b, uint c) { *out = __builtin_amdgcn_permlane16_var(a, b, c, 1, 1); // expected-error {{'__builtin_amdgcn_permlane16_var' needs target feature gfx12-insts}} *out = __builtin_amdgcn_permlanex16_var(a, b, c, 1, 1); // expected-error {{'__builtin_amdgcn_permlanex16_var' needs target feature gfx12-insts}} + (void)__builtin_amdgcn_wave_id(); // expected-error {{'__builtin_amdgcn_wave_id' needs target feature architected-sgprs}} } + diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl index eb1a86bdcdeb0..12b9645463f3a 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error.cl @@ -229,3 +229,10 @@ void test_atomic_dec64() { __INT64_TYPE__ signedVal = 15; signedVal = __builtin_amdgcn_atomic_dec64(&signedVal, signedVal, __ATOMIC_ACQUIRE, ""); // expected-warning {{passing '__private long *' to parameter of type 'volatile __private unsigned long *' converts between pointers to integer types with different sign}} } + +void test_wave_shuffle(double d, int i, long long lli) { + struct S { int x; } s; + int x = __builtin_amdgcn_wave_shuffle(lli, i); // expected-error {{'lli' argument must be a scalar 'int' or 16 or 32 bit floating-point type (was '__private long long')}} + int y = __builtin_amdgcn_wave_shuffle(i, lli); // expected-error {{'lli' argument must be a scalar 'int' type (was '__private long long')}} + float z = __builtin_amdgcn_wave_shuffle(s, i); // expected-error {{'s' argument must be a scalar 'int' or 16 or 32 bit floating-point type (was '__private struct S')}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index a8eba9ed126b7..cff37bb42965a 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -3140,6 +3140,7 @@ def int_amdgcn_ds_read_tr16_b64 : AMDGPULoadIntrinsic<local_ptr_ty>; // i32 @llvm.amdgcn.wave.id() def int_amdgcn_wave_id : + ClangBuiltin<"__builtin_amdgcn_wave_id">, DefaultAttrsIntrinsic<[llvm_i32_ty], [], [NoUndef<RetIndex>, IntrNoMem, IntrSpeculatable]>; def int_amdgcn_s_prefetch_data : >From 223d63923954f109e105b84e1372d5f4fe14c6cd Mon Sep 17 00:00:00 2001 From: Alex Voicu <[email protected]> Date: Tue, 3 Feb 2026 18:48:22 +0000 Subject: [PATCH 4/4] Remove stray newline. --- clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl index 09afb7bc12017..f1736fdfc9086 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx12.cl @@ -15,4 +15,3 @@ void test(global uint* out, uint a, uint b, uint c) { *out = __builtin_amdgcn_permlanex16_var(a, b, c, 1, 1); // expected-error {{'__builtin_amdgcn_permlanex16_var' needs target feature gfx12-insts}} (void)__builtin_amdgcn_wave_id(); // expected-error {{'__builtin_amdgcn_wave_id' needs target feature architected-sgprs}} } - _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
