https://github.com/vangthao95 created https://github.com/llvm/llvm-project/pull/206177
None >From e2bff039760dd11566fe672bc409db0f951b6a86 Mon Sep 17 00:00:00 2001 From: Vang Thao <[email protected]> Date: Fri, 26 Jun 2026 16:20:29 -0400 Subject: [PATCH] [AMDGPU] Add clang builtin for s_bitreplicate intrinsic --- clang/include/clang/Basic/BuiltinsAMDGPU.td | 1 + clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl | 7 +++++++ clang/test/SemaOpenCL/builtins-amdgcn-error-gfx9.cl | 6 ++++-- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 1 + 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td index ccbf2f97a1313..56976970e32d4 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td @@ -378,6 +378,7 @@ def __builtin_amdgcn_perm : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned i //===----------------------------------------------------------------------===// def __builtin_amdgcn_fmed3h : AMDGPUBuiltin<"__fp16(__fp16, __fp16, __fp16)", [Const], "gfx9-insts">; +def __builtin_amdgcn_s_bitreplicate : AMDGPUBuiltin<"uint64_t(unsigned int)", [Const], "gfx9-insts">; def __builtin_amdgcn_global_atomic_fadd_f64 : AMDGPUBuiltin<"double(double address_space<1> *, double)", [], "gfx90a-insts">; def __builtin_amdgcn_global_atomic_fadd_f32 : AMDGPUBuiltin<"float(float address_space<1> *, float)", [], "atomic-fadd-rtn-insts">; diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl index 87f2da20a21a6..a86e16a933728 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl @@ -26,3 +26,10 @@ void test_groupstaticsize(global uint* out) { *out = __builtin_amdgcn_groupstaticsize(); } + +// CHECK-LABEL: @test_s_bitreplicate +// CHECK: call i64 @llvm.amdgcn.s.bitreplicate(i32 %a) +void test_s_bitreplicate(global ulong* out, uint a) +{ + *out = __builtin_amdgcn_s_bitreplicate(a); +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx9.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx9.cl index 7c07632aeb60b..2fb1fa086a771 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx9.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx9.cl @@ -9,9 +9,11 @@ struct S { int x; }; -void test_gfx9_fmed3h(global half *out, half a, half b, half c) +void test_gfx9_builtins(global half *out0, half a0, half b0, half c0, + global unsigned long *out1, unsigned int a1) { - *out = __builtin_amdgcn_fmed3h(a, b, c); // expected-error {{'__builtin_amdgcn_fmed3h' needs target feature gfx9-insts}} + *out0 = __builtin_amdgcn_fmed3h(a0, b0, c0); // expected-error {{'__builtin_amdgcn_fmed3h' needs target feature gfx9-insts}} + *out1 = __builtin_amdgcn_s_bitreplicate(a1); // expected-error {{'__builtin_amdgcn_s_bitreplicate' needs target feature gfx9-insts}} } void test_mov_dpp(global int* out, int src, int i, int2 i2, struct S s, float _Complex fc) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f1659f0cd803a..95dc490bf398a 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2476,6 +2476,7 @@ def int_amdgcn_inverse_ballot : // Lowers to S_BITREPLICATE_B64_B32. // The argument must be uniform; otherwise, the result is undefined. def int_amdgcn_s_bitreplicate : + ClangBuiltin<"__builtin_amdgcn_s_bitreplicate">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>; // Lowers to S_QUADMASK_B{32,64} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
