Author: vangthao95 Date: 2026-06-29T13:47:20-07:00 New Revision: 0304e92b1578ab796c8eaa7538a3f4573284d6fb
URL: https://github.com/llvm/llvm-project/commit/0304e92b1578ab796c8eaa7538a3f4573284d6fb DIFF: https://github.com/llvm/llvm-project/commit/0304e92b1578ab796c8eaa7538a3f4573284d6fb.diff LOG: [AMDGPU] Add clang builtin for s_bitreplicate intrinsic (#206177) Added: Modified: clang/include/clang/Basic/BuiltinsAMDGPU.td clang/include/clang/Basic/BuiltinsAMDGPUDocs.td clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl clang/test/SemaOpenCL/builtins-amdgcn-error-gfx9.cl llvm/include/llvm/IR/IntrinsicsAMDGPU.td Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td b/clang/include/clang/Basic/BuiltinsAMDGPU.td index 27a0e9daf456a..306af993fd869 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td @@ -378,6 +378,10 @@ def __builtin_amdgcn_perm : AMDGPUBuiltin<"unsigned int(unsigned int, unsigned i //===----------------------------------------------------------------------===// def __builtin_amdgcn_fmed3h : AMDGPUBuiltin<"__fp16(__fp16, __fp16, __fp16)", [Const], "gfx9-insts">; +def __builtin_amdgcn_s_bitreplicate : AMDGPUBuiltin<"uint64_t(unsigned int)", [Const], "gfx9-insts"> { + let Documentation = [DocSBitReplicate]; + let ArgNames = ["src"]; +} def __builtin_amdgcn_global_atomic_fadd_f64 : AMDGPUBuiltin<"double(double address_space<1> *, double)", [], "gfx90a-insts">; def __builtin_amdgcn_global_atomic_fadd_f32 : AMDGPUBuiltin<"float(float address_space<1> *, float)", [], "atomic-fadd-rtn-insts">; diff --git a/clang/include/clang/Basic/BuiltinsAMDGPUDocs.td b/clang/include/clang/Basic/BuiltinsAMDGPUDocs.td index 5b8d14818c0c8..c8a1b5a824f53 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPUDocs.td +++ b/clang/include/clang/Basic/BuiltinsAMDGPUDocs.td @@ -691,3 +691,23 @@ def DocTensorStoreFromLDS_GFX1250 : Documentation { Asynchronously copies a tensor from LDS into global memory. 
}]; } + +//===----------------------------------------------------------------------===// +// Bit Manipulation Builtins +//===----------------------------------------------------------------------===// + +def DocCatBitManip : DocumentationCategory<"Bit Manipulation Builtins"> { + let Content = [{ +These builtins perform bit-manipulation operations within a wavefront. +}]; +} + +def DocSBitReplicate : Documentation { + let Category = DocCatBitManip; + let Content = [{ +Replicates each bit of the 32-bit ``src`` operand into two adjacent bits of a +64-bit result. Bit ``i`` of the input is copied to bits ``2*i`` and ``2*i+1`` +of the output. The operand must be uniform across the wavefront. A divergent +value gives an undefined result. +}]; +} diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl index 87f2da20a21a6..a86e16a933728 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx9.cl @@ -26,3 +26,10 @@ void test_groupstaticsize(global uint* out) { *out = __builtin_amdgcn_groupstaticsize(); } + +// CHECK-LABEL: @test_s_bitreplicate +// CHECK: call i64 @llvm.amdgcn.s.bitreplicate(i32 %a) +void test_s_bitreplicate(global ulong* out, uint a) +{ + *out = __builtin_amdgcn_s_bitreplicate(a); +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx9.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx9.cl index 7c07632aeb60b..564de662a2e86 100644 --- a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx9.cl +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx9.cl @@ -9,9 +9,17 @@ struct S { int x; }; -void test_gfx9_fmed3h(global half *out, half a, half b, half c) +void test_gfx9_builtins(global half *out0, half a0, half b0, half c0, + global unsigned long *out1, unsigned int a1) { - *out = __builtin_amdgcn_fmed3h(a, b, c); // expected-error {{'__builtin_amdgcn_fmed3h' needs target feature gfx9-insts}} + *out0 = __builtin_amdgcn_fmed3h(a0, b0, c0); // 
expected-error {{'__builtin_amdgcn_fmed3h' needs target feature gfx9-insts}} + *out1 = __builtin_amdgcn_s_bitreplicate(a1); // expected-error {{'__builtin_amdgcn_s_bitreplicate' needs target feature gfx9-insts}} +} + +void test_s_bitreplicate(global unsigned long *out, unsigned int a) +{ + *out = __builtin_amdgcn_s_bitreplicate(); // expected-error {{too few arguments to function call, expected 1, have 0}} + *out = __builtin_amdgcn_s_bitreplicate(a, a); // expected-error {{too many arguments to function call, expected 1, have 2}} } void test_mov_dpp(global int* out, int src, int i, int2 i2, struct S s, float _Complex fc) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index f1659f0cd803a..95dc490bf398a 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2476,6 +2476,7 @@ def int_amdgcn_inverse_ballot : // Lowers to S_BITREPLICATE_B64_B32. // The argument must be uniform; otherwise, the result is undefined. def int_amdgcn_s_bitreplicate : + ClangBuiltin<"__builtin_amdgcn_s_bitreplicate">, DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>; // Lowers to S_QUADMASK_B{32,64} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
