https://github.com/easyonaadit updated https://github.com/llvm/llvm-project/pull/150170
>From 01432a07787d4067b4625c2df8882b04faa073c7 Mon Sep 17 00:00:00 2001 From: Aaditya <aaditya.alokdeshpa...@amd.com> Date: Sat, 19 Jul 2025 12:57:27 +0530 Subject: [PATCH 1/2] Add builtins for wave reduction intrinsics --- clang/include/clang/Basic/BuiltinsAMDGPU.def | 25 ++ clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 58 +++ clang/test/CodeGenOpenCL/builtins-amdgcn.cl | 378 +++++++++++++++++++ 3 files changed, 461 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 878543566f0e3..c8b324193e9fb 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -351,6 +351,31 @@ BUILTIN(__builtin_amdgcn_endpgm, "v", "nr") BUILTIN(__builtin_amdgcn_get_fpenv, "WUi", "n") BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n") +//===----------------------------------------------------------------------===// + +// Wave Reduction builtins. + +//===----------------------------------------------------------------------===// + +BUILTIN(__builtin_amdgcn_wave_reduce_add_i32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_sub_i32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "UiUii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "UiUii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "iii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_add_i64, "WiWii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_sub_i64, "WiWii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWii", "nc") + //===----------------------------------------------------------------------===// // R600-NI only builtins. //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 7dccf82b1a7a3..28ea918b97cc5 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -295,11 +295,69 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs)); } +static Intrinsic::ID getIntrinsicIDforWaveReduction(unsigned BuiltinID) { + switch (BuiltinID) { + default: + llvm_unreachable("Unknown BuiltinID for wave reduction"); + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i64: + return Intrinsic::amdgcn_wave_reduce_add; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i64: + return Intrinsic::amdgcn_wave_reduce_sub; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64: + return Intrinsic::amdgcn_wave_reduce_min; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64: + return Intrinsic::amdgcn_wave_reduce_umin; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64: + return Intrinsic::amdgcn_wave_reduce_max; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64: + return Intrinsic::amdgcn_wave_reduce_umax; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64: + return Intrinsic::amdgcn_wave_reduce_and; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64: + return Intrinsic::amdgcn_wave_reduce_or; + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32: + case clang::AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: + return Intrinsic::amdgcn_wave_reduce_xor; + } +} + Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b32: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_add_i64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_sub_i64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_i64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_min_u64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_i64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_max_u64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_and_b64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_or_b64: + case AMDGPU::BI__builtin_amdgcn_wave_reduce_xor_b64: { + Intrinsic::ID IID = getIntrinsicIDforWaveReduction(BuiltinID); + llvm::Value *Value = EmitScalarExpr(E->getArg(0)); + llvm::Value *Strategy = EmitScalarExpr(E->getArg(1)); + llvm::Function *F = CGM.getIntrinsic(IID, {Value->getType()}); + return Builder.CreateCall(F, {Value, Strategy}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl index bf022bc6eb446..16f5a524f3094 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn.cl @@ -398,6 +398,384 @@ void test_s_sendmsghalt_var(int in) __builtin_amdgcn_s_sendmsghalt(1, in); } +// CHECK-LABEL: @test_wave_reduce_add_i32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.add.i32( +void test_wave_reduce_add_i32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_add_i32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_add_i64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.add.i64( +void test_wave_reduce_add_i64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_add_i64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_add_i32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.add.i32( +void test_wave_reduce_add_i32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_add_i32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_add_i64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.add.i64( +void test_wave_reduce_add_i64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_add_i64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_add_i32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.add.i32( +void test_wave_reduce_add_i32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_add_i32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_add_i64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.add.i64( +void test_wave_reduce_add_i64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_add_i64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_sub_i32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32( +void test_wave_reduce_sub_i32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_i32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_sub_i64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.sub.i64( +void test_wave_reduce_sub_i64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_i64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_sub_i32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32( +void test_wave_reduce_sub_i32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_i32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_sub_i64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.sub.i64( +void test_wave_reduce_sub_i64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_i64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_sub_i32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.sub.i32( +void test_wave_reduce_sub_i32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_i32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_sub_i64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.sub.i64( +void test_wave_reduce_sub_i64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_sub_i64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_and_b32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.and.i32( +void test_wave_reduce_and_b32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_and_b64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.and.i64( +void test_wave_reduce_and_b64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_and_b32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.and.i32( +void test_wave_reduce_and_b32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_and_b64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.and.i64( +void test_wave_reduce_and_b64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_and_b32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.and.i32( +void test_wave_reduce_and_b32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_and_b64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.and.i64( +void test_wave_reduce_and_b64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_and_b64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_or_b32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.or.i32( +void test_wave_reduce_or_b32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_or_b64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.or.i64( +void test_wave_reduce_or_b64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_or_b32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.or.i32( +void test_wave_reduce_or_b32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_or_b64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.or.i64( +void test_wave_reduce_or_b64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_or_b32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.or.i32( +void test_wave_reduce_or_b32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_or_b64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.or.i64( +void test_wave_reduce_or_b64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_or_b64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.xor.i32( +void test_wave_reduce_xor_b32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.xor.i64( +void test_wave_reduce_xor_b64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.xor.i32( +void test_wave_reduce_xor_b32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.xor.i64( +void test_wave_reduce_xor_b64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.xor.i32( +void test_wave_reduce_xor_b32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_xor_b64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.xor.i64( +void test_wave_reduce_xor_b64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_xor_b64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_min_i32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.min.i32( +void test_wave_reduce_min_i32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_min_i64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.min.i64( +void test_wave_reduce_min_i64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_min_i32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.min.i32( +void test_wave_reduce_min_i32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_min_i64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.min.i64( +void test_wave_reduce_min_i64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_min_i32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.min.i32( +void test_wave_reduce_min_i32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_min_i64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.min.i64( +void test_wave_reduce_min_i64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_i64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_min_u32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umin.i32( +void test_wave_reduce_min_u32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_min_u64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umin.i64( +void test_wave_reduce_min_u64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_min_u32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umin.i32( +void test_wave_reduce_min_u32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_min_u64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umin.i64( +void test_wave_reduce_min_u64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_min_u32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umin.i32( +void test_wave_reduce_min_u32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_min_u64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umin.i64( +void test_wave_reduce_min_u64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_min_u64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_max_i32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.max.i32( +void test_wave_reduce_max_i32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_max_i64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.max.i64( +void test_wave_reduce_max_i64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_max_i32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.max.i32( +void test_wave_reduce_max_i32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_max_i64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.max.i64( +void test_wave_reduce_max_i64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_max_i32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.max.i32( +void test_wave_reduce_max_i32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_max_i64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.max.i64( +void test_wave_reduce_max_i64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_i64(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_max_u32_default +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umax.i32( +void test_wave_reduce_max_u32_default(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u32(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_max_u64_default +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umax.i64( +void test_wave_reduce_max_u64_default(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u64(in, 0); +} + +// CHECK-LABEL: @test_wave_reduce_max_u32_iterative +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umax.i32( +void test_wave_reduce_max_u32_iterative(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u32(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_max_u64_iterative +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umax.i64( +void test_wave_reduce_max_u64_iterative(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u64(in, 1); +} + +// CHECK-LABEL: @test_wave_reduce_max_u32_dpp +// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wave.reduce.umax.i32( +void test_wave_reduce_max_u32_dpp(global int* out, int in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u32(in, 2); +} + +// CHECK-LABEL: @test_wave_reduce_max_u64_dpp +// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.wave.reduce.umax.i64( +void test_wave_reduce_max_u64_dpp(global int* out, long in) +{ + *out = __builtin_amdgcn_wave_reduce_max_u64(in, 2); +} + // CHECK-LABEL: @test_s_barrier // CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.barrier( void test_s_barrier() >From 45ff803bd0166cd241ca2ecc2dfabf712a3c079b Mon Sep 17 00:00:00 2001 From: Aaditya <aaditya.alokdeshpa...@amd.com> Date: Mon, 28 Jul 2025 14:35:08 +0530 Subject: [PATCH 2/2] Using `int32_t` inplace of `int` --- a.out | Bin 0 -> 22264 bytes clang/include/clang/Basic/BuiltinsAMDGPU.def | 36 +++++++++---------- 2 files changed, 18 insertions(+), 18 deletions(-) create mode 100755 a.out diff --git a/a.out b/a.out new file mode 100755 index 0000000000000000000000000000000000000000..2dbcd9ad6edc6908ee25aacddc07417f96ca46f2 GIT binary patch literal 22264 zcmeHPdvIJ=c|Te|A`)AY^8k^A<Pzi{6SS6ONnQs+R+eP164_QQCzt@ay4r`dWVO5O z-gWGRViU&!Rk&k9NnOgcX$OKKJRBaW5@uvg?8Zr(b__H00RdcwbXE-%)6$U1P}JY= z-1Duj*1M4z@(0t=k@kM)`<?Im&bi+`_uTWi_w4T**b=CzQ4#{`U5eb1t(Jzu`lwWB zhlzlMRY+Ze<F)EdY9;vP8uQY_%F+>`V*w8rTI;cD87TFRne<};OQ#6ET60PXl@gLt zZ$4Ni>d_+`Jyxh{@ObL2z)x^FKcNj5d2&MidOXL6-{qx0e5a)&LK{u_hu3)N<$6<L z51-0fIwJIC4^~rj1cNCj%`v|jli!TVPv{xr2%+N7oiDgC`sG5)DgGQZdS^PJjBuvY zT0mo(Q%b1#{tuwXa{jh~`uv?TemiBZyHLv*6e{_xPbEjXI@YIREvaO>u(xGzsH>%` zBbd(w+qFHqkLR>?`)%3+-D<MXe6<60F0GJw88E8?ejPCW%0mc1C46fY+y-7L-+fi^ zi7NP!D)^_X;GeC6KU)QV0~}PToNumzZ?1xGuY$j?3ZAZlKT-vss)F-AsZ{<;P&q2$ z4OQ@P6?|J2JXQt2rwV=o{$Hc6RN<V37@7l=n;`~Dd^m4uUhrmPXK=fR>qLDB;67M} zdtw>dyA_~3u22`DRjI{qCF0(1CDQow(wV~hxdwlqgzYfm-aWNUeQ5z-qkaN@xRf4H zuNWSX%Dr`EJzLt(0%LI3J%}c1rTP|l+MC$F4zY{WX~R1&|0!a%>bp2H3|_zQ5{0t= zm*ERnCSS)EV>xbU>VM-C522U=)xaAZfo|e6g7ixKlzOAGz!1Cq=&GKk@l`$R`o24$ zY#S{|)Xuw3&b7y#WE#lmcqVNS8*DB<n#{ZLoSn!y<8eEgPGo>a_d0eWnRZgid*WbY z@ycRj$?RYz+vB8CJ(+YOIa<g$ZZeZrzz3W{Iy!c1JeQ8A)HozEQNr8iWAX85b{~w> zNFhIVz7_k9A-5x%DY%NUwl3Sw<lS7{8AmDe@toTq1=<!Jb8@zubCPbpKib!~6WE3k zC!dUpbpI}USF7DO)MXFeI<&z~!P@qrOe)`(WbD=<w=?SQ%f_MHw!z+J-<^!b)5sy| z_U-XRH)IQLU(}0tCQ>LXQc&tGxj3Ao3mMPlbS#zczpyezccDa`D3SKc*0|f3%Vlyy zZZ4S~)s}d=?pQ8!w>{#-?5LA>SsFMbo{pt{>oZsJy-B)p`;Z$-X7aF^k9YLr9DCZ# zS(f3w8rSu}aXn>TsGX*^Y;oMps2(|U{Yo&dxvl{Es@|5CS~D+>`sj?uV99%qZKNOV z4m{70kauIz8*bp^txIhk=-=G4d$+wI*x`={H!a}W+Nd`gMHQ!7o7&pb1C>^yQJ*2Z zJ=h-X^i&bw(4p36vhMm^COY1-G1wMt<&$=ODz#^P{YWysJ{UCrKxt~+Go_9UTfEs! zo1vrnaMXvJTUg@{`*3sXYW$cFmuEc_j{9)-+ayT)NKE>MOq2GJ_?rlraMtv@*uC@T zTlC@ne9!rCf4(pK@CE&z4_Z1W+BaQD&DZ*H>7y~B!H1(^D}_cMesKv^%JSi87)znq zhp#H3O11j%)jmAr!|Qx_*oULxE`^8>Zz!Q?pMCfmAHLg%`=6`BKAe3j31dF|asffJ zKKu$FzSoDp-iIIX;cxKaAM@eY`0yh>ywQgr_2F;y;Sc+8@A={-9`oT>`Q&R%uX8wJ zY4q)uiu4a!XcRNK8!cryu7wV4+)~MOEV(BcD>x}@kCQ6I^GJ2$dDp&6KPT+Fojq|p zkz$2t+;(CyyU^9)%VsIM<bl`31Ge2W(7k=Dy<^LkfgRnw_U7BR_YU;gcB$=dSeT96 zOJnGNa1ds9b+i=Hccn9Tr_IQ0G+k(s(d?ADg|9V6@Pymb(SoPmXf#d2-&o#hX&FuI z-PGzV%xZ~S^1zY@mOQZJfh7+td0@!{OCDJAz>)`+Jh0?}B@Zll;5G8VesBFhEOVgC z)LFfjBId&;ve!)Z-Hb_5uK&Ld$2F>6&;L?a<jU#GHJ$;XG8al%^7JGY9>ZpmRjAA; zP*2ax%4Dld`WY%GBGj)(lVLN78ot5HPiWR;KO==Q^B&BPddH34@ppKFLXFzI&x{`D zPc9@s$yfX$>-ZO1PUBwnyROHFSYU*5dcfd9#XWR?3TYN-_qN`xgIIPy_!VzDC+JOK z=0EX6b?Ku)XEd9$v2>Mo)#rjvZZ!YN`@a&zjAkyL&)fN|6OBJIn#zngsm}*9i9|k5 zu{_q=Y99^iiBbD5OqG7w6R}4Mi9|eiP(*7z(dB;8V<Y=)tyq#;^=eX21V?k3Le|d5 zN5|u77fXG6<1w2BP><^qL6J*|SOgqR*M9RnoZ<~%2&SC$Xu%ncAKQ_Qr+Wshp2td% zy(gZ_V?FS}fSL-9JA3U!%5m+xGr7A=_OS2|%W>*yEIpoBWFq)m!E7$M2a8dqg854Q zXfR*$_$7~dOg@`TrOZY6d@#Rnd?b_l*XtI^C3shCDp*J-6Pet&E~!0V)S9mc3;B57 zj_pHvvA&39)#zPM;WCu64m{>D{;a<m<g9igmw}(@vhbHpg~6iiXxx1QT;+*DsaWE~ z8dED#`N{(Chf9~CcAEA;Ce7are9bc4k-RAp!uqIGFX$R|v6A~)?k7_Dao)e77OND? zxwfv<t{d))9Dn<YFWh?QmCJZP@aJD!0$U1{*M(ig*^ABn<6?O0!*;ZgjM*DEY)W** z+B@31I@;U2I^--%+5XlM(`?ZhuNuPYtLo#B@ULCHs>1Ji;(axN8MeIvwQ>eTeYobn z>A>pyrted#Y5&Z}Ppk|C9t;EmA65sl&wGvhNARk~fu-M)50>h{QXM#99w%zbbc|$; zUe=c79fC0vk+uC{HZ}<V^P9O0X`@LuW>i=dndlGbY*|ruG%WKlwA%5~MzD1>=~~VC z{Me1_*CRSs7-@-*7uG|%Wgs~^=BDv7O-p|oZ#UshCjELxOE#DJKs@SNR(ofAsNL#{ zb%r`)p^fc{k=TZ4=cbOBv(ecQZQbNVH;uHn$Kwe+Z8j#l)~T}W+e_Ia<FJ=`Fq#>^ zQNKjOgJczCAzRxzHncmf8#jhF#yVQNHjZ?3wRc9_V~J2G)IQSMk#LZ>A-Zv$8c3$m zlf2Oy80fXybuL!(Yn+AOvkT&UZY%YRx$=(2+YGLW?=WO=8J0-WM+`2!(63d&zh4Dk ztqb&vInfu|T?Nln!T+iX{>>`*vsLhS(lLQrCF3V4&krD9ssHuAz$^8?EZC{k|5Cwp z#*M2WI+*CLsvuUCgJVv9OqnBbH>ZNROw4f|6~v2Ku9jwZ6mUG1%h*{AGx!o&a3r5s zL2sB6jOINJ!?MGQ{&;e{Fb)}1TaE%0`^c^{0y>6lyqHH9?Wm;-<2Jj&7bM8R(S-<( z;H^W9N-jj&H#G2uF^lv(y^j2zXmrtX=pMfwx8$REyD>Vps0OYWhX}eHC_V=X7o?Mm zM+<tmbV1U5l?ECvUJXp86bBL6f*xa)E=3upy<Uc0h;J7y%_5z2yZ1fYySMfC$j$OQ zP~MhoYovXuHg1<J<hdd|b@{9keR+Nf9X5h|3^6a6pOEJo-xYglZZ0GG^86CoZ1g2v z^bL%+biH{TNtfrH&~kk#ztC%;&oP(i%kx#}Fw*>i=im=-1w%~qWn3sU>=#D7oL>)& z`ttmj@us{4Sek!=j1FZ5=mbbz88=EhD)T-vFC^Un@t=&-_<0C1(LZkHvt*uXP#gsr zhO+F{Hed_%WnRm4V9Mpo`JEMPG9SsGaepY+caT_)QT{^p_Za<FD*1xw?<g}atkC_? zDP4P?zA`UO9yNmWH{X{O|4aS<C@}H8U;U_gx$@8aAmWm*z@I>*l75?c3A3-PaOL`2 z2Kyr#Yp}$mrZrbBdq)g0Ud9=W<qv?K13e1L4<5b*5(a$^^a$urK^yR5<|-Jo@M0y6 zwt~8#M?gOgIs?k@G|qy47L)<AAv)CDvs2aVZK%0w)kU>OkVc&E<%W>w^_1wnNLN`N zI7=}IT<SFCBKQpe3xTTcx`qb=J*!vdVGJzO$M8!b{T9$tdZ14WF#TireFo_+(|hZN z10!`~y>;3CIvAkMWBC0V>6g&io9i07>l%CO4%8f6_Q!$1f7E@frnl}$O+SFZx9S>^ z{pPybZgAeW20wwy8Jv*x*?Dd+<M%$i)n<XRJa-@?;`}b-8;HD@cyC>6U}qg?j8rr5 zUn6oI@ojYtyNrAocn9p@5-bS+U4P9hZ9nUu`1YNVi62J}{^Z=?uD-UJwr3-UZ)$_e z+!fz%RO*+BVEwiCYdy-Jd~QD67MXbAr4=u%LydC-FTVBZ72jhtGP&aG(2E>=S-F?N z)Vuz3ZZ2}T^EA?muPj&U{U^nqr~fmfzv<}}KQ{XJivFs|<V_DWL-E^Cgy9d&Cmg=% zg`dsM6`#=-y)s1({&aU_;zLcL3Abta#N|zq!@W(R$V6{bn6c%IMIsYtNe3f`Kh)G5 zIqWtyMh@?4YKR;jXsRtv>RjBW=E+@6t&z!rrpDgM-X?1RiQyBwS6gb%ZK}nuvG_qs z9y~R6a^5?MBi*+Tp$KCqkfw?@B_>XuR#9+~)4ffoB#0o}+G#DY9mhj6^@k3^)sacS zPZEwy{O}0?s(7<W{t}Z<5aMr|hseY$S_T)ZukG35S}j@FA3<4b^+_T()&~Wuzv1EO z<IMGGBZ;xy#2OeIW|=~C1^ltA$)YP7iw97LBa?1Z2(Adz70q--D_t=tu4sfSnu`fa z9z4}RX*q`<J;5fWcrTK8iZ`LuC-^q~^gcN8Nxqw1+>f-$4H_*TM0|RfNmPqWoGv;v zI=SLvT>HpmxEPr{+M2gi<k0u)@8?%v2ks3icV+$KJv}eJJ~Hv6;!Z8T4AE)cKy!nw z_0#7(+gq6gi#xOk++4g3F_!8#kZhabReW#vohMK5t^VmK3Q_FUDkt@|DsDt}P!DVD zN0EuAi)#^`&Onf68#R7<8Z}qV{u~FNonPZ@`X`>QaB)YF0_7J$1}1(*qh}B;89fOG z#;5t5o#oecKBJ#Unl^f#w^t(tMjrw(M(;(mWHb$iM)?$bF;d(Q)@OE5r=HYhEp7k< zO9L9y=iG*9$x@TXPfu@xrQ&J$hmN=zpsuDxK2GYd?S%p>^lSnG3C-i?XvPh_8f|st z&|Km26H}e=_U)dv$i#|Yu7%>eXX@9WX4bD+-Zafy?#aVAJi$Zv)Eok9Jj-`tc)()4 zYo5hwke459KL!)9?OrihyoPp9ulREy3=pmLH(qEm22V7gP8L6h0-oTvZPR3*oXJ{h z_v~?iZ0{nISFC74J$Y&NFMvGvoW>6Rv=)`~Z&8mUhi2Ts+!<}$lV=*ULY8x#-+K#= zYoEul{@O5N<i}W=*|R*CwcjBRyK44~=Kqd7%B^N+H2)O&W#o@*{z>xMs}F1bYvlQe zn?0iWFOp|hZ}x!Z|C&6z-?LfGf0{fSqS;~1KS=&{<OenXN91`2&4x99A9+6BXInLY z5BXK(EzRf1vllblp!pAwXRl;dX?}z}dl|Fmegk0b9pw3Vo;|Dio#gSv#QAIf7V>;d z&CY0kGkG=_v&S`mBY8G$vkz;&ojjgwIDgH*jXZh|IDgGwN1m%QvsukwNgmG-oWJHT zCC}l(>>&6vTP;M<7eUR^!O(_?$IG`qS1t3BdEprN4I_i_{D#rb_m#=>n?`>+ziDLP ziTDFA97E`0OH=&>LF}03=GR4dLJYDzeb&euvnp#MrWooO)Nls<i~yB<SaBGN0ZUU} zcm}c0ipEe}X=y5b6*jF5GQT)u(&<<R`F{N>k1g4gZ(OGCv~&vlSPXKWtY{3U%ynQz zV>oO0a~KnnKeNIjQMiPfZ!r0z#%Z26%xe+YGBC*Z@tW}+R&KL2_j*IGH?+&pEr#B1 zXu{AvhCX2ELxw(L=;MYyZRkH4`tOFmWa!H6CVxY(H?+&pEr#B1Xu{AvhCX2ELxys( zeQExe^V_e<6tl0UER3?<;I~|-%zfuwMRe_v5D3((8PhJw{m1)qIp&KOT2YD(UjXmP zNIMoX0v2cuzF405AY=3G6w_-k;tX|JgiGNT#3=uakq?`c_k+^Dw1<>q7{!nBwRn6o zNIUq4I(}K{{+IjuevMzQ<h~dDS5T}8G%C5z1fPU_rStt0;4II%TP?)q$4-wUPF&h? zi-`#Th#$+z$2^0S^9P7iUYT}9kE?+FT+3Xk8jZY^=W>=)ty1j1Fi88j4mj<wXToru zJ}UJN;FZe%6dI<Qz`XyZoyxOdfj5@gX=6tH3k3pKoY($5&3pq5rFK%}pVRo7QhO=* z1F*w-b=;Iw#)aPg2=JS!Z`T`zDC}H{^EzvugO`~0o;?=}XD|DVi2VuRoM#_gZD~l( zi}M_km&lTS5qLudJAVV*@(Y^ycYup;{mI2DI9{cwD1S9@o|k+-=^A6VYTR$xi^qZU zyx1*ekos__iu|Xl;Ge02KT-uhUIqVq;Fb1q;Qj`c{QMSNw@P?R6?}UY{EjL(d#aV} zXMtDRSK^*3@?WchpQ?gCUj=^yDqf}XTwMk4FnIYs7Xww~-%|z8R>2Qf!5;xm|5#=1 zk_sldh?@1*r-I#^`&(QGD@^`%Dzh-_Em|4I##+_<8dhobDxPwvsu!%BU^a!7Ct1<r zZU70##Ygy9^TG}bdYgeE?=3YIjE|XJD#l{4ES#)^amxl3#I6-d<ryVlJGq>*&x0k7 zyuAgp(G0U*xN7FDlOe^kp%>9EV1dlGospzXuS9e12zE(G2eD&>9n0vQB<5p5TLVeG zvj*R8>WkQ0c6M*;vj_TzcG=vK!{|UVGs3lM57OtQ6B$DyNXdvkbqyu;a!oLr&0@EQ zflctCx8p{CvahWboZe=_b_WM{+2hLII<RAN_keBZ3nO;F%{4QlIphw6P|}Ps%*!&K zi4{_DJL+bz)dly0u(9HV3uTWv*pVWB{@YoekasohyK_!98y8JuD@I%J7q+yZw_+)L z`;C1<^gDhob`pu&*(7$@(A$?_4-r%fULC_r;qejgP#?DV=-!M^c$MA11L;@*wXtwV z|8ge?PeV39@!Kfd4aGZM<7(Nw;?;LX$!ClMc$unflM;*XNk;MBq6;@`FTdLN?%swd zm3CEGWKDBv*Of(gg86^G>xzG;mP(ticz0ywcPYzTqQ33DY>i&K#ar}K&@IwGbPIGi z7O)KUco}zloF^-BQce(EgrDfkIOUW9ETs7Z9%l;z=Os915q+6oC>lU0^I%z}UnTu& zuD^0DFZwdxKVt?&T%Tk>H$VrG6Z&?boEs2c<_8+ggvW8C$NX9L^73=J(!c&J^BXfp zzuD+Z{!)Get3E`SCi%<!Nzv%b{E5iZ2mY`P49g_?G9MH&6EMe&aC!dXzjqt`%|=e< zht3*(|NIc=Y^406?*L<&+Ks;S>jzhR0LO*?Ao_wO5uv`6Uo}`>5@!Mtrq+72bTqi| z1w{P%GVYq{^dzL5B43{WN6Yl*_Z{>!Vxnkh`St%)nZAsdLtUQ2ZWhiL%JctAq*26N z?$Y>5_J{0{fxWR&cKYgbW%@E7cIus;g3PNK_4DfQqh<Ose|FaBo69Kb=jlIIrhm?i z8_yYi&XxN^dHFdW=cKUIPni#t{UaB<eosLcqw|u!%o|LZ8^7G&<vhpz^u6RS^LMiE z<r!0c-v11uFX!>kz$ha6vLDrvFhGQ6kCcw0FTnGN5EK2WVM`}Xg*8zTo}4d~xl_G} zG=Kgw-WGi+r<f?u{}*NYGXMKb*wZMAqM_ybuK@R#U*?gHcY6vPpZY`C&mhi;Dwe;s z3>W>y-k+$2a#<Fhr5=y8nj7mHuvg<J<(GQJHBj-rr(yK^J$fAr=L==5y0VJ?&RaaC Le=BP+%Ju&bz?d`u literal 0 HcmV?d00001 diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index c8b324193e9fb..a9bf747d0aaa1 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -357,24 +357,24 @@ BUILTIN(__builtin_amdgcn_set_fpenv, "vWUi", "n") //===----------------------------------------------------------------------===// -BUILTIN(__builtin_amdgcn_wave_reduce_add_i32, "iii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_sub_i32, "iii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "iii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "UiUii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "iii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "UiUii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "iii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "iii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "iii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_add_i64, "WiWii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_sub_i64, "WiWii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWii", "nc") -BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWii", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_add_i32, "ZiZiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_sub_i32, "ZiZiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_i32, "ZiZiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_u32, "ZUiZUiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_i32, "ZiZiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_u32, "ZUiZUiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_and_b32, "ZiZiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_or_b32, "ZiZiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_xor_b32, "ZiZiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_add_i64, "WiWiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_sub_i64, "WiWiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_i64, "WiWiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_min_u64, "WUiWUiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_i64, "WiWiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_max_u64, "WUiWUiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_and_b64, "WiWiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_or_b64, "WiWiZi", "nc") +BUILTIN(__builtin_amdgcn_wave_reduce_xor_b64, "WiWiZi", "nc") //===----------------------------------------------------------------------===// // R600-NI only builtins. _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits