Author: Michael Liao Date: 2020-11-05T19:06:50-05:00 New Revision: 23c6d1501d80073784cab367d30d50419ffa5706
URL: https://github.com/llvm/llvm-project/commit/23c6d1501d80073784cab367d30d50419ffa5706 DIFF: https://github.com/llvm/llvm-project/commit/23c6d1501d80073784cab367d30d50419ffa5706.diff LOG: [amdgpu] Add `llvm.amdgcn.endpgm` support. - `llvm.amdgcn.endpgm` is added to enable "abort" support. Differential Revision: https://reviews.llvm.org/D90809 Added: llvm/test/CodeGen/AMDGPU/amd.endpgm.ll Modified: clang/include/clang/Basic/BuiltinsAMDGPU.def clang/test/CodeGenCUDA/builtins-amdgcn.cu llvm/include/llvm/IR/IntrinsicsAMDGPU.td llvm/lib/Target/AMDGPU/SOPInstructions.td Removed: ################################################################################ diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index f5901e6f8f3b..123a7ad212da 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -214,6 +214,8 @@ BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc") BUILTIN(__builtin_amdgcn_read_exec_lo, "Ui", "nc") BUILTIN(__builtin_amdgcn_read_exec_hi, "Ui", "nc") +BUILTIN(__builtin_amdgcn_endpgm, "v", "nr") + //===----------------------------------------------------------------------===// // R600-NI only builtins. //===----------------------------------------------------------------------===// diff --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu index 1c3a79064595..8f0d0d0801bd 100644 --- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu +++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu @@ -16,3 +16,9 @@ void test_ds_fmax(float src) { __shared__ float shared; volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false); } + +// CHECK-LABEL: @_Z6endpgmv( +// CHECK: call void @llvm.amdgcn.endpgm() +__global__ void endpgm() { + __builtin_amdgcn_endpgm(); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 304377ce28ab..bc04fa40f2a8 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1577,6 +1577,10 @@ def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty], // FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn? def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>; +def int_amdgcn_endpgm : GCCBuiltin<"__builtin_amdgcn_endpgm">, + Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects] +>; + // Copies the active channels of the source value to the destination value, // with the guarantee that the source value is computed as if the entire // program were executed in Whole Wavefront Mode, i.e. with all channels diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 08966d7d62eb..00527171ff11 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1118,6 +1118,7 @@ let isTerminator = 1 in { def S_ENDPGM : SOPP_Pseudo<"s_endpgm", (ins EndpgmImm:$simm16), "$simm16"> { let isBarrier = 1; let isReturn = 1; + let hasSideEffects = 1; } def S_ENDPGM_SAVED : SOPP_Pseudo<"s_endpgm_saved", (ins)> { @@ -1328,6 +1329,11 @@ def : GCNPat < (S_ENDPGM (i16 0)) >; +def : GCNPat < + (int_amdgcn_endpgm), + (S_ENDPGM (i16 0)) +>; + def : GCNPat < (i64 (ctpop i64:$src)), (i64 (REG_SEQUENCE SReg_64, diff --git a/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll new file mode 100644 index 000000000000..ac9cd0699118 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s + +define amdgpu_kernel void @test0() { +; CHECK-LABEL: test0: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_endpgm + tail call void @llvm.amdgcn.endpgm() + unreachable +} + +define void @test1() { +; CHECK-LABEL: test1: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_endpgm + tail call void @llvm.amdgcn.endpgm() + unreachable +} + +define amdgpu_kernel void @test2(i32* %p, i32 %x) { +; CHECK-LABEL: test2: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_load_dword s2, s[0:1], 0x2c +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_cmp_lt_i32 s2, 1 +; CHECK-NEXT: s_cbranch_scc0 BB2_2 +; CHECK-NEXT: ; %bb.1: ; %else +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; CHECK-NEXT: v_mov_b32_e32 v2, s2 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-NEXT: flat_store_dword v[0:1], v2 +; CHECK-NEXT: s_endpgm +; CHECK-NEXT: BB2_2: ; %then +; CHECK-NEXT: s_endpgm + %cond = icmp sgt i32 %x, 0 + br i1 %cond, label %then, label %else + +then: + tail call void @llvm.amdgcn.endpgm() + unreachable + +else: + store i32 %x, i32* %p + ret void +} + +declare void @llvm.amdgcn.endpgm() _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits