llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Mariusz Sikora (mariusz-sikora-at-amd) <details> <summary>Changes</summary> --- Patch is 32.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/202614.diff 6 Files Affected: - (renamed) clang/test/CodeGenOpenCL/builtins-amdgcn-async-load-store-lds.cl (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+2-1) - (modified) llvm/lib/Target/AMDGPU/FLATInstructions.td (+8) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.async.to.lds.ll (+91) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.async.from.lds.ll (+91) - (modified) llvm/test/MC/AMDGPU/gfx13_asm_vflat.s (+126) ``````````diff diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-async-load-store-lds.cl similarity index 98% rename from clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl rename to clang/test/CodeGenOpenCL/builtins-amdgcn-async-load-store-lds.cl index e32808c181981..110d99bd0be60 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-async-load-store-lds.cl @@ -1,6 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250 +// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1310 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250 typedef int v2i __attribute__((ext_vector_type(2))); typedef int v4i __attribute__((ext_vector_type(4))); diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 2d014be12cad7..ca69e69a28243 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ 
b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -549,7 +549,8 @@ defm GFX13Insts : AMDGPUSubtargetFeature<"gfx13-insts", "Additional instructions for GFX13+", /*GenPredicate=*/0, /*GenAssemblerPredicate=*/0, - [FeatureSWakeupImm, + [FeatureMcastLoadInsts, + FeatureSWakeupImm, FeatureSBarrierLeaveImm, ] >; diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td index 0f30ab24521cb..7d6433b8abab5 100644 --- a/llvm/lib/Target/AMDGPU/FLATInstructions.td +++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td @@ -3921,6 +3921,14 @@ defm GLOBAL_ATOMIC_OR_X2 : VFLAT_Real_AllAddr_Atomics_gfx13<0x5a, " defm GLOBAL_ATOMIC_XOR_X2 : VFLAT_Real_AllAddr_Atomics_gfx13<0x5b, "global_atomic_xor_b64">; defm GLOBAL_ATOMIC_INC_X2 : VFLAT_Real_AllAddr_Atomics_gfx13<0x5c, "global_atomic_inc_u64">; defm GLOBAL_ATOMIC_DEC_X2 : VFLAT_Real_AllAddr_Atomics_gfx13<0x5d, "global_atomic_dec_u64">; +defm GLOBAL_LOAD_ASYNC_TO_LDS_B8 : VFLAT_Real_AllAddr_gfx13<0x61>; +defm GLOBAL_LOAD_ASYNC_TO_LDS_B32 : VFLAT_Real_AllAddr_gfx13<0x62>; +defm GLOBAL_LOAD_ASYNC_TO_LDS_B64 : VFLAT_Real_AllAddr_gfx13<0x63>; +defm GLOBAL_LOAD_ASYNC_TO_LDS_B128 : VFLAT_Real_AllAddr_gfx13<0x64>; +defm GLOBAL_STORE_ASYNC_FROM_LDS_B8 : VFLAT_Real_AllAddr_gfx13<0x65>; +defm GLOBAL_STORE_ASYNC_FROM_LDS_B32 : VFLAT_Real_AllAddr_gfx13<0x66>; +defm GLOBAL_STORE_ASYNC_FROM_LDS_B64 : VFLAT_Real_AllAddr_gfx13<0x67>; +defm GLOBAL_STORE_ASYNC_FROM_LDS_B128 : VFLAT_Real_AllAddr_gfx13<0x68>; defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VFLAT_Real_AllAddr_Atomics_gfx13<0x75>; // ENC_VSCRATCH. 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.async.to.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.async.to.lds.ll index 6e326b133f981..2cac750d0616e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.async.to.lds.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.async.to.lds.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1310 < %s | FileCheck -check-prefixes=GFX13,GFX13-SDAG %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1310 < %s | FileCheck -check-prefixes=GFX13,GFX13-GISEL %s declare void @llvm.amdgcn.global.load.async.to.lds.b8(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr, i32 %offset, i32 %cpol) declare void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr, i32 %offset, i32 %cpol) @@ -23,6 +25,14 @@ define amdgpu_ps void @global_load_async_to_lds_b8_vaddr(ptr addrspace(1) %gaddr ; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; GFX1250-GISEL-NEXT: global_load_async_to_lds_b8 v2, v[0:1], off offset:16 th:TH_LOAD_NT ; GFX1250-GISEL-NEXT: s_endpgm +; +; GFX13-LABEL: global_load_async_to_lds_b8_vaddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX13-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX13-NEXT: global_load_async_to_lds_b8 v2, v[0:1], off offset:16 th:TH_LOAD_NT +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.load.async.to.lds.b8(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 1) @@ -36,6 +46,12 @@ 
define amdgpu_ps void @global_load_async_to_lds_b8_saddr(ptr addrspace(1) inreg ; GFX1250-NEXT: v_mov_b32_e32 v1, 32 ; GFX1250-NEXT: global_load_async_to_lds_b8 v0, v1, s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm +; +; GFX13-LABEL: global_load_async_to_lds_b8_saddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_mov_b32_e32 v1, 32 +; GFX13-NEXT: global_load_async_to_lds_b8 v0, v1, s[0:1] offset:16 +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.load.async.to.lds.b8(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0) @@ -58,6 +74,14 @@ define amdgpu_ps void @global_load_async_to_lds_b32_vaddr(ptr addrspace(1) %gadd ; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; GFX1250-GISEL-NEXT: global_load_async_to_lds_b32 v2, v[0:1], off offset:16 th:TH_LOAD_HT scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm +; +; GFX13-LABEL: global_load_async_to_lds_b32_vaddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX13-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX13-NEXT: global_load_async_to_lds_b32 v2, v[0:1], off offset:16 th:TH_LOAD_HT scope:SCOPE_SE +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 10) @@ -71,6 +95,12 @@ define amdgpu_ps void @global_load_async_to_lds_b32_saddr(ptr addrspace(1) inreg ; GFX1250-NEXT: v_mov_b32_e32 v1, 32 ; GFX1250-NEXT: global_load_async_to_lds_b32 v0, v1, s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm +; +; GFX13-LABEL: global_load_async_to_lds_b32_saddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_mov_b32_e32 v1, 32 +; GFX13-NEXT: global_load_async_to_lds_b32 v0, v1, s[0:1] offset:16 +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void 
@llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0) @@ -93,6 +123,14 @@ define amdgpu_ps void @global_load_async_to_lds_b64_vaddr(ptr addrspace(1) %gadd ; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; GFX1250-GISEL-NEXT: global_load_async_to_lds_b64 v2, v[0:1], off offset:16 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; GFX1250-GISEL-NEXT: s_endpgm +; +; GFX13-LABEL: global_load_async_to_lds_b64_vaddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX13-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX13-NEXT: global_load_async_to_lds_b64 v2, v[0:1], off offset:16 th:TH_LOAD_NT_HT scope:SCOPE_DEV +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.load.async.to.lds.b64(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 22) @@ -106,6 +144,12 @@ define amdgpu_ps void @global_load_async_to_lds_b64_saddr(ptr addrspace(1) inreg ; GFX1250-NEXT: v_mov_b32_e32 v1, 32 ; GFX1250-NEXT: global_load_async_to_lds_b64 v0, v1, s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm +; +; GFX13-LABEL: global_load_async_to_lds_b64_saddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_mov_b32_e32 v1, 32 +; GFX13-NEXT: global_load_async_to_lds_b64 v0, v1, s[0:1] offset:16 +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.load.async.to.lds.b64(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0) @@ -128,6 +172,14 @@ define amdgpu_ps void @global_load_async_to_lds_b128_vaddr(ptr addrspace(1) %gad ; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; GFX1250-GISEL-NEXT: global_load_async_to_lds_b128 v2, v[0:1], off offset:16 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; GFX1250-GISEL-NEXT: s_endpgm +; +; GFX13-LABEL: global_load_async_to_lds_b128_vaddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: 
v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX13-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX13-NEXT: global_load_async_to_lds_b128 v2, v[0:1], off offset:16 th:TH_LOAD_BYPASS scope:SCOPE_SYS +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.load.async.to.lds.b128(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 27) @@ -141,6 +193,12 @@ define amdgpu_ps void @global_load_async_to_lds_b128_saddr(ptr addrspace(1) inre ; GFX1250-NEXT: v_mov_b32_e32 v1, 32 ; GFX1250-NEXT: global_load_async_to_lds_b128 v0, v1, s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm +; +; GFX13-LABEL: global_load_async_to_lds_b128_saddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_mov_b32_e32 v1, 32 +; GFX13-NEXT: global_load_async_to_lds_b128 v0, v1, s[0:1] offset:16 +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.load.async.to.lds.b128(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0) @@ -153,6 +211,11 @@ define amdgpu_ps void @global_load_async_to_lds_b32_saddr_scale_offset(ptr addrs ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1250-NEXT: global_load_async_to_lds_b32 v0, v1, s[0:1] offset:16 scale_offset th:TH_LOAD_NT ; GFX1250-NEXT: s_endpgm +; +; GFX13-LABEL: global_load_async_to_lds_b32_saddr_scale_offset: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: global_load_async_to_lds_b32 v0, v1, s[0:1] offset:16 scale_offset th:TH_LOAD_NT +; GFX13-NEXT: s_endpgm entry: %idxprom = sext i32 %idx to i64 %gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 %idxprom @@ -166,6 +229,11 @@ define amdgpu_ps void @global_load_async_to_lds_b64_saddr_scale_offset(ptr addrs ; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1250-NEXT: global_load_async_to_lds_b64 v0, v1, s[0:1] offset:16 
scale_offset th:TH_LOAD_NT ; GFX1250-NEXT: s_endpgm +; +; GFX13-LABEL: global_load_async_to_lds_b64_saddr_scale_offset: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: global_load_async_to_lds_b64 v0, v1, s[0:1] offset:16 scale_offset th:TH_LOAD_NT +; GFX13-NEXT: s_endpgm entry: %idxprom = sext i32 %idx to i64 %gep = getelementptr i64, ptr addrspace(1) %gaddr, i64 %idxprom @@ -183,6 +251,29 @@ define amdgpu_ps void @global_load_async_to_lds_b64_saddr_no_scale_offset(ptr ad ; GFX1250-NEXT: v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1] ; GFX1250-NEXT: global_load_async_to_lds_b64 v0, v[2:3], off offset:16 th:TH_LOAD_NT ; GFX1250-NEXT: s_endpgm +; +; GFX13-SDAG-LABEL: global_load_async_to_lds_b64_saddr_no_scale_offset: +; GFX13-SDAG: ; %bb.0: ; %entry +; GFX13-SDAG-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; GFX13-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX13-SDAG-NEXT: v_lshlrev_b64_e32 v[1:2], 2, v[1:2] +; GFX13-SDAG-NEXT: v_add_co_u32 v1, vcc_lo, s0, v1 +; GFX13-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX13-SDAG-NEXT: v_add_co_ci_u32_e64 v2, null, s1, v2, vcc_lo +; GFX13-SDAG-NEXT: global_load_async_to_lds_b64 v0, v[1:2], off offset:16 th:TH_LOAD_NT +; GFX13-SDAG-NEXT: s_endpgm +; +; GFX13-GISEL-LABEL: global_load_async_to_lds_b64_saddr_no_scale_offset: +; GFX13-GISEL: ; %bb.0: ; %entry +; GFX13-GISEL-NEXT: v_dual_ashrrev_i32 v2, 31, v1 :: v_dual_mov_b32 v4, s1 +; GFX13-GISEL-NEXT: v_mov_b32_e32 v3, s0 +; GFX13-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX13-GISEL-NEXT: v_lshlrev_b64_e32 v[1:2], 2, v[1:2] +; GFX13-GISEL-NEXT: v_add_co_u32 v1, vcc_lo, v3, v1 +; GFX13-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX13-GISEL-NEXT: v_add_co_ci_u32_e64 v2, null, v4, v2, vcc_lo +; GFX13-GISEL-NEXT: global_load_async_to_lds_b64 v0, v[1:2], off offset:16 th:TH_LOAD_NT +; GFX13-GISEL-NEXT: s_endpgm entry: %idxprom = sext i32 %idx to i64 %gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 
%idxprom diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.async.from.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.async.from.lds.ll index cb50f436d4a1a..0e64df55c2378 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.async.from.lds.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.async.from.lds.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1310 < %s | FileCheck -check-prefixes=GFX13,GFX13-SDAG %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1310 < %s | FileCheck -check-prefixes=GFX13,GFX13-GISEL %s declare void @llvm.amdgcn.global.store.async.from.lds.b8(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr, i32 %offset, i32 %cpol) declare void @llvm.amdgcn.global.store.async.from.lds.b32(ptr addrspace(1) %gaddr, ptr addrspace(3) %laddr, i32 %offset, i32 %cpol) @@ -23,6 +25,14 @@ define amdgpu_ps void @global_store_async_from_lds_b8_vaddr(ptr addrspace(1) %ga ; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; GFX1250-GISEL-NEXT: global_store_async_from_lds_b8 v[0:1], v2, off offset:16 th:TH_STORE_NT ; GFX1250-GISEL-NEXT: s_endpgm +; +; GFX13-LABEL: global_store_async_from_lds_b8_vaddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX13-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX13-NEXT: global_store_async_from_lds_b8 v[0:1], v2, off offset:16 th:TH_LOAD_NT +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.store.async.from.lds.b8(ptr addrspace(1) %gep, ptr addrspace(3) 
%laddr, i32 16, i32 1) @@ -36,6 +46,12 @@ define amdgpu_ps void @global_store_async_from_lds_b8_saddr(ptr addrspace(1) inr ; GFX1250-NEXT: v_mov_b32_e32 v1, 32 ; GFX1250-NEXT: global_store_async_from_lds_b8 v1, v0, s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm +; +; GFX13-LABEL: global_store_async_from_lds_b8_saddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_mov_b32_e32 v1, 32 +; GFX13-NEXT: global_store_async_from_lds_b8 v1, v0, s[0:1] offset:16 +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.store.async.from.lds.b8(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0) @@ -58,6 +74,14 @@ define amdgpu_ps void @global_store_async_from_lds_b32(ptr addrspace(1) %gaddr, ; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; GFX1250-GISEL-NEXT: global_store_async_from_lds_b32 v[0:1], v2, off offset:16 th:TH_STORE_HT scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm +; +; GFX13-LABEL: global_store_async_from_lds_b32: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX13-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX13-NEXT: global_store_async_from_lds_b32 v[0:1], v2, off offset:16 th:TH_LOAD_HT scope:SCOPE_SE +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.store.async.from.lds.b32(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 10) @@ -71,6 +95,12 @@ define amdgpu_ps void @global_store_async_from_lds_b32_saddr(ptr addrspace(1) in ; GFX1250-NEXT: v_mov_b32_e32 v1, 32 ; GFX1250-NEXT: global_store_async_from_lds_b32 v1, v0, s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm +; +; GFX13-LABEL: global_store_async_from_lds_b32_saddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_mov_b32_e32 v1, 32 +; GFX13-NEXT: global_store_async_from_lds_b32 v1, v0, s[0:1] offset:16 +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr 
addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.store.async.from.lds.b32(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0) @@ -93,6 +123,14 @@ define amdgpu_ps void @global_store_async_from_lds_b64_vaddr(ptr addrspace(1) %g ; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; GFX1250-GISEL-NEXT: global_store_async_from_lds_b64 v[0:1], v2, off offset:16 th:TH_STORE_NT_HT scope:SCOPE_DEV ; GFX1250-GISEL-NEXT: s_endpgm +; +; GFX13-LABEL: global_store_async_from_lds_b64_vaddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX13-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX13-NEXT: global_store_async_from_lds_b64 v[0:1], v2, off offset:16 th:TH_LOAD_NT_HT scope:SCOPE_DEV +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.store.async.from.lds.b64(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 22) @@ -106,6 +144,12 @@ define amdgpu_ps void @global_store_async_from_lds_b64_saddr(ptr addrspace(1) in ; GFX1250-NEXT: v_mov_b32_e32 v1, 32 ; GFX1250-NEXT: global_store_async_from_lds_b64 v1, v0, s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm +; +; GFX13-LABEL: global_store_async_from_lds_b64_saddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_mov_b32_e32 v1, 32 +; GFX13-NEXT: global_store_async_from_lds_b64 v1, v0, s[0:1] offset:16 +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.store.async.from.lds.b64(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 0) @@ -128,6 +172,14 @@ define amdgpu_ps void @global_store_async_from_lds_b128_vaddr(ptr addrspace(1) % ; GFX1250-GISEL-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo ; GFX1250-GISEL-NEXT: global_store_async_from_lds_b128 v[0:1], v2, off offset:16 th:TH_STORE_BYPASS scope:SCOPE_SYS ; GFX1250-GISEL-NEXT: s_endpgm +; +; GFX13-LABEL: 
global_store_async_from_lds_b128_vaddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_add_co_u32 v0, vcc_lo, v0, 32 +; GFX13-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX13-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo +; GFX13-NEXT: global_store_async_from_lds_b128 v[0:1], v2, off offset:16 th:TH_LOAD_BYPASS scope:SCOPE_SYS +; GFX13-NEXT: s_endpgm entry: %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4 call void @llvm.amdgcn.global.store.async.from.lds.b128(ptr addrspace(1) %gep, ptr addrspace(3) %laddr, i32 16, i32 27) @@ -141,6 +193,12 @@ define amdgpu_ps void @global_store_async_from_lds_b128_saddr(ptr addrspace(1) i ; GFX1250-NEXT: v_mov_b32_e32 v1, 32 ; GFX1250-NEXT: global_store_async_from_lds_b128 v1, v0, s[0:1] offset:16 ; GFX1250-NEXT: s_endpgm +; +; GFX13-LABEL: global_store_async_from_lds_b128_saddr: +; GFX13: ; %bb.0: ; %entry +; GFX13-NEXT: v_mov_b32_e32 v1, 32 +; GFX... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/202614 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
