https://github.com/mariusz-sikora-at-amd created 
https://github.com/llvm/llvm-project/pull/202614

None

>From 3212327a4d28c782e4d56cc676b0a479a0afd45f Mon Sep 17 00:00:00 2001
From: Mariusz Sikora <[email protected]>
Date: Fri, 15 May 2026 05:47:16 -0400
Subject: [PATCH] [AMDGPU] Async memory LDS copy for gfx13

---
 ...> builtins-amdgcn-async-load-store-lds.cl} |   1 +
 llvm/lib/Target/AMDGPU/AMDGPU.td              |   3 +-
 llvm/lib/Target/AMDGPU/FLATInstructions.td    |   8 ++
 .../llvm.amdgcn.global.load.async.to.lds.ll   |  91 +++++++++++++
 ...llvm.amdgcn.global.store.async.from.lds.ll |  91 +++++++++++++
 llvm/test/MC/AMDGPU/gfx13_asm_vflat.s         | 126 ++++++++++++++++++
 6 files changed, 319 insertions(+), 1 deletion(-)
 rename 
clang/test/CodeGenOpenCL/{builtins-amdgcn-gfx1250-async-load-store-lds.cl => 
builtins-amdgcn-async-load-store-lds.cl} (98%)

diff --git 
a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-async-load-store-lds.cl
similarity index 98%
rename from 
clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl
rename to clang/test/CodeGenOpenCL/builtins-amdgcn-async-load-store-lds.cl
index e32808c181981..110d99bd0be60 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-async-load-store-lds.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-async-load-store-lds.cl
@@ -1,6 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 6
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu 
gfx1250 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu 
gfx1310 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX1250
 
 typedef int    v2i   __attribute__((ext_vector_type(2)));
 typedef int    v4i   __attribute__((ext_vector_type(4)));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 2d014be12cad7..ca69e69a28243 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -549,7 +549,8 @@ defm GFX13Insts : AMDGPUSubtargetFeature<"gfx13-insts",
   "Additional instructions for GFX13+",
   /*GenPredicate=*/0,
   /*GenAssemblerPredicate=*/0,
-  [FeatureSWakeupImm,
+  [FeatureMcastLoadInsts,
+   FeatureSWakeupImm,
    FeatureSBarrierLeaveImm,
   ]
 >;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td 
b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 0f30ab24521cb..7d6433b8abab5 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -3921,6 +3921,14 @@ defm GLOBAL_ATOMIC_OR_X2              : 
VFLAT_Real_AllAddr_Atomics_gfx13<0x5a, "
 defm GLOBAL_ATOMIC_XOR_X2             : VFLAT_Real_AllAddr_Atomics_gfx13<0x5b, 
"global_atomic_xor_b64">;
 defm GLOBAL_ATOMIC_INC_X2             : VFLAT_Real_AllAddr_Atomics_gfx13<0x5c, 
"global_atomic_inc_u64">;
 defm GLOBAL_ATOMIC_DEC_X2             : VFLAT_Real_AllAddr_Atomics_gfx13<0x5d, 
"global_atomic_dec_u64">;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B8      : VFLAT_Real_AllAddr_gfx13<0x61>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B32     : VFLAT_Real_AllAddr_gfx13<0x62>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B64     : VFLAT_Real_AllAddr_gfx13<0x63>;
+defm GLOBAL_LOAD_ASYNC_TO_LDS_B128    : VFLAT_Real_AllAddr_gfx13<0x64>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B8   : VFLAT_Real_AllAddr_gfx13<0x65>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B32  : VFLAT_Real_AllAddr_gfx13<0x66>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B64  : VFLAT_Real_AllAddr_gfx13<0x67>;
+defm GLOBAL_STORE_ASYNC_FROM_LDS_B128 : VFLAT_Real_AllAddr_gfx13<0x68>;
 defm GLOBAL_ATOMIC_ORDERED_ADD_B64    : VFLAT_Real_AllAddr_Atomics_gfx13<0x75>;
 
 // ENC_VSCRATCH.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.async.to.lds.ll 
b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.async.to.lds.ll
index 6e326b133f981..2cac750d0616e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.async.to.lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.async.to.lds.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck 
-check-prefixes=GFX1250,GFX1250-SDAG %s
 ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < 
%s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1310 < %s | FileCheck 
-check-prefixes=GFX13,GFX13-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1310 < 
%s | FileCheck -check-prefixes=GFX13,GFX13-GISEL %s
 
 declare void @llvm.amdgcn.global.load.async.to.lds.b8(ptr addrspace(1) %gaddr, 
ptr addrspace(3) %laddr,  i32 %offset, i32 %cpol)
 declare void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) 
%gaddr, ptr addrspace(3) %laddr, i32 %offset, i32 %cpol)
@@ -23,6 +25,14 @@ define amdgpu_ps void @global_load_async_to_lds_b8_vaddr(ptr 
addrspace(1) %gaddr
 ; GFX1250-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
 ; GFX1250-GISEL-NEXT:    global_load_async_to_lds_b8 v2, v[0:1], off offset:16 
th:TH_LOAD_NT
 ; GFX1250-GISEL-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_load_async_to_lds_b8_vaddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 32
+; GFX13-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX13-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX13-NEXT:    global_load_async_to_lds_b8 v2, v[0:1], off offset:16 
th:TH_LOAD_NT
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.load.async.to.lds.b8(ptr addrspace(1) %gep, 
ptr addrspace(3) %laddr, i32 16, i32 1)
@@ -36,6 +46,12 @@ define amdgpu_ps void @global_load_async_to_lds_b8_saddr(ptr 
addrspace(1) inreg
 ; GFX1250-NEXT:    v_mov_b32_e32 v1, 32
 ; GFX1250-NEXT:    global_load_async_to_lds_b8 v0, v1, s[0:1] offset:16
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_load_async_to_lds_b8_saddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_mov_b32_e32 v1, 32
+; GFX13-NEXT:    global_load_async_to_lds_b8 v0, v1, s[0:1] offset:16
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.load.async.to.lds.b8(ptr addrspace(1) %gep, 
ptr addrspace(3) %laddr, i32 16, i32 0)
@@ -58,6 +74,14 @@ define amdgpu_ps void 
@global_load_async_to_lds_b32_vaddr(ptr addrspace(1) %gadd
 ; GFX1250-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
 ; GFX1250-GISEL-NEXT:    global_load_async_to_lds_b32 v2, v[0:1], off 
offset:16 th:TH_LOAD_HT scope:SCOPE_SE
 ; GFX1250-GISEL-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_load_async_to_lds_b32_vaddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 32
+; GFX13-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX13-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX13-NEXT:    global_load_async_to_lds_b32 v2, v[0:1], off offset:16 
th:TH_LOAD_HT scope:SCOPE_SE
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) %gep, 
ptr addrspace(3) %laddr, i32 16, i32 10)
@@ -71,6 +95,12 @@ define amdgpu_ps void 
@global_load_async_to_lds_b32_saddr(ptr addrspace(1) inreg
 ; GFX1250-NEXT:    v_mov_b32_e32 v1, 32
 ; GFX1250-NEXT:    global_load_async_to_lds_b32 v0, v1, s[0:1] offset:16
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_load_async_to_lds_b32_saddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_mov_b32_e32 v1, 32
+; GFX13-NEXT:    global_load_async_to_lds_b32 v0, v1, s[0:1] offset:16
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.load.async.to.lds.b32(ptr addrspace(1) %gep, 
ptr addrspace(3) %laddr, i32 16, i32 0)
@@ -93,6 +123,14 @@ define amdgpu_ps void 
@global_load_async_to_lds_b64_vaddr(ptr addrspace(1) %gadd
 ; GFX1250-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
 ; GFX1250-GISEL-NEXT:    global_load_async_to_lds_b64 v2, v[0:1], off 
offset:16 th:TH_LOAD_NT_HT scope:SCOPE_DEV
 ; GFX1250-GISEL-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_load_async_to_lds_b64_vaddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 32
+; GFX13-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX13-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX13-NEXT:    global_load_async_to_lds_b64 v2, v[0:1], off offset:16 
th:TH_LOAD_NT_HT scope:SCOPE_DEV
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.load.async.to.lds.b64(ptr addrspace(1) %gep, 
ptr addrspace(3) %laddr, i32 16, i32 22)
@@ -106,6 +144,12 @@ define amdgpu_ps void 
@global_load_async_to_lds_b64_saddr(ptr addrspace(1) inreg
 ; GFX1250-NEXT:    v_mov_b32_e32 v1, 32
 ; GFX1250-NEXT:    global_load_async_to_lds_b64 v0, v1, s[0:1] offset:16
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_load_async_to_lds_b64_saddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_mov_b32_e32 v1, 32
+; GFX13-NEXT:    global_load_async_to_lds_b64 v0, v1, s[0:1] offset:16
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.load.async.to.lds.b64(ptr addrspace(1) %gep, 
ptr addrspace(3) %laddr, i32 16, i32 0)
@@ -128,6 +172,14 @@ define amdgpu_ps void 
@global_load_async_to_lds_b128_vaddr(ptr addrspace(1) %gad
 ; GFX1250-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
 ; GFX1250-GISEL-NEXT:    global_load_async_to_lds_b128 v2, v[0:1], off 
offset:16 th:TH_LOAD_BYPASS scope:SCOPE_SYS
 ; GFX1250-GISEL-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_load_async_to_lds_b128_vaddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 32
+; GFX13-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX13-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX13-NEXT:    global_load_async_to_lds_b128 v2, v[0:1], off offset:16 
th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.load.async.to.lds.b128(ptr addrspace(1) %gep, 
ptr addrspace(3) %laddr, i32 16, i32 27)
@@ -141,6 +193,12 @@ define amdgpu_ps void 
@global_load_async_to_lds_b128_saddr(ptr addrspace(1) inre
 ; GFX1250-NEXT:    v_mov_b32_e32 v1, 32
 ; GFX1250-NEXT:    global_load_async_to_lds_b128 v0, v1, s[0:1] offset:16
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_load_async_to_lds_b128_saddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_mov_b32_e32 v1, 32
+; GFX13-NEXT:    global_load_async_to_lds_b128 v0, v1, s[0:1] offset:16
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.load.async.to.lds.b128(ptr addrspace(1) %gep, 
ptr addrspace(3) %laddr, i32 16, i32 0)
@@ -153,6 +211,11 @@ define amdgpu_ps void 
@global_load_async_to_lds_b32_saddr_scale_offset(ptr addrs
 ; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; 
msbs: dst=0 src0=0 src1=0 src2=0
 ; GFX1250-NEXT:    global_load_async_to_lds_b32 v0, v1, s[0:1] offset:16 
scale_offset th:TH_LOAD_NT
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_load_async_to_lds_b32_saddr_scale_offset:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    global_load_async_to_lds_b32 v0, v1, s[0:1] offset:16 
scale_offset th:TH_LOAD_NT
+; GFX13-NEXT:    s_endpgm
 entry:
   %idxprom = sext i32 %idx to i64
   %gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 %idxprom
@@ -166,6 +229,11 @@ define amdgpu_ps void 
@global_load_async_to_lds_b64_saddr_scale_offset(ptr addrs
 ; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; 
msbs: dst=0 src0=0 src1=0 src2=0
 ; GFX1250-NEXT:    global_load_async_to_lds_b64 v0, v1, s[0:1] offset:16 
scale_offset th:TH_LOAD_NT
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_load_async_to_lds_b64_saddr_scale_offset:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    global_load_async_to_lds_b64 v0, v1, s[0:1] offset:16 
scale_offset th:TH_LOAD_NT
+; GFX13-NEXT:    s_endpgm
 entry:
   %idxprom = sext i32 %idx to i64
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i64 %idxprom
@@ -183,6 +251,29 @@ define amdgpu_ps void 
@global_load_async_to_lds_b64_saddr_no_scale_offset(ptr ad
 ; GFX1250-NEXT:    v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
 ; GFX1250-NEXT:    global_load_async_to_lds_b64 v0, v[2:3], off offset:16 
th:TH_LOAD_NT
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-SDAG-LABEL: global_load_async_to_lds_b64_saddr_no_scale_offset:
+; GFX13-SDAG:       ; %bb.0: ; %entry
+; GFX13-SDAG-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; GFX13-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1)
+; GFX13-SDAG-NEXT:    v_lshlrev_b64_e32 v[1:2], 2, v[1:2]
+; GFX13-SDAG-NEXT:    v_add_co_u32 v1, vcc_lo, s0, v1
+; GFX13-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX13-SDAG-NEXT:    v_add_co_ci_u32_e64 v2, null, s1, v2, vcc_lo
+; GFX13-SDAG-NEXT:    global_load_async_to_lds_b64 v0, v[1:2], off offset:16 
th:TH_LOAD_NT
+; GFX13-SDAG-NEXT:    s_endpgm
+;
+; GFX13-GISEL-LABEL: global_load_async_to_lds_b64_saddr_no_scale_offset:
+; GFX13-GISEL:       ; %bb.0: ; %entry
+; GFX13-GISEL-NEXT:    v_dual_ashrrev_i32 v2, 31, v1 :: v_dual_mov_b32 v4, s1
+; GFX13-GISEL-NEXT:    v_mov_b32_e32 v3, s0
+; GFX13-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_1)
+; GFX13-GISEL-NEXT:    v_lshlrev_b64_e32 v[1:2], 2, v[1:2]
+; GFX13-GISEL-NEXT:    v_add_co_u32 v1, vcc_lo, v3, v1
+; GFX13-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX13-GISEL-NEXT:    v_add_co_ci_u32_e64 v2, null, v4, v2, vcc_lo
+; GFX13-GISEL-NEXT:    global_load_async_to_lds_b64 v0, v[1:2], off offset:16 
th:TH_LOAD_NT
+; GFX13-GISEL-NEXT:    s_endpgm
 entry:
   %idxprom = sext i32 %idx to i64
   %gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 %idxprom
diff --git 
a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.async.from.lds.ll 
b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.async.from.lds.ll
index cb50f436d4a1a..0e64df55c2378 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.async.from.lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.store.async.from.lds.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck 
-check-prefixes=GFX1250,GFX1250-SDAG %s
 ; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 < 
%s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1310 < %s | FileCheck 
-check-prefixes=GFX13,GFX13-SDAG %s
+; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1310 < 
%s | FileCheck -check-prefixes=GFX13,GFX13-GISEL %s
 
 declare void @llvm.amdgcn.global.store.async.from.lds.b8(ptr addrspace(1) 
%gaddr, ptr addrspace(3) %laddr, i32 %offset, i32 %cpol)
 declare void @llvm.amdgcn.global.store.async.from.lds.b32(ptr addrspace(1) 
%gaddr, ptr addrspace(3) %laddr, i32 %offset, i32 %cpol)
@@ -23,6 +25,14 @@ define amdgpu_ps void 
@global_store_async_from_lds_b8_vaddr(ptr addrspace(1) %ga
 ; GFX1250-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
 ; GFX1250-GISEL-NEXT:    global_store_async_from_lds_b8 v[0:1], v2, off 
offset:16 th:TH_STORE_NT
 ; GFX1250-GISEL-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_store_async_from_lds_b8_vaddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 32
+; GFX13-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX13-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX13-NEXT:    global_store_async_from_lds_b8 v[0:1], v2, off offset:16 
th:TH_LOAD_NT
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.store.async.from.lds.b8(ptr addrspace(1) %gep, 
ptr addrspace(3) %laddr, i32 16, i32 1)
@@ -36,6 +46,12 @@ define amdgpu_ps void 
@global_store_async_from_lds_b8_saddr(ptr addrspace(1) inr
 ; GFX1250-NEXT:    v_mov_b32_e32 v1, 32
 ; GFX1250-NEXT:    global_store_async_from_lds_b8 v1, v0, s[0:1] offset:16
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_store_async_from_lds_b8_saddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_mov_b32_e32 v1, 32
+; GFX13-NEXT:    global_store_async_from_lds_b8 v1, v0, s[0:1] offset:16
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.store.async.from.lds.b8(ptr addrspace(1) %gep, 
ptr addrspace(3) %laddr, i32 16, i32 0)
@@ -58,6 +74,14 @@ define amdgpu_ps void @global_store_async_from_lds_b32(ptr 
addrspace(1) %gaddr,
 ; GFX1250-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
 ; GFX1250-GISEL-NEXT:    global_store_async_from_lds_b32 v[0:1], v2, off 
offset:16 th:TH_STORE_HT scope:SCOPE_SE
 ; GFX1250-GISEL-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_store_async_from_lds_b32:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 32
+; GFX13-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX13-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX13-NEXT:    global_store_async_from_lds_b32 v[0:1], v2, off offset:16 
th:TH_LOAD_HT scope:SCOPE_SE
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.store.async.from.lds.b32(ptr addrspace(1) 
%gep, ptr addrspace(3) %laddr, i32 16, i32 10)
@@ -71,6 +95,12 @@ define amdgpu_ps void 
@global_store_async_from_lds_b32_saddr(ptr addrspace(1) in
 ; GFX1250-NEXT:    v_mov_b32_e32 v1, 32
 ; GFX1250-NEXT:    global_store_async_from_lds_b32 v1, v0, s[0:1] offset:16
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_store_async_from_lds_b32_saddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_mov_b32_e32 v1, 32
+; GFX13-NEXT:    global_store_async_from_lds_b32 v1, v0, s[0:1] offset:16
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.store.async.from.lds.b32(ptr addrspace(1) 
%gep, ptr addrspace(3) %laddr, i32 16, i32 0)
@@ -93,6 +123,14 @@ define amdgpu_ps void 
@global_store_async_from_lds_b64_vaddr(ptr addrspace(1) %g
 ; GFX1250-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
 ; GFX1250-GISEL-NEXT:    global_store_async_from_lds_b64 v[0:1], v2, off 
offset:16 th:TH_STORE_NT_HT scope:SCOPE_DEV
 ; GFX1250-GISEL-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_store_async_from_lds_b64_vaddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 32
+; GFX13-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX13-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX13-NEXT:    global_store_async_from_lds_b64 v[0:1], v2, off offset:16 
th:TH_LOAD_NT_HT scope:SCOPE_DEV
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.store.async.from.lds.b64(ptr addrspace(1) 
%gep, ptr addrspace(3) %laddr, i32 16, i32 22)
@@ -106,6 +144,12 @@ define amdgpu_ps void 
@global_store_async_from_lds_b64_saddr(ptr addrspace(1) in
 ; GFX1250-NEXT:    v_mov_b32_e32 v1, 32
 ; GFX1250-NEXT:    global_store_async_from_lds_b64 v1, v0, s[0:1] offset:16
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_store_async_from_lds_b64_saddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_mov_b32_e32 v1, 32
+; GFX13-NEXT:    global_store_async_from_lds_b64 v1, v0, s[0:1] offset:16
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.store.async.from.lds.b64(ptr addrspace(1) 
%gep, ptr addrspace(3) %laddr, i32 16, i32 0)
@@ -128,6 +172,14 @@ define amdgpu_ps void 
@global_store_async_from_lds_b128_vaddr(ptr addrspace(1) %
 ; GFX1250-GISEL-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
 ; GFX1250-GISEL-NEXT:    global_store_async_from_lds_b128 v[0:1], v2, off 
offset:16 th:TH_STORE_BYPASS scope:SCOPE_SYS
 ; GFX1250-GISEL-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_store_async_from_lds_b128_vaddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_add_co_u32 v0, vcc_lo, v0, 32
+; GFX13-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX13-NEXT:    v_add_co_ci_u32_e64 v1, null, 0, v1, vcc_lo
+; GFX13-NEXT:    global_store_async_from_lds_b128 v[0:1], v2, off offset:16 
th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.store.async.from.lds.b128(ptr addrspace(1) 
%gep, ptr addrspace(3) %laddr, i32 16, i32 27)
@@ -141,6 +193,12 @@ define amdgpu_ps void 
@global_store_async_from_lds_b128_saddr(ptr addrspace(1) i
 ; GFX1250-NEXT:    v_mov_b32_e32 v1, 32
 ; GFX1250-NEXT:    global_store_async_from_lds_b128 v1, v0, s[0:1] offset:16
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_store_async_from_lds_b128_saddr:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    v_mov_b32_e32 v1, 32
+; GFX13-NEXT:    global_store_async_from_lds_b128 v1, v0, s[0:1] offset:16
+; GFX13-NEXT:    s_endpgm
 entry:
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i32 4
   call void @llvm.amdgcn.global.store.async.from.lds.b128(ptr addrspace(1) 
%gep, ptr addrspace(3) %laddr, i32 16, i32 0)
@@ -153,6 +211,11 @@ define amdgpu_ps void 
@global_store_async_from_lds_b32_saddr_scale_offset(ptr ad
 ; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; 
msbs: dst=0 src0=0 src1=0 src2=0
 ; GFX1250-NEXT:    global_store_async_from_lds_b32 v1, v0, s[0:1] offset:16 
scale_offset th:TH_STORE_NT
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_store_async_from_lds_b32_saddr_scale_offset:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    global_store_async_from_lds_b32 v1, v0, s[0:1] offset:16 
scale_offset th:TH_LOAD_NT
+; GFX13-NEXT:    s_endpgm
 entry:
   %idxprom = sext i32 %idx to i64
   %gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 %idxprom
@@ -166,6 +229,11 @@ define amdgpu_ps void 
@global_store_async_from_lds_b64_saddr_scale_offset(ptr ad
 ; GFX1250-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; 
msbs: dst=0 src0=0 src1=0 src2=0
 ; GFX1250-NEXT:    global_store_async_from_lds_b64 v1, v0, s[0:1] offset:16 
scale_offset th:TH_STORE_NT
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-LABEL: global_store_async_from_lds_b64_saddr_scale_offset:
+; GFX13:       ; %bb.0: ; %entry
+; GFX13-NEXT:    global_store_async_from_lds_b64 v1, v0, s[0:1] offset:16 
scale_offset th:TH_LOAD_NT
+; GFX13-NEXT:    s_endpgm
 entry:
   %idxprom = sext i32 %idx to i64
   %gep = getelementptr i64, ptr addrspace(1) %gaddr, i64 %idxprom
@@ -183,6 +251,29 @@ define amdgpu_ps void 
@global_store_async_from_lds_b64_saddr_no_scale_offset(ptr
 ; GFX1250-NEXT:    v_lshl_add_u64 v[2:3], v[2:3], 2, s[0:1]
 ; GFX1250-NEXT:    global_store_async_from_lds_b64 v[2:3], v0, off offset:16 
th:TH_STORE_NT
 ; GFX1250-NEXT:    s_endpgm
+;
+; GFX13-SDAG-LABEL: global_store_async_from_lds_b64_saddr_no_scale_offset:
+; GFX13-SDAG:       ; %bb.0: ; %entry
+; GFX13-SDAG-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
+; GFX13-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1)
+; GFX13-SDAG-NEXT:    v_lshlrev_b64_e32 v[1:2], 2, v[1:2]
+; GFX13-SDAG-NEXT:    v_add_co_u32 v1, vcc_lo, s0, v1
+; GFX13-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX13-SDAG-NEXT:    v_add_co_ci_u32_e64 v2, null, s1, v2, vcc_lo
+; GFX13-SDAG-NEXT:    global_store_async_from_lds_b64 v[1:2], v0, off 
offset:16 th:TH_LOAD_NT
+; GFX13-SDAG-NEXT:    s_endpgm
+;
+; GFX13-GISEL-LABEL: global_store_async_from_lds_b64_saddr_no_scale_offset:
+; GFX13-GISEL:       ; %bb.0: ; %entry
+; GFX13-GISEL-NEXT:    v_dual_ashrrev_i32 v2, 31, v1 :: v_dual_mov_b32 v4, s1
+; GFX13-GISEL-NEXT:    v_mov_b32_e32 v3, s0
+; GFX13-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_1)
+; GFX13-GISEL-NEXT:    v_lshlrev_b64_e32 v[1:2], 2, v[1:2]
+; GFX13-GISEL-NEXT:    v_add_co_u32 v1, vcc_lo, v3, v1
+; GFX13-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX13-GISEL-NEXT:    v_add_co_ci_u32_e64 v2, null, v4, v2, vcc_lo
+; GFX13-GISEL-NEXT:    global_store_async_from_lds_b64 v[1:2], v0, off 
offset:16 th:TH_LOAD_NT
+; GFX13-GISEL-NEXT:    s_endpgm
 entry:
   %idxprom = sext i32 %idx to i64
   %gep = getelementptr i32, ptr addrspace(1) %gaddr, i64 %idxprom
diff --git a/llvm/test/MC/AMDGPU/gfx13_asm_vflat.s 
b/llvm/test/MC/AMDGPU/gfx13_asm_vflat.s
index a9ace1677cacf..fb269220611cd 100644
--- a/llvm/test/MC/AMDGPU/gfx13_asm_vflat.s
+++ b/llvm/test/MC/AMDGPU/gfx13_asm_vflat.s
@@ -2093,6 +2093,60 @@ global_load_addtid_b32 v1, s[0:1] offset:64
 global_load_addtid_b32 v1, s[2:3]
 // GFX13: global_load_addtid_b32 v1, s[2:3]       ; encoding: 
[0x02,0x80,0x05,0xee,0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 
+global_load_async_to_lds_b8 v1, v[2:3], off
+// GFX13: global_load_async_to_lds_b8 v1, v[2:3], off ; encoding: 
[0x7c,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+
+global_load_async_to_lds_b8 v1, v[2:3], off offset:64
+// GFX13: global_load_async_to_lds_b8 v1, v[2:3], off offset:64 ; encoding: 
[0x7c,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00]
+
+global_load_async_to_lds_b8 v1, v[2:3], off offset:-64
+// GFX13: global_load_async_to_lds_b8 v1, v[2:3], off offset:-64 ; encoding: 
[0x7c,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff]
+
+global_load_async_to_lds_b8 v1, v2, s[2:3]
+// GFX13: global_load_async_to_lds_b8 v1, v2, s[2:3] ; encoding: 
[0x02,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+
+global_load_async_to_lds_b8 v1, v2, s[2:3] offset:64
+// GFX13: global_load_async_to_lds_b8 v1, v2, s[2:3] offset:64 ; encoding: 
[0x02,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00]
+
+global_load_async_to_lds_b8 v1, v2, s[2:3] offset:-64
+// GFX13: global_load_async_to_lds_b8 v1, v2, s[2:3] offset:-64 ; encoding: 
[0x02,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff]
+
+global_load_async_to_lds_b32 v1, v[2:3], off
+// GFX13: global_load_async_to_lds_b32 v1, v[2:3], off ; encoding: 
[0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+
+global_load_async_to_lds_b32 v1, v[2:3], off offset:64
+// GFX13: global_load_async_to_lds_b32 v1, v[2:3], off offset:64 ; encoding: 
[0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00]
+
+global_load_async_to_lds_b32 v1, v[2:3], off offset:-64
+// GFX13: global_load_async_to_lds_b32 v1, v[2:3], off offset:-64 ; encoding: 
[0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff]
+
+global_load_async_to_lds_b32 v1, v2, s[2:3]
+// GFX13: global_load_async_to_lds_b32 v1, v2, s[2:3] ; encoding: 
[0x02,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+
+global_load_async_to_lds_b32 v1, v2, s[2:3] offset:64
+// GFX13: global_load_async_to_lds_b32 v1, v2, s[2:3] offset:64 ; encoding: 
[0x02,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00]
+
+global_load_async_to_lds_b32 v1, v2, s[2:3] offset:-64
+// GFX13: global_load_async_to_lds_b32 v1, v2, s[2:3] offset:-64 ; encoding: 
[0x02,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff]
+
+global_load_async_to_lds_b64 v1, v[2:3], off
+// GFX13: global_load_async_to_lds_b64 v1, v[2:3], off ; encoding: 
[0x7c,0xc0,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+
+global_load_async_to_lds_b64 v1, v[2:3], off offset:64
+// GFX13: global_load_async_to_lds_b64 v1, v[2:3], off offset:64 ; encoding: 
[0x7c,0xc0,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00]
+
+global_load_async_to_lds_b64 v1, v[2:3], off offset:-64
+// GFX13: global_load_async_to_lds_b64 v1, v[2:3], off offset:-64 ; encoding: 
[0x7c,0xc0,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff]
+
+global_load_async_to_lds_b64 v1, v2, s[2:3]
+// GFX13: global_load_async_to_lds_b64 v1, v2, s[2:3] ; encoding: 
[0x02,0xc0,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x00,0x00,0x00]
+
+global_load_async_to_lds_b64 v1, v2, s[2:3] offset:64
+// GFX13: global_load_async_to_lds_b64 v1, v2, s[2:3] offset:64 ; encoding: 
[0x02,0xc0,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00]
+
+global_load_async_to_lds_b64 v1, v2, s[2:3] offset:-64
+// GFX13: global_load_async_to_lds_b64 v1, v2, s[2:3] offset:-64 ; encoding: 
[0x02,0xc0,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff]
+
 global_load_b128 v[1:4], v0, s[0:1] offset:-64
 // GFX1250-ERR: :[[@LINE-1]]:18: error: invalid operand for instruction
 // GFX13: global_load_b128 v[1:4], v0, s[0:1] offset:-64 ; encoding: 
[0x00,0x80,0x03,0xee,0x01,0x00,0x00,0x00,0x00,0xc0,0xff,0xff]
@@ -2481,6 +2535,78 @@ global_store_addtid_b32 v2, s[0:1] offset:-64
 global_store_addtid_b32 v2, s[0:1] offset:64
 // GFX13: global_store_addtid_b32 v2, s[0:1] offset:64 ; encoding: 
[0x00,0xc0,0x05,0xee,0x00,0x00,0x00,0x01,0x00,0x40,0x00,0x00]
 
+global_store_async_from_lds_b8 v[2:3], v1, off
+// GFX13: global_store_async_from_lds_b8 v[2:3], v1, off ; encoding: 
[0x7c,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x00,0x00,0x00]
+
+global_store_async_from_lds_b8 v[2:3], v1, off offset:64
+// GFX13: global_store_async_from_lds_b8 v[2:3], v1, off offset:64 ; encoding: 
[0x7c,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+
+global_store_async_from_lds_b8 v[2:3], v1, off offset:-64
+// GFX13: global_store_async_from_lds_b8 v[2:3], v1, off offset:-64 ; 
encoding: [0x7c,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+
+global_store_async_from_lds_b8 v2, v1, s[2:3]
+// GFX13: global_store_async_from_lds_b8 v2, v1, s[2:3] ; encoding: 
[0x02,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x00,0x00,0x00]
+
+global_store_async_from_lds_b8 v2, v1, s[2:3] offset:64
+// GFX13: global_store_async_from_lds_b8 v2, v1, s[2:3] offset:64 ; encoding: 
[0x02,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+
+global_store_async_from_lds_b8 v2, v1, s[2:3] offset:-64
+// GFX13: global_store_async_from_lds_b8 v2, v1, s[2:3] offset:-64 ; encoding: 
[0x02,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+
+global_store_async_from_lds_b32 v[2:3], v1, off
+// GFX13: global_store_async_from_lds_b32 v[2:3], v1, off ; encoding: 
[0x7c,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x00,0x00,0x00]
+
+global_store_async_from_lds_b32 v[2:3], v1, off offset:64
+// GFX13: global_store_async_from_lds_b32 v[2:3], v1, off offset:64 ; 
encoding: [0x7c,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+
+global_store_async_from_lds_b32 v[2:3], v1, off offset:-64
+// GFX13: global_store_async_from_lds_b32 v[2:3], v1, off offset:-64 ; 
encoding: [0x7c,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+
+global_store_async_from_lds_b32 v2, v1, s[2:3]
+// GFX13: global_store_async_from_lds_b32 v2, v1, s[2:3] ; encoding: 
[0x02,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x00,0x00,0x00]
+
+global_store_async_from_lds_b32 v2, v1, s[2:3] offset:64
+// GFX13: global_store_async_from_lds_b32 v2, v1, s[2:3] offset:64 ; encoding: 
[0x02,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+
+global_store_async_from_lds_b32 v2, v1, s[2:3] offset:-64
+// GFX13: global_store_async_from_lds_b32 v2, v1, s[2:3] offset:-64 ; 
encoding: [0x02,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+
+global_store_async_from_lds_b64 v[2:3], v1, off
+// GFX13: global_store_async_from_lds_b64 v[2:3], v1, off ; encoding: 
[0x7c,0xc0,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x00,0x00,0x00]
+
+global_store_async_from_lds_b64 v[2:3], v1, off offset:64
+// GFX13: global_store_async_from_lds_b64 v[2:3], v1, off offset:64 ; 
encoding: [0x7c,0xc0,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+
+global_store_async_from_lds_b64 v[2:3], v1, off offset:-64
+// GFX13: global_store_async_from_lds_b64 v[2:3], v1, off offset:-64 ; 
encoding: [0x7c,0xc0,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+
+global_store_async_from_lds_b64 v2, v1, s[2:3]
+// GFX13: global_store_async_from_lds_b64 v2, v1, s[2:3] ; encoding: 
[0x02,0xc0,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x00,0x00,0x00]
+
+global_store_async_from_lds_b64 v2, v1, s[2:3] offset:64
+// GFX13: global_store_async_from_lds_b64 v2, v1, s[2:3] offset:64 ; encoding: 
[0x02,0xc0,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+
+global_store_async_from_lds_b64 v2, v1, s[2:3] offset:-64
+// GFX13: global_store_async_from_lds_b64 v2, v1, s[2:3] offset:-64 ; 
encoding: [0x02,0xc0,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+
+global_store_async_from_lds_b128 v[2:3], v1, off
+// GFX13: global_store_async_from_lds_b128 v[2:3], v1, off ; encoding: 
[0x7c,0x00,0x1a,0xee,0x00,0x00,0x80,0x00,0x02,0x00,0x00,0x00]
+
+global_store_async_from_lds_b128 v[2:3], v1, off offset:64
+// GFX13: global_store_async_from_lds_b128 v[2:3], v1, off offset:64 ; 
encoding: [0x7c,0x00,0x1a,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+
+global_store_async_from_lds_b128 v[2:3], v1, off offset:-64
+// GFX13: global_store_async_from_lds_b128 v[2:3], v1, off offset:-64 ; 
encoding: [0x7c,0x00,0x1a,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+
+global_store_async_from_lds_b128 v2, v1, s[2:3]
+// GFX13: global_store_async_from_lds_b128 v2, v1, s[2:3] ; encoding: 
[0x02,0x00,0x1a,0xee,0x00,0x00,0x80,0x00,0x02,0x00,0x00,0x00]
+
+global_store_async_from_lds_b128 v2, v1, s[2:3] offset:64
+// GFX13: global_store_async_from_lds_b128 v2, v1, s[2:3] offset:64 ; 
encoding: [0x02,0x00,0x1a,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00]
+
+global_store_async_from_lds_b128 v2, v1, s[2:3] offset:-64
+// GFX13: global_store_async_from_lds_b128 v2, v1, s[2:3] offset:-64 ; 
encoding: [0x02,0x00,0x1a,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff]
+
 global_store_b128 v0, v[2:5], s[0:1] offset:-64
 // GFX13: global_store_b128 v0, v[2:5], s[0:1] offset:-64 ; encoding: 
[0x00,0x80,0x07,0xee,0x00,0x00,0x00,0x01,0x00,0xc0,0xff,0xff]
 

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to