https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/111730
>From e6c0bf257510507bea11e9e8de2533cdd36cc13d Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Wed, 9 Oct 2024 22:05:48 +0400 Subject: [PATCH 1/2] GlobalISel: Fix combine duplicating atomic loads The sext_inreg (load) combine was not deleting the old load instruction, and it would never be deleted if volatile or atomic. --- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 1 + .../AMDGPU/GlobalISel/atomic_load_flat.ll | 96 ++++--------------- .../AMDGPU/GlobalISel/atomic_load_global.ll | 51 +++------- .../AMDGPU/GlobalISel/atomic_load_local_2.ll | 36 ++----- ...lizer-combiner-sextload-from-sextinreg.mir | 2 - 5 files changed, 40 insertions(+), 146 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index f9b1621955c217..7bba493fba49fc 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1110,6 +1110,7 @@ void CombinerHelper::applySextInRegOfLoad( Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(), LoadDef->getPointerReg(), *NewMMO); MI.eraseFromParent(); + LoadDef->eraseFromParent(); } /// Return true if 'MI' is a load or a store that may be fold it's address diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll index 817d1af9c226c8..83912b1e77db20 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_flat.ll @@ -27,32 +27,12 @@ define i32 @atomic_load_flat_monotonic_i8_zext_to_i32(ptr %ptr) { } define i32 @atomic_load_flat_monotonic_i8_sext_to_i32(ptr %ptr) { -; GFX7-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc -; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v2 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc -; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: flat_load_sbyte v2, v[0:1] glc -; GFX9-NEXT: flat_load_ubyte v3, v[0:1] glc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: flat_load_sbyte v0, v[0:1] glc +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] %load = load atomic i8, ptr %ptr monotonic, align 1 %ext = sext i8 %load to i32 ret i32 %ext @@ -71,32 +51,12 @@ define i16 @atomic_load_flat_monotonic_i8_zext_to_i16(ptr %ptr) { } define i16 @atomic_load_flat_monotonic_i8_sext_to_i16(ptr %ptr) { -; GFX7-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc -; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v2 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc -; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc -; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: flat_load_sbyte v2, v[0:1] glc -; GFX9-NEXT: flat_load_ubyte v3, v[0:1] glc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: atomic_load_flat_monotonic_i8_sext_to_i16: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: flat_load_sbyte v0, v[0:1] glc +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] %load = load atomic i8, ptr %ptr monotonic, align 1 %ext = sext i8 %load to i16 ret i16 %ext @@ -126,32 +86,12 @@ define i32 @atomic_load_flat_monotonic_i16_zext_to_i32(ptr %ptr) { } define i32 @atomic_load_flat_monotonic_i16_sext_to_i32(ptr %ptr) { -; GFX7-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: flat_load_sshort v2, v[0:1] glc -; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc -; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v2 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: flat_load_sshort v2, v[0:1] glc -; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc -; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v2 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: flat_load_sshort v2, v[0:1] glc -; GFX9-NEXT: flat_load_ushort v3, v[0:1] glc -; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: atomic_load_flat_monotonic_i16_sext_to_i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: flat_load_sshort v0, v[0:1] glc +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_setpc_b64 s[30:31] %load = load atomic i16, ptr %ptr monotonic, align 2 %ext = sext i16 %load to i32 ret i32 %ext diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll index a3116dd2656649..e2906c3d4fdb24 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_global.ll @@ -82,37 +82,28 @@ define i32 @atomic_load_global_monotonic_i8_sext_to_i32(ptr addrspace(1) %ptr) { ; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: s_mov_b32 s7, 0x100f000 ; GFX6-NEXT: s_mov_b64 s[4:5], 0 -; GFX6-NEXT: buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc -; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc +; GFX6-NEXT: buffer_load_sbyte v0, v[0:1], s[4:7], 0 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i32: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc -; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc +; GFX7-NEXT: flat_load_sbyte v0, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v2 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_global_monotonic_i8_sext_to_i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc -; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc +; GFX8-NEXT: flat_load_sbyte v0, v[0:1] glc ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: atomic_load_global_monotonic_i8_sext_to_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_sbyte v2, v[0:1], off glc -; GFX9-NEXT: global_load_ubyte v3, v[0:1], off glc -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %load = load atomic i8, ptr addrspace(1) %ptr monotonic, align 1 @@ -163,37 +154,28 @@ define i16 @atomic_load_global_monotonic_i8_sext_to_i16(ptr addrspace(1) %ptr) { ; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: s_mov_b32 s7, 0x100f000 ; GFX6-NEXT: s_mov_b64 s[4:5], 0 -; GFX6-NEXT: buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc -; GFX6-NEXT: buffer_load_ubyte v0, v[0:1], s[4:7], 0 addr64 glc +; GFX6-NEXT: buffer_load_sbyte v0, v[0:1], s[4:7], 0 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: atomic_load_global_monotonic_i8_sext_to_i16: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: flat_load_sbyte v2, v[0:1] glc -; GFX7-NEXT: flat_load_ubyte v0, v[0:1] glc +; GFX7-NEXT: flat_load_sbyte v0, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v2 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_global_monotonic_i8_sext_to_i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: flat_load_sbyte v2, v[0:1] glc -; GFX8-NEXT: flat_load_ubyte v0, v[0:1] glc +; GFX8-NEXT: flat_load_sbyte v0, v[0:1] glc ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: atomic_load_global_monotonic_i8_sext_to_i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_sbyte v2, v[0:1], off glc -; GFX9-NEXT: global_load_ubyte v3, v[0:1], off glc -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-NEXT: global_load_sbyte v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %load = load atomic i8, ptr addrspace(1) %ptr monotonic, align 1 @@ -279,37 +261,28 @@ define i32 @atomic_load_global_monotonic_i16_sext_to_i32(ptr addrspace(1) %ptr) ; GFX6-NEXT: s_mov_b32 s6, 0 ; GFX6-NEXT: s_mov_b32 s7, 0x100f000 ; GFX6-NEXT: s_mov_b64 s[4:5], 0 -; GFX6-NEXT: buffer_load_sbyte v2, v[0:1], s[4:7], 0 addr64 glc -; GFX6-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 glc +; GFX6-NEXT: buffer_load_sbyte v0, v[0:1], s[4:7], 0 addr64 glc ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_mov_b32_e32 v0, v2 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: atomic_load_global_monotonic_i16_sext_to_i32: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: flat_load_sshort v2, v[0:1] glc -; GFX7-NEXT: flat_load_ushort v0, v[0:1] glc +; GFX7-NEXT: flat_load_sshort v0, v[0:1] glc ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v2 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_global_monotonic_i16_sext_to_i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: flat_load_sshort v2, v[0:1] glc -; GFX8-NEXT: flat_load_ushort v0, v[0:1] glc +; GFX8-NEXT: flat_load_sshort v0, v[0:1] glc ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: atomic_load_global_monotonic_i16_sext_to_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_sshort v2, v[0:1], off glc -; GFX9-NEXT: global_load_ushort v3, v[0:1], off glc -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-NEXT: global_load_sshort v0, v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %load = load atomic i16, ptr addrspace(1) %ptr monotonic, align 2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll index fb001e09a967a7..1656814d6fb06b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local_2.ll @@ -65,29 +65,23 @@ define i32 @atomic_load_local_monotonic_i8_sext_to_i32(ptr addrspace(3) %ptr) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_mov_b32 m0, -1 -; GFX7-NEXT: ds_read_i8 v1, v0 -; GFX7-NEXT: ds_read_u8 v0, v0 +; GFX7-NEXT: ds_read_i8 v0, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_local_monotonic_i8_sext_to_i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_read_i8 v1, v0 -; GFX8-NEXT: ds_read_u8 v0, v0 +; GFX8-NEXT: ds_read_i8 v0, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: atomic_load_local_monotonic_i8_sext_to_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ds_read_i8 v1, v0 -; GFX9-NEXT: ds_read_u8 v0, v0 +; GFX9-NEXT: ds_read_i8 v0, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] %load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1 %ext = sext i8 %load to i32 @@ -127,29 +121,23 @@ define i16 @atomic_load_local_monotonic_i8_sext_to_i16(ptr addrspace(3) %ptr) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_mov_b32 m0, -1 -; GFX7-NEXT: ds_read_i8 v1, v0 -; GFX7-NEXT: ds_read_u8 v0, v0 +; GFX7-NEXT: ds_read_i8 v0, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_local_monotonic_i8_sext_to_i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_read_i8 v1, v0 -; GFX8-NEXT: ds_read_u8 v0, v0 +; GFX8-NEXT: ds_read_i8 v0, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: atomic_load_local_monotonic_i8_sext_to_i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ds_read_i8 v1, v0 -; GFX9-NEXT: ds_read_u8 v0, v0 +; GFX9-NEXT: ds_read_i8 v0, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] %load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1 %ext = sext i8 %load to i16 @@ -216,29 +204,23 @@ define i32 @atomic_load_local_monotonic_i16_sext_to_i32(ptr addrspace(3) %ptr) { ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_mov_b32 m0, -1 -; GFX7-NEXT: ds_read_i16 v1, v0 -; GFX7-NEXT: ds_read_u16 v0, v0 +; GFX7-NEXT: ds_read_i16 v0, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_mov_b32_e32 v0, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: atomic_load_local_monotonic_i16_sext_to_i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_mov_b32 m0, -1 -; GFX8-NEXT: ds_read_i16 v1, v0 -; GFX8-NEXT: ds_read_u16 v0, v0 +; GFX8-NEXT: ds_read_i16 v0, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: atomic_load_local_monotonic_i16_sext_to_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: ds_read_i16 v1, v0 -; GFX9-NEXT: ds_read_u16 v0, v0 +; GFX9-NEXT: ds_read_i16 v0, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] %load = load atomic i16, ptr addrspace(3) %ptr monotonic, align 2 %ext = sext i16 %load to i32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir index afa81980ebd621..23b80528c80a98 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-sextload-from-sextinreg.mir @@ -133,7 +133,6 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s8), addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (volatile load (s8), align 1, addrspace 1) @@ -172,7 +171,6 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (volatile load (s16), addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[SEXTLOAD]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_LOAD %0 :: (volatile load (s16), align 2, addrspace 1) >From 02d2bcc1c40165fa88bcec191738b74bdb4e23aa Mon Sep 17 00:00:00 2001 From: Matt Arsenault <matthew.arsena...@amd.com> Date: Thu, 10 Oct 2024 14:44:13 +0400 Subject: [PATCH 2/2] Add comment --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 7bba493fba49fc..63d02ac62d5f31 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1110,6 +1110,8 @@ void CombinerHelper::applySextInRegOfLoad( Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(), LoadDef->getPointerReg(), *NewMMO); MI.eraseFromParent(); + + // Not all loads can be deleted, so make sure the old one is removed. LoadDef->eraseFromParent(); } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits