llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Gaëtan Bossu (gbossu)

<details>
<summary>Changes</summary>

This is a chained PR. Predecessor is https://github.com/llvm/llvm-project/pull/151729

This adds patterns for selecting EXT_ZZI_B. They are tested for fixed vectors using extract shuffles, and for scalable vectors using llvm.vector.splice intrinsics. We will get better codegen once subreg liveness is enabled: without it, any use of a ZPR2 tuple is always considered to use both ZPR registers of the pair.
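As a rough illustration of the scalable-vector case (a minimal sketch, not taken from the patch; the function name, immediate, and target features are illustrative), a `llvm.vector.splice` call like the one below should now be selectable as a single `ext` operating on a register pair, e.g. `ext z0.b, { z0.b, z1.b }, #8`:

```llvm
; Illustrative only: splice two scalable i8 vectors at element index 8.
; With the new patterns (and SVE2 or SME available), this is expected to
; map to one EXT_ZZI_B operating on a ZPR register pair.
define <vscale x 16 x i8> @splice_example(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
  %res = call <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i32 8)
  ret <vscale x 16 x i8> %res
}

declare <vscale x 16 x i8> @llvm.vector.splice.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)

attributes #0 = { "target-features"="+sve2" }
```

The added `sve-vector-splice.ll` test exercises these splice patterns for the various element types; the wider element types use the narrower immediate ranges (`sve_ext_imm_0_127`/`_0_63`/`_0_31`) visible in the TableGen change below.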
---

Patch is 174.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151730.diff

14 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+16)
- (modified) llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll (+5-5)
- (modified) llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll (+42-49)
- (modified) llvm/test/CodeGen/AArch64/sve-pr92779.ll (+8-9)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll (+12-12)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll (+16-16)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll (+16-16)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll (+252-252)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll (+1226-600)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll (+300-324)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll (+11-11)
- (modified) llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll (+47-49)
- (added) llvm/test/CodeGen/AArch64/sve-vector-splice.ll (+253)
- (modified) llvm/test/CodeGen/AArch64/sve2-fixed-length-extract-subvector.ll (+32-47)


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 0c4b4f4c3ed88..201dd93302d7a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4069,6 +4069,22 @@ let Predicates = [HasSVE2_or_SME] in {
   let AddedComplexity = 2 in {
   def : Pat<(nxv16i8 (AArch64ext nxv16i8:$zn1, nxv16i8:$zn2, (i32 imm0_255:$imm))),
             (EXT_ZZI_B (REG_SEQUENCE ZPR2, $zn1, zsub0, $zn2, zsub1), imm0_255:$imm)>;
+
+  foreach VT = [nxv16i8] in
+  def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_255 i32:$index)))),
+            (EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
+
+  foreach VT = [nxv8i16, nxv8f16, nxv8bf16] in
+  def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_127 i32:$index)))),
+            (EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
+
+  foreach VT = [nxv4i32, nxv4f16, nxv4f32, nxv4bf16] in
+  def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_63 i32:$index)))),
+            (EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
+
+  foreach VT = [nxv2i64, nxv2f16, nxv2f32, nxv2f64, nxv2bf16] in
+  def : Pat<(VT (vector_splice VT:$Z1, VT:$Z2, (i64 (sve_ext_imm_0_31 i32:$index)))),
+            (EXT_ZZI_B (REG_SEQUENCE ZPR2, $Z1, zsub0, $Z2, zsub1), imm0_255:$index)>;
   }
 } // End HasSVE2_or_SME
diff --git a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
index 50975d16c7e9e..13bec605839a9 100644
--- a/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
+++ b/llvm/test/CodeGen/AArch64/get-active-lane-mask-extract.ll
@@ -192,7 +192,7 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
 ; CHECK-SVE2p1-NEXT: mov z1.s, p0/z, #1 // =0x1
 ; CHECK-SVE2p1-NEXT: fmov s0, w8
 ; CHECK-SVE2p1-NEXT: mov v0.s[1], v1.s[1]
-; CHECK-SVE2p1-NEXT: ext z1.b, z1.b, z0.b, #8
+; CHECK-SVE2p1-NEXT: ext z1.b, { z1.b, z2.b }, #8
 ; CHECK-SVE2p1-NEXT: // kill: def $d0 killed $d0 killed $q0
 ; CHECK-SVE2p1-NEXT: // kill: def $d1 killed $d1 killed $z1
 ; CHECK-SVE2p1-NEXT: b use
@@ -202,12 +202,12 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
 ; CHECK-SME2-NEXT: whilelo p0.s, x0, x1
 ; CHECK-SME2-NEXT: cset w8, mi
 ; CHECK-SME2-NEXT: mov z1.s, p0/z, #1 // =0x1
-; CHECK-SME2-NEXT: fmov s2, w8
+; CHECK-SME2-NEXT: fmov s3, w8
 ; CHECK-SME2-NEXT: mov z0.s, z1.s[1]
-; CHECK-SME2-NEXT: zip1 z0.s, z2.s, z0.s
-; CHECK-SME2-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-SME2-NEXT: ext z1.b, { z1.b, z2.b }, #8
 ; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
+; CHECK-SME2-NEXT: zip1 z0.s, z3.s, z0.s
+; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-SME2-NEXT: b use
 %r = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %i, i64 %n)
 %v0 = call <2 x i1> @llvm.vector.extract.v2i1.nxv4i1.i64(<vscale x 4 x i1> %r, i64 0)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
index 33d5ac4cd299e..3e8b3a40467dd 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-partial-reduce.ll
@@ -109,14 +109,13 @@ define <16 x i16> @two_way_i8_i16_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-LABEL: two_way_i8_i16_vl256:
 ; SME: // %bb.0:
 ; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: umlalb z0.h, z2.b, z1.b
-; SME-NEXT: umlalt z0.h, z2.b, z1.b
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: umlalb z0.h, z3.b, z2.b
+; SME-NEXT: umlalt z0.h, z3.b, z2.b
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
 ; SME-NEXT: ret
 %acc = load <16 x i16>, ptr %accptr
 %u = load <32 x i8>, ptr %uptr
@@ -232,14 +231,13 @@ define <8 x i32> @two_way_i16_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-LABEL: two_way_i16_i32_vl256:
 ; SME: // %bb.0:
 ; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: umlalb z0.s, z2.h, z1.h
-; SME-NEXT: umlalt z0.s, z2.h, z1.h
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: umlalb z0.s, z3.h, z2.h
+; SME-NEXT: umlalt z0.s, z3.h, z2.h
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
 ; SME-NEXT: ret
 %acc = load <8 x i32>, ptr %accptr
 %u = load <16 x i16>, ptr %uptr
@@ -355,14 +353,13 @@ define <4 x i64> @two_way_i32_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-LABEL: two_way_i32_i64_vl256:
 ; SME: // %bb.0:
 ; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: umlalb z0.d, z2.s, z1.s
-; SME-NEXT: umlalt z0.d, z2.s, z1.s
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: umlalb z0.d, z3.s, z2.s
+; SME-NEXT: umlalt z0.d, z3.s, z2.s
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
 ; SME-NEXT: ret
 %acc = load <4 x i64>, ptr %accptr
 %u = load <8 x i32>, ptr %uptr
@@ -644,13 +641,12 @@ define <8 x i32> @four_way_i8_i32_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-LABEL: four_way_i8_i32_vl256:
 ; SME: // %bb.0:
 ; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: udot z0.s, z2.b, z1.b
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: udot z0.s, z3.b, z2.b
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
 ; SME-NEXT: ret
 %acc = load <8 x i32>, ptr %accptr
 %u = load <32 x i8>, ptr %uptr
@@ -689,13 +685,12 @@ define <8 x i32> @four_way_i8_i32_vl256_usdot(ptr %accptr, ptr %uptr, ptr %sptr)
 ; SME-LABEL: four_way_i8_i32_vl256_usdot:
 ; SME: // %bb.0:
 ; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: usdot z0.s, z1.b, z2.b
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: usdot z0.s, z2.b, z3.b
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
 ; SME-NEXT: ret
 %acc = load <8 x i32>, ptr %accptr
 %u = load <32 x i8>, ptr %uptr
@@ -822,13 +817,12 @@ define <4 x i64> @four_way_i16_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vsca
 ; SME-LABEL: four_way_i16_i64_vl256:
 ; SME: // %bb.0:
 ; SME-NEXT: ldr z0, [x0]
-; SME-NEXT: ldr z1, [x1]
-; SME-NEXT: ldr z2, [x2]
-; SME-NEXT: udot z0.d, z2.h, z1.h
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ldr z2, [x1]
+; SME-NEXT: ldr z3, [x2]
+; SME-NEXT: udot z0.d, z3.h, z2.h
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
 ; SME-NEXT: ret
 %acc = load <4 x i64>, ptr %accptr
 %u = load <16 x i16>, ptr %uptr
@@ -999,10 +993,9 @@ define <4 x i64> @four_way_i8_i64_vl256(ptr %accptr, ptr %uptr, ptr %sptr) vscal
 ; SME-NEXT: ldr z0, [x0]
 ; SME-NEXT: uaddwb z0.d, z0.d, z2.s
 ; SME-NEXT: uaddwt z0.d, z0.d, z2.s
-; SME-NEXT: mov z1.d, z0.d
-; SME-NEXT: ext z1.b, z1.b, z0.b, #16
-; SME-NEXT: // kill: def $q0 killed $q0 killed $z0
-; SME-NEXT: // kill: def $q1 killed $q1 killed $z1
+; SME-NEXT: ext z2.b, { z0.b, z1.b }, #16
+; SME-NEXT: // kill: def $q0 killed $q0 killed $z0_z1
+; SME-NEXT: mov z1.d, z2.d
 ; SME-NEXT: ret
 %acc = load <4 x i64>, ptr %accptr
 %u = load <32 x i8>, ptr %uptr
diff --git a/llvm/test/CodeGen/AArch64/sve-pr92779.ll b/llvm/test/CodeGen/AArch64/sve-pr92779.ll
index 3f34d79b3bb49..427d3903cf2e9 100644
--- a/llvm/test/CodeGen/AArch64/sve-pr92779.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pr92779.ll
@@ -5,16 +5,15 @@ define void @main(ptr %0) {
 ; CHECK-LABEL: main:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: movi v1.2d, #0000000000000000
 ; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uzp1 v0.2s, v1.2s, v0.2s
-; CHECK-NEXT: neg v0.2s, v0.2s
-; CHECK-NEXT: smov x8, v0.s[0]
-; CHECK-NEXT: smov x9, v0.s[1]
-; CHECK-NEXT: mov z1.d, p0/m, x8
-; CHECK-NEXT: mov z1.d, p0/m, x9
-; CHECK-NEXT: str z1, [x0]
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #8
+; CHECK-NEXT: uzp1 v2.2s, v0.2s, v2.2s
+; CHECK-NEXT: neg v2.2s, v2.2s
+; CHECK-NEXT: smov x8, v2.s[0]
+; CHECK-NEXT: smov x9, v2.s[1]
+; CHECK-NEXT: mov z0.d, p0/m, x8
+; CHECK-NEXT: mov z0.d, p0/m, x9
+; CHECK-NEXT: str z0, [x0]
 ; CHECK-NEXT: ret
 "entry":
 %1 = bitcast <vscale x 2 x i64> zeroinitializer to <vscale x 4 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
index 4d524bc848de6..6fe6b8a1c48d0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
 ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -228,25 +228,25 @@ define <4 x i256> @load_sext_v4i32i256(ptr %ap) {
 ; CHECK-LABEL: load_sext_v4i32i256:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: sunpklo z1.d, z0.s
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: sunpklo z2.d, z0.s
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT: sunpklo z0.d, z0.s
-; CHECK-NEXT: fmov x9, d1
-; CHECK-NEXT: mov z1.d, z1.d[1]
-; CHECK-NEXT: fmov x11, d0
-; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: fmov x9, d2
+; CHECK-NEXT: mov z2.d, z2.d[1]
 ; CHECK-NEXT: asr x10, x9, #63
+; CHECK-NEXT: fmov x11, d2
 ; CHECK-NEXT: stp x9, x10, [x8]
-; CHECK-NEXT: fmov x9, d1
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: mov z0.d, z0.d[1]
 ; CHECK-NEXT: asr x12, x11, #63
 ; CHECK-NEXT: stp x10, x10, [x8, #16]
-; CHECK-NEXT: stp x11, x12, [x8, #64]
+; CHECK-NEXT: stp x11, x12, [x8, #32]
 ; CHECK-NEXT: fmov x11, d0
 ; CHECK-NEXT: asr x10, x9, #63
-; CHECK-NEXT: stp x12, x12, [x8, #80]
-; CHECK-NEXT: stp x10, x10, [x8, #48]
+; CHECK-NEXT: stp x12, x12, [x8, #48]
+; CHECK-NEXT: stp x10, x10, [x8, #80]
 ; CHECK-NEXT: asr x12, x11, #63
-; CHECK-NEXT: stp x9, x10, [x8, #32]
+; CHECK-NEXT: stp x9, x10, [x8, #64]
 ; CHECK-NEXT: stp x12, x12, [x8, #112]
 ; CHECK-NEXT: stp x11, x12, [x8, #96]
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
index 35dd827bbabc5..7ef35f153f029 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
 ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -78,8 +78,8 @@ define <4 x i8> @extract_subvector_v8i8(<8 x i8> %op) {
 define <8 x i8> @extract_subvector_v16i8(<16 x i8> %op) {
 ; CHECK-LABEL: extract_subvector_v16i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 ;
@@ -119,7 +119,7 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 ;
@@ -138,8 +138,8 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) {
 define <4 x i16> @extract_subvector_v8i16(<8 x i16> %op) {
 ; CHECK-LABEL: extract_subvector_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 ;
@@ -198,8 +198,8 @@ define <1 x i32> @extract_subvector_v2i32(<2 x i32> %op) {
 define <2 x i32> @extract_subvector_v4i32(<4 x i32> %op) {
 ; CHECK-LABEL: extract_subvector_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 ;
@@ -237,8 +237,8 @@ define void @extract_subvector_v8i32(ptr %a, ptr %b) {
 define <1 x i64> @extract_subvector_v2i64(<2 x i64> %op) {
 ; CHECK-LABEL: extract_subvector_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 ;
@@ -297,8 +297,8 @@ define <2 x half> @extract_subvector_v4f16(<4 x half> %op) {
 define <4 x half> @extract_subvector_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: extract_subvector_v8f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 ;
@@ -357,8 +357,8 @@ define <1 x float> @extract_subvector_v2f32(<2 x float> %op) {
 define <2 x float> @extract_subvector_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: extract_subvector_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 ;
@@ -396,8 +396,8 @@ define void @extract_subvector_v8f32(ptr %a, ptr %b) {
 define <1 x double> @extract_subvector_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: extract_subvector_v2f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z0.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index e3d0a72c74b87..bc9b0373d8e49 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
+; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
 ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
@@ -74,14 +74,14 @@ define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) {
 define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v8f16_to_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z1.s, z0.h
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0_z1
+; CHECK-NEXT: ext z2.b, { z0.b, z1.b }, #8
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: fcvt z1.s, p0/m, z1.h
+; CHECK-NEXT: uunpklo z1.s, z2.h
 ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: fcvt z1.s, p0/m, z1.h
+; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
 ;
 ; NONEON-NOSVE-LABEL: fcvt_v8f16_to_v8f32:
@@ -122,21 +122,21 @@ define void @fcvt_v8f16_to_v8f32(<8 x half> %a, ptr %b) {
 define void @fcvt_v16f16_to_v16f32(<16 x half> %a, ptr %b) {
 ; CHECK-LABEL: fcvt_v16f16_to_v16f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: uunpklo z2.s, z1.h
-; CHECK-NEXT: uunpklo z3.s, z0.h
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1_z2
+; CHECK-NEXT: ext z0.b, { z1.b, z2.b }, #8
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT: uunpklo z1.s, z1.h
+; CHECK-NEXT: ext z5.b, { z3.b, z4.b }, #8
 ; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: fcvt z2.s, p0/m, z2.h
-; CHECK-NEXT: fcvt z3.s, p0/m, z3.h
+; CHECK-NEXT: uunpklo z2.s, z3.h
 ; CHECK-NEXT: fcvt z1.s, p0/m, z1.h
+; CHECK-NEXT: uunpklo z3.s, z5.h
 ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
-; CHECK-NEXT: stp q3, q0, [x0]
-; CHECK-NEXT: stp q2, q1, [x0, #32]
+; CHECK-NEXT: fcvt z2.s, p0/m, z2.h
+; CHECK-NEXT: fcvt z3.s, p0/m, z3.h
+; CHECK-NEXT: stp q1, q0, [x0, #32]
+; CHECK-NEXT: stp q2, q3, [x0]
 ; CHECK-NEXT: ret
 ;
 ; NONEON-NOSVE-LABEL: fcvt_v16f16_to_v16f32:
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index ae7c676172867..0e34b2cd09fe1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -58,21 +58,21 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: s...
[truncated]
``````````

</details>

https://github.com/llvm/llvm-project/pull/151730