llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-aarch64 Author: Tobias Stadler (tobias-stadler) <details> <summary>Changes</summary> LegalizerHelper::reduceLoadStoreWidth does not work for non-byte-sized types, because this would require (un)packing of bits across byte boundaries. Precommit tests: #<!-- -->134904 --- Patch is 49.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/136739.diff 4 Files Affected: - (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+5) - (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir (+12-92) - (modified) llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll (+192-298) - (modified) llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll (+159-265) ``````````diff diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 0aa853389bf1a..4052060271331 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -5210,6 +5210,11 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx, if (TypeIdx != 0) return UnableToLegalize; + if (!NarrowTy.isByteSized()) { + LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n"); + return UnableToLegalize; + } + // This implementation doesn't work for atomics. Give up instead of doing // something invalid. if (LdStMI.isAtomic()) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir index 221980ff2c42e..3a2c57ab50147 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir @@ -2,9 +2,11 @@ # RUN: llc -O0 -mtriple=aarch64 -verify-machineinstrs -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel.*' -o - %s 2> %t.err | FileCheck %s # RUN: FileCheck -check-prefix=ERR %s < %t.err -# ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s128) = G_LOAD %{{[0-9]+}}:_(p0) :: (load (<2 x s63>)) (in function: load-narrow-scalar-high-bits) +# ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %{{[0-9]+}}:_(<8 x s9>), %{{[0-9]+}}:_(p0) :: (store (<8 x s9>), align 16) (in function: store-narrow-non-byte-sized) +# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(<8 x s9>) = G_LOAD %{{[0-9]+}}:_(p0) :: (load (<8 x s9>), align 16) (in function: load-narrow-non-byte-sized) +# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s128) = G_LOAD %{{[0-9]+}}:_(p0) :: (load (<2 x s63>)) (in function: load-narrow-scalar-high-bits) -# FIXME: Scalarized stores for non-byte-sized vector elements store incorrect partial values. +# FIXME: Non-byte-sized vector elements cause fallback in LegalizerHelper::reduceLoadStoreWidth --- name: store-narrow-non-byte-sized tracksRegLiveness: true @@ -15,60 +17,10 @@ body: | ; CHECK: liveins: $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x8 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 511 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32) - ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY]](p0) :: (store (s16), align 16) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 257 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32) - ; CHECK-NEXT: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 1, align 1) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) - ; CHECK-NEXT: G_STORE [[TRUNC2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into unknown-address + 2) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[COPY7]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32) - ; CHECK-NEXT: G_STORE [[TRUNC3]](s16), [[PTR_ADD2]](p0) :: (store (s16) into unknown-address + 3, align 1) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[COPY9]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND4]](s32) - ; CHECK-NEXT: G_STORE [[TRUNC4]](s16), [[PTR_ADD3]](p0) :: (store (s16) into unknown-address + 4, align 4) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[COPY11]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[AND5]](s32) - ; CHECK-NEXT: G_STORE [[TRUNC5]](s16), [[PTR_ADD4]](p0) :: (store (s16) into unknown-address + 5, align 1) - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[C]], [[COPY12]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[AND6]](s32) - ; CHECK-NEXT: G_STORE [[TRUNC6]](s16), [[PTR_ADD5]](p0) :: (store (s16) into unknown-address + 6) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[C3]], [[COPY13]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[AND7]](s32) - ; CHECK-NEXT: G_STORE [[TRUNC7]](s16), [[PTR_ADD6]](p0) :: (store (s16) into unknown-address + 7, align 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s9) = G_CONSTANT i9 -256 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s9) = G_CONSTANT i9 -255 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s9>) = G_BUILD_VECTOR [[C]](s9), [[C1]](s9), [[C]](s9), [[C1]](s9), [[C]](s9), [[C1]](s9), [[C]](s9), [[C1]](s9) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s9>), [[COPY]](p0) :: (store (<8 x s9>), align 16) ; CHECK-NEXT: RET_ReallyLR %0:_(p0) = COPY $x8 %1:_(s9) = G_CONSTANT i9 256 @@ -153,7 +105,7 @@ body: | ... -# FIXME: Scalarized loads for non-byte-sized vector elements load incorrect partial values. +# FIXME: Non-byte-sized vector elements cause fallback in LegalizerHelper::reduceLoadStoreWidth --- name: load-narrow-non-byte-sized tracksRegLiveness: true @@ -164,41 +116,9 @@ body: | ; CHECK: liveins: $x8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x8 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load (s16), align 16) - ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD]], 9 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 1, align 1) - ; CHECK-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD1]], 9 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 2) - ; CHECK-NEXT: [[ASSERT_ZEXT2:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD2]], 9 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 3, align 1) - ; CHECK-NEXT: [[ASSERT_ZEXT3:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD3]], 9 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 4, align 4) - ; CHECK-NEXT: [[ASSERT_ZEXT4:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD4]], 9 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 5, align 1) - ; CHECK-NEXT: [[ASSERT_ZEXT5:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD5]], 9 - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 6) - ; CHECK-NEXT: [[ASSERT_ZEXT6:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD6]], 9 - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 7, align 1) - ; CHECK-NEXT: [[ASSERT_ZEXT7:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD7]], 9 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[ASSERT_ZEXT]](s16), [[ASSERT_ZEXT1]](s16), [[ASSERT_ZEXT2]](s16), [[ASSERT_ZEXT3]](s16), [[ASSERT_ZEXT4]](s16), [[ASSERT_ZEXT5]](s16), [[ASSERT_ZEXT6]](s16), [[ASSERT_ZEXT7]](s16) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 511 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<8 x s16>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: $q0 = COPY [[AND]](<8 x s16>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s9>) = G_LOAD [[COPY]](p0) :: (load (<8 x s9>), align 16) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(<8 x s16>) = G_ZEXT [[LOAD]](<8 x s9>) + ; CHECK-NEXT: $q0 = COPY [[ZEXT]](<8 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(p0) = COPY $x8 %2:_(<8 x s9>) = G_LOAD %0(p0) :: (load (<8 x s9>), align 16) diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index c834ca772b6ac..ed3b1ee53fbd9 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -4136,304 +4136,198 @@ define <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { } define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { -; CHECK-SD-LABEL: test_signed_v8f16_v8i100: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: sub sp, sp, #192 -; CHECK-SD-NEXT: str d10, [sp, #64] // 8-byte Folded Spill -; CHECK-SD-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp x29, x30, [sp, #96] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp x28, x27, [sp, #112] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp x26, x25, [sp, #128] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp x24, x23, [sp, #144] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp x22, x21, [sp, #160] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill -; CHECK-SD-NEXT: .cfi_def_cfa_offset 192 -; CHECK-SD-NEXT: .cfi_offset w19, -8 -; CHECK-SD-NEXT: .cfi_offset w20, -16 -; CHECK-SD-NEXT: .cfi_offset w21, -24 -; CHECK-SD-NEXT: .cfi_offset w22, -32 -; CHECK-SD-NEXT: .cfi_offset w23, -40 -; CHECK-SD-NEXT: .cfi_offset w24, -48 -; CHECK-SD-NEXT: .cfi_offset w25, -56 -; CHECK-SD-NEXT: .cfi_offset w26, -64 -; CHECK-SD-NEXT: .cfi_offset w27, -72 -; CHECK-SD-NEXT: .cfi_offset w28, -80 -; CHECK-SD-NEXT: .cfi_offset w30, -88 -; CHECK-SD-NEXT: .cfi_offset w29, -96 -; CHECK-SD-NEXT: .cfi_offset b8, -104 -; CHECK-SD-NEXT: .cfi_offset b9, -112 -; CHECK-SD-NEXT: .cfi_offset b10, -128 -; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-SD-NEXT: mov x19, x8 -; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: mov h0, v0.h[1] -; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: fmov s0, s8 -; CHECK-SD-NEXT: bl __fixsfti -; CHECK-SD-NEXT: movi v10.2s, #241, lsl #24 -; CHECK-SD-NEXT: mov w8, #1895825407 // =0x70ffffff -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: fmov s9, w8 -; CHECK-SD-NEXT: mov x22, #-34359738368 // =0xfffffff800000000 -; CHECK-SD-NEXT: mov x23, #34359738367 // =0x7ffffffff -; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: fcmp s8, s10 -; CHECK-SD-NEXT: csel x8, x22, x1, lt -; CHECK-SD-NEXT: csel x9, xzr, x0, lt -; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: csinv x9, x9, xzr, le -; CHECK-SD-NEXT: csel x8, x23, x8, gt -; CHECK-SD-NEXT: fcmp s8, s8 -; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: csel x8, xzr, x8, vs -; CHECK-SD-NEXT: str x8, [sp, #72] // 8-byte Folded Spill -; CHECK-SD-NEXT: csel x8, xzr, x9, vs -; CHECK-SD-NEXT: fmov s0, s8 -; CHECK-SD-NEXT: str x8, [sp, #24] // 8-byte Folded Spill -; CHECK-SD-NEXT: bl __fixsfti -; CHECK-SD-NEXT: fcmp s8, s10 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: csel x8, xzr, x0, lt -; CHECK-SD-NEXT: csel x9, x22, x1, lt -; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: csel x9, x23, x9, gt -; CHECK-SD-NEXT: csinv x8, x8, xzr, le -; CHECK-SD-NEXT: fcmp s8, s8 -; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: csel x10, xzr, x8, vs -; CHECK-SD-NEXT: csel x8, xzr, x9, vs -; CHECK-SD-NEXT: stp x8, x10, [sp, #8] // 16-byte Folded Spill -; CHECK-SD-NEXT: fmov s0, s8 -; CHECK-SD-NEXT: bl __fixsfti -; CHECK-SD-NEXT: fcmp s8, s10 -; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov h0, v0.h[2] -; CHECK-SD-NEXT: csel x8, x22, x1, lt -; CHECK-SD-NEXT: csel x9, xzr, x0, lt -; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: csinv x9, x9, xzr, le -; CHECK-SD-NEXT: csel x8, x23, x8, gt -; CHECK-SD-NEXT: fcmp s8, s8 -; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: csel x26, xzr, x8, vs -; CHECK-SD-NEXT: csel x8, xzr, x9, vs -; CHECK-SD-NEXT: str x8, [sp, #32] // 8-byte Folded Spill -; CHECK-SD-NEXT: fmov s0, s8 -; CHECK-SD-NEXT: bl __fixsfti -; CHECK-SD-NEXT: fcmp s8, s10 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov h0, v0.h[1] -; CHECK-SD-NEXT: csel x8, x22, x1, lt -; CHECK-SD-NEXT: csel x9, xzr, x0, lt -; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: csinv x9, x9, xzr, le -; CHECK-SD-NEXT: csel x8, x23, x8, gt -; CHECK-SD-NEXT: fcmp s8, s8 -; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: csel x27, xzr, x8, vs -; CHECK-SD-NEXT: csel x8, xzr, x9, vs -; CHECK-SD-NEXT: str x8, [sp] // 8-byte Folded Spill -; CHECK-SD-NEXT: fmov s0, s8 -; CHECK-SD-NEXT: bl __fixsfti -; CHECK-SD-NEXT: fcmp s8, s10 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov h0, v0.h[3] -; CHECK-SD-NEXT: csel x8, x22, x1, lt -; CHECK-SD-NEXT: csel x9, xzr, x0, lt -; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: csinv x9, x9, xzr, le -; CHECK-SD-NEXT: csel x8, x23, x8, gt -; CHECK-SD-NEXT: fcmp s8, s8 -; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: csel x20, xzr, x8, vs -; CHECK-SD-NEXT: csel x21, xzr, x9, vs -; CHECK-SD-NEXT: fmov s0, s8 -; CHECK-SD-NEXT: bl __fixsfti -; CHECK-SD-NEXT: fcmp s8, s10 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: csel x8, xzr, x0, lt -; CHECK-SD-NEXT: csel x9, x22, x1, lt -; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: csel x9, x23, x9, gt -; CHECK-SD-NEXT: csinv x8, x8, xzr, le -; CHECK-SD-NEXT: fcmp s8, s8 -; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: csel x28, xzr, x8, vs -; CHECK-SD-NEXT: csel x24, xzr, x9, vs -; CHECK-SD-NEXT: fmov s0, s8 -; CHECK-SD-NEXT: bl __fixsfti -; CHECK-SD-NEXT: fcmp s8, s10 -; CHECK-SD-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: mov h0, v0.h[2] -; CHECK-SD-NEXT: csel x8, x22, x1, lt -; CHECK-SD-NEXT: csel x9, xzr, x0, lt -; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: csinv x9, x9, xzr, le -; CHECK-SD-NEXT: csel x8, x23, x8, gt -; CHECK-SD-NEXT: fcmp s8, s8 -; CHECK-SD-NEXT: fcvt s8, h0 -; CHECK-SD-NEXT: csel x25, xzr, x8, vs -; CHECK-SD-NEXT: csel x29, xzr, x9, vs -; CHECK-SD-NEXT: fmov s0, s8 -; CHECK-SD-NEXT: bl __fixsfti -; CHECK-SD-NEXT: ldr x9, [sp] // 8-byte Folded Reload -; CHECK-SD-NEXT: extr x8, x24, x28, #28 -; CHECK-SD-NEXT: fcmp s8, s10 -; CHECK-SD-NEXT: bfi x25, x21, #36, #28 -; CHECK-SD-NEXT: lsr x11, x20, #28 -; CHECK-SD-NEXT: stur x9, [x19, #75] -; CHECK-SD-NEXT: extr x9, x20, x21, #28 -; CHECK-SD-NEXT: stur x8, [x19, #41] -; CHECK-SD-NEXT: csel x8, x22, x1, lt -; CHECK-SD-NEXT: str x9, [x19, #16] -; CHECK-SD-NEXT: csel x9, xzr, x0, lt -; CHECK-SD-NEXT: fcmp s8, s9 -; CHECK-SD-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload -; CHECK-SD-NEXT: stp x29, x25, [x19] -; CHECK-SD-NEXT: stur x10, [x19, #50] -; CHECK-SD-NEXT: lsr x10, x24, #28 -; CHECK-SD-NEXT: csinv x9, x9, xzr, le -; CHECK-SD-NEXT: csel x8, x23, x8, gt -; CHECK-SD-NEXT: fcmp s8, s8 -; CHECK-SD-NEXT: strb w10, [x19, #49] -; CHECK-SD-NEXT: ldp x14, x12, [sp, #8] // 16-byte Folded Reload -; CHECK-SD-NEXT: strb w11, [x19, #24] -; CHECK-SD-NEXT: csel x8, xzr, x8, vs -; CHECK-SD-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload -; CHECK-SD-NEXT: csel x9, xzr, x9, vs -; CHECK-SD-NEXT: bfi x8, x28, #36, #28 -; CHECK-SD-NEXT: extr x10, x14, x12, #28 -; CHECK-SD-NEXT: bfi x27, x12, #36, #28 -; CHECK-SD-NEXT: ldr x12, [sp, #72] // 8-byte Folded Reload -; CHECK-SD-NEXT: bfi x26, x13, #36, #28 -; CHECK-SD-NEXT: stur x9, [x19, #25] -; CHECK-SD-NEXT: lsr x9, x14, #28 -; CHECK-SD-NEXT: extr x11, x12, x13, #28 -; CHECK-SD-NEXT: stur x8, [x19, #33] -; CHECK-SD-NEXT: lsr x8, x12, #28 -; CHECK-SD-NEXT: stur x10, [x19, #91] -; CHECK-SD-NEXT: stur x27, [x19, #83] -; CHECK-SD-NEXT: stur x11, [x19, #66] -; CHECK-SD-NEXT: stur x26, [x19, #58] -; CHECK-SD-NEXT: strb w9, [x19, #99] -; CHECK-SD-NEXT: strb w8, [x19, #74] -; CHECK-SD-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload -; CHECK-SD-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp x24, x23, [sp, #144] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp x26, x25, [sp, #128] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp x28, x27, [sp, #112] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-SD-NEXT: add sp, sp, #192 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i100: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: mov h1, v0.h[1] -; CHECK-GI-CVT-NEXT: mov h2, v0.h[2] -; CHECK-GI-CVT-NEXT: mov x11, x8 -; CHECK-GI-CVT-NEXT: fcvt s3, h0 -; CHECK-GI-CVT-NEXT: mov h4, v0.h[3] -; CHECK-GI-CVT-NEXT: str wzr, [x8, #8] -; CHECK-GI-CVT-NEXT: strb wzr, [x8, #12] -; CHECK-GI-CVT-NEXT: fcvt s1, h1 -; CHECK-GI-CVT-NEXT: fcvt s2, h2 -; CHECK-GI-CVT-NEXT: fcvtzs x9, s3 -; CHECK-GI-CVT-NEXT: fcvt s3, h4 -; CHECK-GI-CVT-NEXT: fcvtzs x10, s1 -; CHECK-GI-CVT-NEXT: mov h1, v0.h[4] -; CHECK-GI-CVT-NEXT: fcvtzs x12, s2 -; CHECK-GI-CVT-NEXT: mov h2, v0.h[5] -; CHECK-GI-CVT-NEXT: str x9, [x8] -; CHECK-GI-CVT-NEXT: mov ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/136739 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits