https://github.com/tobias-stadler created 
https://github.com/llvm/llvm-project/pull/136739

LegalizerHelper::reduceLoadStoreWidth does not work for non-byte-sized types,
because narrowing to such a type would require (un)packing of bits across byte
boundaries. Return UnableToLegalize in that case instead of miscompiling.

Precommit tests: #134904

>From e88a6e177837b478b4dc20def1b59f193b950965 Mon Sep 17 00:00:00 2001
From: Tobias Stadler <m...@stadler-tobias.de>
Date: Wed, 9 Apr 2025 13:32:02 +0100
Subject: [PATCH] [GlobalISel] Fix miscompile when narrowing vector load/stores
 to non-byte-sized types

LegalizerHelper::reduceLoadStoreWidth does not work for non-byte-sized
types, because this would require (un)packing of bits across byte
boundaries.
---
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |   5 +
 .../GlobalISel/legalize-load-store-vector.mir | 104 +---
 .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 490 +++++++-----------
 .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 424 ++++++---------
 4 files changed, 368 insertions(+), 655 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 0aa853389bf1a..4052060271331 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5210,6 +5210,11 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
   if (TypeIdx != 0)
     return UnableToLegalize;
 
+  if (!NarrowTy.isByteSized()) {
+    LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
+    return UnableToLegalize;
+  }
+
   // This implementation doesn't work for atomics. Give up instead of doing
   // something invalid.
   if (LdStMI.isAtomic())
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir
index 221980ff2c42e..3a2c57ab50147 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store-vector.mir
@@ -2,9 +2,11 @@
 # RUN: llc -O0 -mtriple=aarch64 -verify-machineinstrs -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel.*' -o - %s 2> %t.err | FileCheck %s
 # RUN: FileCheck -check-prefix=ERR %s < %t.err
 
-# ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s128) = G_LOAD %{{[0-9]+}}:_(p0) :: (load (<2 x s63>)) (in function: load-narrow-scalar-high-bits)
+# ERR: remark: <unknown>:0:0: unable to legalize instruction: G_STORE %{{[0-9]+}}:_(<8 x s9>), %{{[0-9]+}}:_(p0) :: (store (<8 x s9>), align 16) (in function: store-narrow-non-byte-sized)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(<8 x s9>) = G_LOAD %{{[0-9]+}}:_(p0) :: (load (<8 x s9>), align 16) (in function: load-narrow-non-byte-sized)
+# ERR-NEXT: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s128) = G_LOAD %{{[0-9]+}}:_(p0) :: (load (<2 x s63>)) (in function: load-narrow-scalar-high-bits)
 
-# FIXME: Scalarized stores for non-byte-sized vector elements store incorrect partial values.
+# FIXME: Non-byte-sized vector elements cause fallback in LegalizerHelper::reduceLoadStoreWidth
 ---
 name:            store-narrow-non-byte-sized
 tracksRegLiveness: true
@@ -15,60 +17,10 @@ body:             |
     ; CHECK: liveins: $x8
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x8
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 511
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
-    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AND]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[COPY]](p0) :: (store (s16), align 16)
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 257
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND1]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC1]](s16), [[PTR_ADD]](p0) :: (store (s16) into unknown-address + 1, align 1)
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[COPY5]]
-    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into unknown-address + 2)
-    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[COPY7]]
-    ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC3]](s16), [[PTR_ADD2]](p0) :: (store (s16) into unknown-address + 3, align 1)
-    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[COPY9]]
-    ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND4]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC4]](s16), [[PTR_ADD3]](p0) :: (store (s16) into unknown-address + 4, align 4)
-    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
-    ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[COPY11]]
-    ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[AND5]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC5]](s16), [[PTR_ADD4]](p0) :: (store (s16) into unknown-address + 5, align 1)
-    ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[C]], [[COPY12]]
-    ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[AND6]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC6]](s16), [[PTR_ADD5]](p0) :: (store (s16) into unknown-address + 6)
-    ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
-    ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[C3]], [[COPY13]]
-    ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[AND7]](s32)
-    ; CHECK-NEXT: G_STORE [[TRUNC7]](s16), [[PTR_ADD6]](p0) :: (store (s16) into unknown-address + 7, align 1)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s9) = G_CONSTANT i9 -256
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s9) = G_CONSTANT i9 -255
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s9>) = G_BUILD_VECTOR [[C]](s9), [[C1]](s9), [[C]](s9), [[C1]](s9), [[C]](s9), [[C1]](s9), [[C]](s9), [[C1]](s9)
+    ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s9>), [[COPY]](p0) :: (store (<8 x s9>), align 16)
     ; CHECK-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x8
     %1:_(s9) = G_CONSTANT i9 256
@@ -153,7 +105,7 @@ body:             |
 ...
 
 
-# FIXME: Scalarized loads for non-byte-sized vector elements load incorrect partial values.
+# FIXME: Non-byte-sized vector elements cause fallback in LegalizerHelper::reduceLoadStoreWidth
 ---
 name:            load-narrow-non-byte-sized
 tracksRegLiveness: true
@@ -164,41 +116,9 @@ body:             |
     ; CHECK: liveins: $x8
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x8
-    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p0) :: (load (s16), align 16)
-    ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD]], 9
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD]](p0) :: (load (s16) from unknown-address + 1, align 1)
-    ; CHECK-NEXT: [[ASSERT_ZEXT1:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD1]], 9
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p0) :: (load (s16) from unknown-address + 2)
-    ; CHECK-NEXT: [[ASSERT_ZEXT2:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD2]], 9
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD2]](p0) :: (load (s16) from unknown-address + 3, align 1)
-    ; CHECK-NEXT: [[ASSERT_ZEXT3:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD3]], 9
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD3]](p0) :: (load (s16) from unknown-address + 4, align 4)
-    ; CHECK-NEXT: [[ASSERT_ZEXT4:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD4]], 9
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
-    ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CHECK-NEXT: [[LOAD5:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD4]](p0) :: (load (s16) from unknown-address + 5, align 1)
-    ; CHECK-NEXT: [[ASSERT_ZEXT5:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD5]], 9
-    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
-    ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CHECK-NEXT: [[LOAD6:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD5]](p0) :: (load (s16) from unknown-address + 6)
-    ; CHECK-NEXT: [[ASSERT_ZEXT6:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD6]], 9
-    ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
-    ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CHECK-NEXT: [[LOAD7:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD6]](p0) :: (load (s16) from unknown-address + 7, align 1)
-    ; CHECK-NEXT: [[ASSERT_ZEXT7:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD7]], 9
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[ASSERT_ZEXT]](s16), [[ASSERT_ZEXT1]](s16), [[ASSERT_ZEXT2]](s16), [[ASSERT_ZEXT3]](s16), [[ASSERT_ZEXT4]](s16), [[ASSERT_ZEXT5]](s16), [[ASSERT_ZEXT6]](s16), [[ASSERT_ZEXT7]](s16)
-    ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s16) = G_CONSTANT i16 511
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16), [[C7]](s16)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<8 x s16>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: $q0 = COPY [[AND]](<8 x s16>)
+    ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<8 x s9>) = G_LOAD [[COPY]](p0) :: (load (<8 x s9>), align 16)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(<8 x s16>) = G_ZEXT [[LOAD]](<8 x s9>)
+    ; CHECK-NEXT: $q0 = COPY [[ZEXT]](<8 x s16>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %0:_(p0) = COPY $x8
     %2:_(<8 x s9>) = G_LOAD %0(p0) :: (load (<8 x s9>), align 16)
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index c834ca772b6ac..ed3b1ee53fbd9 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -4136,304 +4136,198 @@ define <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) {
 }
 
 define <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
-; CHECK-SD-LABEL: test_signed_v8f16_v8i100:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    sub sp, sp, #192
-; CHECK-SD-NEXT:    str d10, [sp, #64] // 8-byte Folded Spill
-; CHECK-SD-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x29, x30, [sp, #96] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x28, x27, [sp, #112] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x26, x25, [sp, #128] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x24, x23, [sp, #144] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x22, x21, [sp, #160] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x20, x19, [sp, #176] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 192
-; CHECK-SD-NEXT:    .cfi_offset w19, -8
-; CHECK-SD-NEXT:    .cfi_offset w20, -16
-; CHECK-SD-NEXT:    .cfi_offset w21, -24
-; CHECK-SD-NEXT:    .cfi_offset w22, -32
-; CHECK-SD-NEXT:    .cfi_offset w23, -40
-; CHECK-SD-NEXT:    .cfi_offset w24, -48
-; CHECK-SD-NEXT:    .cfi_offset w25, -56
-; CHECK-SD-NEXT:    .cfi_offset w26, -64
-; CHECK-SD-NEXT:    .cfi_offset w27, -72
-; CHECK-SD-NEXT:    .cfi_offset w28, -80
-; CHECK-SD-NEXT:    .cfi_offset w30, -88
-; CHECK-SD-NEXT:    .cfi_offset w29, -96
-; CHECK-SD-NEXT:    .cfi_offset b8, -104
-; CHECK-SD-NEXT:    .cfi_offset b9, -112
-; CHECK-SD-NEXT:    .cfi_offset b10, -128
-; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    mov x19, x8
-; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov h0, v0.h[1]
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    movi v10.2s, #241, lsl #24
-; CHECK-SD-NEXT:    mov w8, #1895825407 // =0x70ffffff
-; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    fmov s9, w8
-; CHECK-SD-NEXT:    mov x22, #-34359738368 // =0xfffffff800000000
-; CHECK-SD-NEXT:    mov x23, #34359738367 // =0x7ffffffff
-; CHECK-SD-NEXT:    mov h0, v0.h[3]
-; CHECK-SD-NEXT:    fcmp s8, s10
-; CHECK-SD-NEXT:    csel x8, x22, x1, lt
-; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
-; CHECK-SD-NEXT:    csel x8, x23, x8, gt
-; CHECK-SD-NEXT:    fcmp s8, s8
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csel x8, xzr, x8, vs
-; CHECK-SD-NEXT:    str x8, [sp, #72] // 8-byte Folded Spill
-; CHECK-SD-NEXT:    csel x8, xzr, x9, vs
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
-; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    fcmp s8, s10
-; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
-; CHECK-SD-NEXT:    csel x9, x22, x1, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    csel x9, x23, x9, gt
-; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
-; CHECK-SD-NEXT:    fcmp s8, s8
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csel x10, xzr, x8, vs
-; CHECK-SD-NEXT:    csel x8, xzr, x9, vs
-; CHECK-SD-NEXT:    stp x8, x10, [sp, #8] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    fcmp s8, s10
-; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov h0, v0.h[2]
-; CHECK-SD-NEXT:    csel x8, x22, x1, lt
-; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
-; CHECK-SD-NEXT:    csel x8, x23, x8, gt
-; CHECK-SD-NEXT:    fcmp s8, s8
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csel x26, xzr, x8, vs
-; CHECK-SD-NEXT:    csel x8, xzr, x9, vs
-; CHECK-SD-NEXT:    str x8, [sp, #32] // 8-byte Folded Spill
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    fcmp s8, s10
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov h0, v0.h[1]
-; CHECK-SD-NEXT:    csel x8, x22, x1, lt
-; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
-; CHECK-SD-NEXT:    csel x8, x23, x8, gt
-; CHECK-SD-NEXT:    fcmp s8, s8
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csel x27, xzr, x8, vs
-; CHECK-SD-NEXT:    csel x8, xzr, x9, vs
-; CHECK-SD-NEXT:    str x8, [sp] // 8-byte Folded Spill
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    fcmp s8, s10
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov h0, v0.h[3]
-; CHECK-SD-NEXT:    csel x8, x22, x1, lt
-; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
-; CHECK-SD-NEXT:    csel x8, x23, x8, gt
-; CHECK-SD-NEXT:    fcmp s8, s8
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csel x20, xzr, x8, vs
-; CHECK-SD-NEXT:    csel x21, xzr, x9, vs
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    fcmp s8, s10
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
-; CHECK-SD-NEXT:    csel x9, x22, x1, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    csel x9, x23, x9, gt
-; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
-; CHECK-SD-NEXT:    fcmp s8, s8
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csel x28, xzr, x8, vs
-; CHECK-SD-NEXT:    csel x24, xzr, x9, vs
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    fcmp s8, s10
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov h0, v0.h[2]
-; CHECK-SD-NEXT:    csel x8, x22, x1, lt
-; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
-; CHECK-SD-NEXT:    csel x8, x23, x8, gt
-; CHECK-SD-NEXT:    fcmp s8, s8
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csel x25, xzr, x8, vs
-; CHECK-SD-NEXT:    csel x29, xzr, x9, vs
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixsfti
-; CHECK-SD-NEXT:    ldr x9, [sp] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    extr x8, x24, x28, #28
-; CHECK-SD-NEXT:    fcmp s8, s10
-; CHECK-SD-NEXT:    bfi x25, x21, #36, #28
-; CHECK-SD-NEXT:    lsr x11, x20, #28
-; CHECK-SD-NEXT:    stur x9, [x19, #75]
-; CHECK-SD-NEXT:    extr x9, x20, x21, #28
-; CHECK-SD-NEXT:    stur x8, [x19, #41]
-; CHECK-SD-NEXT:    csel x8, x22, x1, lt
-; CHECK-SD-NEXT:    str x9, [x19, #16]
-; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    ldr x10, [sp, #32] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    stp x29, x25, [x19]
-; CHECK-SD-NEXT:    stur x10, [x19, #50]
-; CHECK-SD-NEXT:    lsr x10, x24, #28
-; CHECK-SD-NEXT:    csinv x9, x9, xzr, le
-; CHECK-SD-NEXT:    csel x8, x23, x8, gt
-; CHECK-SD-NEXT:    fcmp s8, s8
-; CHECK-SD-NEXT:    strb w10, [x19, #49]
-; CHECK-SD-NEXT:    ldp x14, x12, [sp, #8] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    strb w11, [x19, #24]
-; CHECK-SD-NEXT:    csel x8, xzr, x8, vs
-; CHECK-SD-NEXT:    ldr x13, [sp, #24] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    csel x9, xzr, x9, vs
-; CHECK-SD-NEXT:    bfi x8, x28, #36, #28
-; CHECK-SD-NEXT:    extr x10, x14, x12, #28
-; CHECK-SD-NEXT:    bfi x27, x12, #36, #28
-; CHECK-SD-NEXT:    ldr x12, [sp, #72] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    bfi x26, x13, #36, #28
-; CHECK-SD-NEXT:    stur x9, [x19, #25]
-; CHECK-SD-NEXT:    lsr x9, x14, #28
-; CHECK-SD-NEXT:    extr x11, x12, x13, #28
-; CHECK-SD-NEXT:    stur x8, [x19, #33]
-; CHECK-SD-NEXT:    lsr x8, x12, #28
-; CHECK-SD-NEXT:    stur x10, [x19, #91]
-; CHECK-SD-NEXT:    stur x27, [x19, #83]
-; CHECK-SD-NEXT:    stur x11, [x19, #66]
-; CHECK-SD-NEXT:    stur x26, [x19, #58]
-; CHECK-SD-NEXT:    strb w9, [x19, #99]
-; CHECK-SD-NEXT:    strb w8, [x19, #74]
-; CHECK-SD-NEXT:    ldp x20, x19, [sp, #176] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldr d10, [sp, #64] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x22, x21, [sp, #160] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x24, x23, [sp, #144] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x26, x25, [sp, #128] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x28, x27, [sp, #112] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x29, x30, [sp, #96] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    add sp, sp, #192
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-CVT-LABEL: test_signed_v8f16_v8i100:
-; CHECK-GI-CVT:       // %bb.0:
-; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-CVT-NEXT:    mov x11, x8
-; CHECK-GI-CVT-NEXT:    fcvt s3, h0
-; CHECK-GI-CVT-NEXT:    mov h4, v0.h[3]
-; CHECK-GI-CVT-NEXT:    str wzr, [x8, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x8, #12]
-; CHECK-GI-CVT-NEXT:    fcvt s1, h1
-; CHECK-GI-CVT-NEXT:    fcvt s2, h2
-; CHECK-GI-CVT-NEXT:    fcvtzs x9, s3
-; CHECK-GI-CVT-NEXT:    fcvt s3, h4
-; CHECK-GI-CVT-NEXT:    fcvtzs x10, s1
-; CHECK-GI-CVT-NEXT:    mov h1, v0.h[4]
-; CHECK-GI-CVT-NEXT:    fcvtzs x12, s2
-; CHECK-GI-CVT-NEXT:    mov h2, v0.h[5]
-; CHECK-GI-CVT-NEXT:    str x9, [x8]
-; CHECK-GI-CVT-NEXT:    mov x9, x8
-; CHECK-GI-CVT-NEXT:    fcvt s1, h1
-; CHECK-GI-CVT-NEXT:    str x10, [x11, #12]!
-; CHECK-GI-CVT-NEXT:    fcvtzs x10, s3
-; CHECK-GI-CVT-NEXT:    mov h3, v0.h[6]
-; CHECK-GI-CVT-NEXT:    fcvt s2, h2
-; CHECK-GI-CVT-NEXT:    mov h0, v0.h[7]
-; CHECK-GI-CVT-NEXT:    str wzr, [x11, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x11, #12]
-; CHECK-GI-CVT-NEXT:    mov x11, x8
-; CHECK-GI-CVT-NEXT:    str x12, [x9, #25]!
-; CHECK-GI-CVT-NEXT:    fcvtzs x12, s1
-; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
-; CHECK-GI-CVT-NEXT:    fcvt s1, h3
-; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
-; CHECK-GI-CVT-NEXT:    fcvt s0, h0
-; CHECK-GI-CVT-NEXT:    mov x9, x8
-; CHECK-GI-CVT-NEXT:    str x10, [x11, #37]!
-; CHECK-GI-CVT-NEXT:    fcvtzs x10, s2
-; CHECK-GI-CVT-NEXT:    str wzr, [x11, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x11, #12]
-; CHECK-GI-CVT-NEXT:    fcvtzs x11, s1
-; CHECK-GI-CVT-NEXT:    str x12, [x9, #50]!
-; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
-; CHECK-GI-CVT-NEXT:    mov x9, x8
-; CHECK-GI-CVT-NEXT:    str x10, [x9, #62]!
-; CHECK-GI-CVT-NEXT:    fcvtzs x10, s0
-; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
-; CHECK-GI-CVT-NEXT:    mov x9, x8
-; CHECK-GI-CVT-NEXT:    str x11, [x9, #75]!
-; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
-; CHECK-GI-CVT-NEXT:    str x10, [x8, #87]!
-; CHECK-GI-CVT-NEXT:    str wzr, [x8, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x8, #12]
-; CHECK-GI-CVT-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: test_signed_v8f16_v8i100:
-; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT:    mov x11, x8
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h0
-; CHECK-GI-FP16-NEXT:    str wzr, [x8, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x8, #12]
-; CHECK-GI-FP16-NEXT:    fcvtzs x10, h1
-; CHECK-GI-FP16-NEXT:    mov h1, v0.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzs x12, h2
-; CHECK-GI-FP16-NEXT:    mov h2, v0.h[4]
-; CHECK-GI-FP16-NEXT:    str x9, [x8]
-; CHECK-GI-FP16-NEXT:    mov x9, x8
-; CHECK-GI-FP16-NEXT:    str x10, [x11, #12]!
-; CHECK-GI-FP16-NEXT:    fcvtzs x10, h1
-; CHECK-GI-FP16-NEXT:    mov h1, v0.h[5]
-; CHECK-GI-FP16-NEXT:    str wzr, [x11, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x11, #12]
-; CHECK-GI-FP16-NEXT:    mov x11, x8
-; CHECK-GI-FP16-NEXT:    str x12, [x9, #25]!
-; CHECK-GI-FP16-NEXT:    fcvtzs x12, h2
-; CHECK-GI-FP16-NEXT:    str wzr, [x9, #8]
-; CHECK-GI-FP16-NEXT:    mov h2, v0.h[6]
-; CHECK-GI-FP16-NEXT:    mov h0, v0.h[7]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x9, #12]
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h1
-; CHECK-GI-FP16-NEXT:    str x10, [x11, #37]!
-; CHECK-GI-FP16-NEXT:    mov x10, x8
-; CHECK-GI-FP16-NEXT:    str wzr, [x11, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x11, #12]
-; CHECK-GI-FP16-NEXT:    fcvtzs x11, h2
-; CHECK-GI-FP16-NEXT:    str x12, [x10, #50]!
-; CHECK-GI-FP16-NEXT:    str wzr, [x10, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x10, #12]
-; CHECK-GI-FP16-NEXT:    mov x10, x8
-; CHECK-GI-FP16-NEXT:    str x9, [x10, #62]!
-; CHECK-GI-FP16-NEXT:    fcvtzs x9, h0
-; CHECK-GI-FP16-NEXT:    str wzr, [x10, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x10, #12]
-; CHECK-GI-FP16-NEXT:    mov x10, x8
-; CHECK-GI-FP16-NEXT:    str x11, [x10, #75]!
-; CHECK-GI-FP16-NEXT:    str wzr, [x10, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x10, #12]
-; CHECK-GI-FP16-NEXT:    str x9, [x8, #87]!
-; CHECK-GI-FP16-NEXT:    str wzr, [x8, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x8, #12]
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-LABEL: test_signed_v8f16_v8i100:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #192
+; CHECK-NEXT:    str d10, [sp, #64] // 8-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #176] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 192
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    .cfi_offset w23, -40
+; CHECK-NEXT:    .cfi_offset w24, -48
+; CHECK-NEXT:    .cfi_offset w25, -56
+; CHECK-NEXT:    .cfi_offset w26, -64
+; CHECK-NEXT:    .cfi_offset w27, -72
+; CHECK-NEXT:    .cfi_offset w28, -80
+; CHECK-NEXT:    .cfi_offset w30, -88
+; CHECK-NEXT:    .cfi_offset w29, -96
+; CHECK-NEXT:    .cfi_offset b8, -104
+; CHECK-NEXT:    .cfi_offset b9, -112
+; CHECK-NEXT:    .cfi_offset b10, -128
+; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    mov x19, x8
+; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    mov h0, v0.h[1]
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    movi v10.2s, #241, lsl #24
+; CHECK-NEXT:    mov w8, #1895825407 // =0x70ffffff
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    fmov s9, w8
+; CHECK-NEXT:    mov x22, #-34359738368 // =0xfffffff800000000
+; CHECK-NEXT:    mov x23, #34359738367 // =0x7ffffffff
+; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    csel x8, x22, x1, lt
+; CHECK-NEXT:    csel x9, xzr, x0, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    csinv x9, x9, xzr, le
+; CHECK-NEXT:    csel x8, x23, x8, gt
+; CHECK-NEXT:    fcmp s8, s8
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csel x8, xzr, x8, vs
+; CHECK-NEXT:    str x8, [sp, #72] // 8-byte Folded Spill
+; CHECK-NEXT:    csel x8, xzr, x9, vs
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    str x8, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, x22, x1, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    csel x9, x23, x9, gt
+; CHECK-NEXT:    csinv x8, x8, xzr, le
+; CHECK-NEXT:    fcmp s8, s8
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csel x10, xzr, x8, vs
+; CHECK-NEXT:    csel x8, xzr, x9, vs
+; CHECK-NEXT:    stp x8, x10, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    mov h0, v0.h[2]
+; CHECK-NEXT:    csel x8, x22, x1, lt
+; CHECK-NEXT:    csel x9, xzr, x0, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    csinv x9, x9, xzr, le
+; CHECK-NEXT:    csel x8, x23, x8, gt
+; CHECK-NEXT:    fcmp s8, s8
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csel x26, xzr, x8, vs
+; CHECK-NEXT:    csel x8, xzr, x9, vs
+; CHECK-NEXT:    str x8, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    mov h0, v0.h[1]
+; CHECK-NEXT:    csel x8, x22, x1, lt
+; CHECK-NEXT:    csel x9, xzr, x0, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    csinv x9, x9, xzr, le
+; CHECK-NEXT:    csel x8, x23, x8, gt
+; CHECK-NEXT:    fcmp s8, s8
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csel x27, xzr, x8, vs
+; CHECK-NEXT:    csel x8, xzr, x9, vs
+; CHECK-NEXT:    str x8, [sp] // 8-byte Folded Spill
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    csel x8, x22, x1, lt
+; CHECK-NEXT:    csel x9, xzr, x0, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    csinv x9, x9, xzr, le
+; CHECK-NEXT:    csel x8, x23, x8, gt
+; CHECK-NEXT:    fcmp s8, s8
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csel x20, xzr, x8, vs
+; CHECK-NEXT:    csel x21, xzr, x9, vs
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, x22, x1, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    csel x9, x23, x9, gt
+; CHECK-NEXT:    csinv x8, x8, xzr, le
+; CHECK-NEXT:    fcmp s8, s8
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csel x28, xzr, x8, vs
+; CHECK-NEXT:    csel x24, xzr, x9, vs
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    mov h0, v0.h[2]
+; CHECK-NEXT:    csel x8, x22, x1, lt
+; CHECK-NEXT:    csel x9, xzr, x0, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    csinv x9, x9, xzr, le
+; CHECK-NEXT:    csel x8, x23, x8, gt
+; CHECK-NEXT:    fcmp s8, s8
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csel x25, xzr, x8, vs
+; CHECK-NEXT:    csel x29, xzr, x9, vs
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixsfti
+; CHECK-NEXT:    ldr x9, [sp] // 8-byte Folded Reload
+; CHECK-NEXT:    extr x8, x24, x28, #28
+; CHECK-NEXT:    fcmp s8, s10
+; CHECK-NEXT:    bfi x25, x21, #36, #28
+; CHECK-NEXT:    lsr x11, x20, #28
+; CHECK-NEXT:    stur x9, [x19, #75]
+; CHECK-NEXT:    extr x9, x20, x21, #28
+; CHECK-NEXT:    stur x8, [x19, #41]
+; CHECK-NEXT:    csel x8, x22, x1, lt
+; CHECK-NEXT:    str x9, [x19, #16]
+; CHECK-NEXT:    csel x9, xzr, x0, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    ldr x10, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    stp x29, x25, [x19]
+; CHECK-NEXT:    stur x10, [x19, #50]
+; CHECK-NEXT:    lsr x10, x24, #28
+; CHECK-NEXT:    csinv x9, x9, xzr, le
+; CHECK-NEXT:    csel x8, x23, x8, gt
+; CHECK-NEXT:    fcmp s8, s8
+; CHECK-NEXT:    strb w10, [x19, #49]
+; CHECK-NEXT:    ldp x14, x12, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT:    strb w11, [x19, #24]
+; CHECK-NEXT:    csel x8, xzr, x8, vs
+; CHECK-NEXT:    ldr x13, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT:    csel x9, xzr, x9, vs
+; CHECK-NEXT:    bfi x8, x28, #36, #28
+; CHECK-NEXT:    extr x10, x14, x12, #28
+; CHECK-NEXT:    bfi x27, x12, #36, #28
+; CHECK-NEXT:    ldr x12, [sp, #72] // 8-byte Folded Reload
+; CHECK-NEXT:    bfi x26, x13, #36, #28
+; CHECK-NEXT:    stur x9, [x19, #25]
+; CHECK-NEXT:    lsr x9, x14, #28
+; CHECK-NEXT:    extr x11, x12, x13, #28
+; CHECK-NEXT:    stur x8, [x19, #33]
+; CHECK-NEXT:    lsr x8, x12, #28
+; CHECK-NEXT:    stur x10, [x19, #91]
+; CHECK-NEXT:    stur x27, [x19, #83]
+; CHECK-NEXT:    stur x11, [x19, #66]
+; CHECK-NEXT:    stur x26, [x19, #58]
+; CHECK-NEXT:    strb w9, [x19, #99]
+; CHECK-NEXT:    strb w8, [x19, #74]
+; CHECK-NEXT:    ldp x20, x19, [sp, #176] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr d10, [sp, #64] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #192
+; CHECK-NEXT:    ret
     %x = call <8 x i100> @llvm.fptosi.sat.v8f16.v8i100(<8 x half> %f)
     ret <8 x i100> %x
 }
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index a01644678b25f..3cc8b7e427ca8 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -3405,271 +3405,165 @@ define <8 x i64> @test_unsigned_v8f16_v8i64(<8 x 
half> %f) {
 }
 
 define <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
-; CHECK-SD-LABEL: test_unsigned_v8f16_v8i100:
-; CHECK-SD:       // %bb.0:
-; CHECK-SD-NEXT:    sub sp, sp, #176
-; CHECK-SD-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x29, x30, [sp, #80] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x28, x27, [sp, #96] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x26, x25, [sp, #112] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    .cfi_def_cfa_offset 176
-; CHECK-SD-NEXT:    .cfi_offset w19, -8
-; CHECK-SD-NEXT:    .cfi_offset w20, -16
-; CHECK-SD-NEXT:    .cfi_offset w21, -24
-; CHECK-SD-NEXT:    .cfi_offset w22, -32
-; CHECK-SD-NEXT:    .cfi_offset w23, -40
-; CHECK-SD-NEXT:    .cfi_offset w24, -48
-; CHECK-SD-NEXT:    .cfi_offset w25, -56
-; CHECK-SD-NEXT:    .cfi_offset w26, -64
-; CHECK-SD-NEXT:    .cfi_offset w27, -72
-; CHECK-SD-NEXT:    .cfi_offset w28, -80
-; CHECK-SD-NEXT:    .cfi_offset w30, -88
-; CHECK-SD-NEXT:    .cfi_offset w29, -96
-; CHECK-SD-NEXT:    .cfi_offset b8, -104
-; CHECK-SD-NEXT:    .cfi_offset b9, -112
-; CHECK-SD-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-SD-NEXT:    mov x19, x8
-; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    mov h0, v0.h[1]
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixunssfti
-; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    mov w8, #1904214015 // =0x717fffff
-; CHECK-SD-NEXT:    fcmp s8, #0.0
-; CHECK-SD-NEXT:    fmov s9, w8
-; CHECK-SD-NEXT:    mov x23, #68719476735 // =0xfffffffff
-; CHECK-SD-NEXT:    mov h0, v0.h[3]
-; CHECK-SD-NEXT:    csel x9, xzr, x0, lt
-; CHECK-SD-NEXT:    csel x8, xzr, x1, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csel x10, x23, x8, gt
-; CHECK-SD-NEXT:    csinv x8, x9, xzr, le
-; CHECK-SD-NEXT:    stp x8, x10, [sp, #16] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixunssfti
-; CHECK-SD-NEXT:    fcmp s8, #0.0
-; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
-; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csel x9, x23, x9, gt
-; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
-; CHECK-SD-NEXT:    stp x8, x9, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixunssfti
-; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    fcmp s8, #0.0
-; CHECK-SD-NEXT:    mov h0, v0.h[2]
-; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
-; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
-; CHECK-SD-NEXT:    csel x25, x23, x9, gt
-; CHECK-SD-NEXT:    str x8, [sp, #32] // 8-byte Folded Spill
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixunssfti
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    fcmp s8, #0.0
-; CHECK-SD-NEXT:    mov h0, v0.h[1]
-; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
-; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csel x26, x23, x9, gt
-; CHECK-SD-NEXT:    csinv x28, x8, xzr, le
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixunssfti
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    fcmp s8, #0.0
-; CHECK-SD-NEXT:    mov h0, v0.h[3]
-; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
-; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csel x29, x23, x9, gt
-; CHECK-SD-NEXT:    csinv x20, x8, xzr, le
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixunssfti
-; CHECK-SD-NEXT:    fcmp s8, #0.0
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
-; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csel x21, x23, x9, gt
-; CHECK-SD-NEXT:    csinv x27, x8, xzr, le
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixunssfti
-; CHECK-SD-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    fcmp s8, #0.0
-; CHECK-SD-NEXT:    mov h0, v0.h[2]
-; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
-; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    fcvt s8, h0
-; CHECK-SD-NEXT:    csel x22, x23, x9, gt
-; CHECK-SD-NEXT:    csinv x24, x8, xzr, le
-; CHECK-SD-NEXT:    fmov s0, s8
-; CHECK-SD-NEXT:    bl __fixunssfti
-; CHECK-SD-NEXT:    extr x8, x21, x27, #28
-; CHECK-SD-NEXT:    extr x9, x29, x20, #28
-; CHECK-SD-NEXT:    stur x28, [x19, #75]
-; CHECK-SD-NEXT:    fcmp s8, #0.0
-; CHECK-SD-NEXT:    bfi x22, x20, #36, #28
-; CHECK-SD-NEXT:    lsr x11, x29, #28
-; CHECK-SD-NEXT:    stur x8, [x19, #41]
-; CHECK-SD-NEXT:    str x9, [x19, #16]
-; CHECK-SD-NEXT:    ldr x10, [sp, #32] // 8-byte Folded Reload
-; CHECK-SD-NEXT:    csel x8, xzr, x0, lt
-; CHECK-SD-NEXT:    csel x9, xzr, x1, lt
-; CHECK-SD-NEXT:    fcmp s8, s9
-; CHECK-SD-NEXT:    stp x24, x22, [x19]
-; CHECK-SD-NEXT:    stur x10, [x19, #50]
-; CHECK-SD-NEXT:    lsr x10, x21, #28
-; CHECK-SD-NEXT:    strb w11, [x19, #24]
-; CHECK-SD-NEXT:    strb w10, [x19, #49]
-; CHECK-SD-NEXT:    csel x9, x23, x9, gt
-; CHECK-SD-NEXT:    csinv x8, x8, xzr, le
-; CHECK-SD-NEXT:    ldp x12, x11, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    bfi x9, x27, #36, #28
-; CHECK-SD-NEXT:    stur x8, [x19, #25]
-; CHECK-SD-NEXT:    stur x9, [x19, #33]
-; CHECK-SD-NEXT:    extr x10, x11, x12, #28
-; CHECK-SD-NEXT:    bfi x26, x12, #36, #28
-; CHECK-SD-NEXT:    stur x10, [x19, #91]
-; CHECK-SD-NEXT:    ldp x10, x9, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    stur x26, [x19, #83]
-; CHECK-SD-NEXT:    extr x8, x9, x10, #28
-; CHECK-SD-NEXT:    bfi x25, x10, #36, #28
-; CHECK-SD-NEXT:    lsr x9, x9, #28
-; CHECK-SD-NEXT:    stur x8, [x19, #66]
-; CHECK-SD-NEXT:    lsr x8, x11, #28
-; CHECK-SD-NEXT:    stur x25, [x19, #58]
-; CHECK-SD-NEXT:    strb w8, [x19, #99]
-; CHECK-SD-NEXT:    strb w9, [x19, #74]
-; CHECK-SD-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x26, x25, [sp, #112] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x28, x27, [sp, #96] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp x29, x30, [sp, #80] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
-; CHECK-SD-NEXT:    add sp, sp, #176
-; CHECK-SD-NEXT:    ret
-;
-; CHECK-GI-CVT-LABEL: test_unsigned_v8f16_v8i100:
-; CHECK-GI-CVT:       // %bb.0:
-; CHECK-GI-CVT-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-CVT-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-CVT-NEXT:    mov x11, x8
-; CHECK-GI-CVT-NEXT:    fcvt s3, h0
-; CHECK-GI-CVT-NEXT:    mov h4, v0.h[3]
-; CHECK-GI-CVT-NEXT:    str wzr, [x8, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x8, #12]
-; CHECK-GI-CVT-NEXT:    fcvt s1, h1
-; CHECK-GI-CVT-NEXT:    fcvt s2, h2
-; CHECK-GI-CVT-NEXT:    fcvtzu x9, s3
-; CHECK-GI-CVT-NEXT:    fcvt s3, h4
-; CHECK-GI-CVT-NEXT:    fcvtzu x10, s1
-; CHECK-GI-CVT-NEXT:    mov h1, v0.h[4]
-; CHECK-GI-CVT-NEXT:    fcvtzu x12, s2
-; CHECK-GI-CVT-NEXT:    mov h2, v0.h[5]
-; CHECK-GI-CVT-NEXT:    str x9, [x8]
-; CHECK-GI-CVT-NEXT:    mov x9, x8
-; CHECK-GI-CVT-NEXT:    fcvt s1, h1
-; CHECK-GI-CVT-NEXT:    str x10, [x11, #12]!
-; CHECK-GI-CVT-NEXT:    fcvtzu x10, s3
-; CHECK-GI-CVT-NEXT:    mov h3, v0.h[6]
-; CHECK-GI-CVT-NEXT:    fcvt s2, h2
-; CHECK-GI-CVT-NEXT:    mov h0, v0.h[7]
-; CHECK-GI-CVT-NEXT:    str wzr, [x11, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x11, #12]
-; CHECK-GI-CVT-NEXT:    mov x11, x8
-; CHECK-GI-CVT-NEXT:    str x12, [x9, #25]!
-; CHECK-GI-CVT-NEXT:    fcvtzu x12, s1
-; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
-; CHECK-GI-CVT-NEXT:    fcvt s1, h3
-; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
-; CHECK-GI-CVT-NEXT:    fcvt s0, h0
-; CHECK-GI-CVT-NEXT:    mov x9, x8
-; CHECK-GI-CVT-NEXT:    str x10, [x11, #37]!
-; CHECK-GI-CVT-NEXT:    fcvtzu x10, s2
-; CHECK-GI-CVT-NEXT:    str wzr, [x11, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x11, #12]
-; CHECK-GI-CVT-NEXT:    fcvtzu x11, s1
-; CHECK-GI-CVT-NEXT:    str x12, [x9, #50]!
-; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
-; CHECK-GI-CVT-NEXT:    mov x9, x8
-; CHECK-GI-CVT-NEXT:    str x10, [x9, #62]!
-; CHECK-GI-CVT-NEXT:    fcvtzu x10, s0
-; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
-; CHECK-GI-CVT-NEXT:    mov x9, x8
-; CHECK-GI-CVT-NEXT:    str x11, [x9, #75]!
-; CHECK-GI-CVT-NEXT:    str wzr, [x9, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x9, #12]
-; CHECK-GI-CVT-NEXT:    str x10, [x8, #87]!
-; CHECK-GI-CVT-NEXT:    str wzr, [x8, #8]
-; CHECK-GI-CVT-NEXT:    strb wzr, [x8, #12]
-; CHECK-GI-CVT-NEXT:    ret
-;
-; CHECK-GI-FP16-LABEL: test_unsigned_v8f16_v8i100:
-; CHECK-GI-FP16:       // %bb.0:
-; CHECK-GI-FP16-NEXT:    mov h1, v0.h[1]
-; CHECK-GI-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-GI-FP16-NEXT:    mov x11, x8
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h0
-; CHECK-GI-FP16-NEXT:    str wzr, [x8, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x8, #12]
-; CHECK-GI-FP16-NEXT:    fcvtzu x10, h1
-; CHECK-GI-FP16-NEXT:    mov h1, v0.h[3]
-; CHECK-GI-FP16-NEXT:    fcvtzu x12, h2
-; CHECK-GI-FP16-NEXT:    mov h2, v0.h[4]
-; CHECK-GI-FP16-NEXT:    str x9, [x8]
-; CHECK-GI-FP16-NEXT:    mov x9, x8
-; CHECK-GI-FP16-NEXT:    str x10, [x11, #12]!
-; CHECK-GI-FP16-NEXT:    fcvtzu x10, h1
-; CHECK-GI-FP16-NEXT:    mov h1, v0.h[5]
-; CHECK-GI-FP16-NEXT:    str wzr, [x11, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x11, #12]
-; CHECK-GI-FP16-NEXT:    mov x11, x8
-; CHECK-GI-FP16-NEXT:    str x12, [x9, #25]!
-; CHECK-GI-FP16-NEXT:    fcvtzu x12, h2
-; CHECK-GI-FP16-NEXT:    str wzr, [x9, #8]
-; CHECK-GI-FP16-NEXT:    mov h2, v0.h[6]
-; CHECK-GI-FP16-NEXT:    mov h0, v0.h[7]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x9, #12]
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h1
-; CHECK-GI-FP16-NEXT:    str x10, [x11, #37]!
-; CHECK-GI-FP16-NEXT:    mov x10, x8
-; CHECK-GI-FP16-NEXT:    str wzr, [x11, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x11, #12]
-; CHECK-GI-FP16-NEXT:    fcvtzu x11, h2
-; CHECK-GI-FP16-NEXT:    str x12, [x10, #50]!
-; CHECK-GI-FP16-NEXT:    str wzr, [x10, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x10, #12]
-; CHECK-GI-FP16-NEXT:    mov x10, x8
-; CHECK-GI-FP16-NEXT:    str x9, [x10, #62]!
-; CHECK-GI-FP16-NEXT:    fcvtzu x9, h0
-; CHECK-GI-FP16-NEXT:    str wzr, [x10, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x10, #12]
-; CHECK-GI-FP16-NEXT:    mov x10, x8
-; CHECK-GI-FP16-NEXT:    str x11, [x10, #75]!
-; CHECK-GI-FP16-NEXT:    str wzr, [x10, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x10, #12]
-; CHECK-GI-FP16-NEXT:    str x9, [x8, #87]!
-; CHECK-GI-FP16-NEXT:    str wzr, [x8, #8]
-; CHECK-GI-FP16-NEXT:    strb wzr, [x8, #12]
-; CHECK-GI-FP16-NEXT:    ret
+; CHECK-LABEL: test_unsigned_v8f16_v8i100:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #176
+; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x26, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 176
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    .cfi_offset w23, -40
+; CHECK-NEXT:    .cfi_offset w24, -48
+; CHECK-NEXT:    .cfi_offset w25, -56
+; CHECK-NEXT:    .cfi_offset w26, -64
+; CHECK-NEXT:    .cfi_offset w27, -72
+; CHECK-NEXT:    .cfi_offset w28, -80
+; CHECK-NEXT:    .cfi_offset w30, -88
+; CHECK-NEXT:    .cfi_offset w29, -96
+; CHECK-NEXT:    .cfi_offset b8, -104
+; CHECK-NEXT:    .cfi_offset b9, -112
+; CHECK-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    mov x19, x8
+; CHECK-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    mov h0, v0.h[1]
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    mov w8, #1904214015 // =0x717fffff
+; CHECK-NEXT:    fcmp s8, #0.0
+; CHECK-NEXT:    fmov s9, w8
+; CHECK-NEXT:    mov x23, #68719476735 // =0xfffffffff
+; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    csel x9, xzr, x0, lt
+; CHECK-NEXT:    csel x8, xzr, x1, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csel x10, x23, x8, gt
+; CHECK-NEXT:    csinv x8, x9, xzr, le
+; CHECK-NEXT:    stp x8, x10, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    fcmp s8, #0.0
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, xzr, x1, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csel x9, x23, x9, gt
+; CHECK-NEXT:    csinv x8, x8, xzr, le
+; CHECK-NEXT:    stp x8, x9, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    fcmp s8, #0.0
+; CHECK-NEXT:    mov h0, v0.h[2]
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, xzr, x1, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csinv x8, x8, xzr, le
+; CHECK-NEXT:    csel x25, x23, x9, gt
+; CHECK-NEXT:    str x8, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    fcmp s8, #0.0
+; CHECK-NEXT:    mov h0, v0.h[1]
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, xzr, x1, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csel x26, x23, x9, gt
+; CHECK-NEXT:    csinv x28, x8, xzr, le
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    fcmp s8, #0.0
+; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, xzr, x1, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csel x29, x23, x9, gt
+; CHECK-NEXT:    csinv x20, x8, xzr, le
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    fcmp s8, #0.0
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, xzr, x1, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csel x21, x23, x9, gt
+; CHECK-NEXT:    csinv x27, x8, xzr, le
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    ldr q0, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    fcmp s8, #0.0
+; CHECK-NEXT:    mov h0, v0.h[2]
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, xzr, x1, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    fcvt s8, h0
+; CHECK-NEXT:    csel x22, x23, x9, gt
+; CHECK-NEXT:    csinv x24, x8, xzr, le
+; CHECK-NEXT:    fmov s0, s8
+; CHECK-NEXT:    bl __fixunssfti
+; CHECK-NEXT:    extr x8, x21, x27, #28
+; CHECK-NEXT:    extr x9, x29, x20, #28
+; CHECK-NEXT:    stur x28, [x19, #75]
+; CHECK-NEXT:    fcmp s8, #0.0
+; CHECK-NEXT:    bfi x22, x20, #36, #28
+; CHECK-NEXT:    lsr x11, x29, #28
+; CHECK-NEXT:    stur x8, [x19, #41]
+; CHECK-NEXT:    str x9, [x19, #16]
+; CHECK-NEXT:    ldr x10, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    csel x8, xzr, x0, lt
+; CHECK-NEXT:    csel x9, xzr, x1, lt
+; CHECK-NEXT:    fcmp s8, s9
+; CHECK-NEXT:    stp x24, x22, [x19]
+; CHECK-NEXT:    stur x10, [x19, #50]
+; CHECK-NEXT:    lsr x10, x21, #28
+; CHECK-NEXT:    strb w11, [x19, #24]
+; CHECK-NEXT:    strb w10, [x19, #49]
+; CHECK-NEXT:    csel x9, x23, x9, gt
+; CHECK-NEXT:    csinv x8, x8, xzr, le
+; CHECK-NEXT:    ldp x12, x11, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    bfi x9, x27, #36, #28
+; CHECK-NEXT:    stur x8, [x19, #25]
+; CHECK-NEXT:    stur x9, [x19, #33]
+; CHECK-NEXT:    extr x10, x11, x12, #28
+; CHECK-NEXT:    bfi x26, x12, #36, #28
+; CHECK-NEXT:    stur x10, [x19, #91]
+; CHECK-NEXT:    ldp x10, x9, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    stur x26, [x19, #83]
+; CHECK-NEXT:    extr x8, x9, x10, #28
+; CHECK-NEXT:    bfi x25, x10, #36, #28
+; CHECK-NEXT:    lsr x9, x9, #28
+; CHECK-NEXT:    stur x8, [x19, #66]
+; CHECK-NEXT:    lsr x8, x11, #28
+; CHECK-NEXT:    stur x25, [x19, #58]
+; CHECK-NEXT:    strb w8, [x19, #99]
+; CHECK-NEXT:    strb w9, [x19, #74]
+; CHECK-NEXT:    ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x26, x25, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x29, x30, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #176
+; CHECK-NEXT:    ret
     %x = call <8 x i100> @llvm.fptoui.sat.v8f16.v8i100(<8 x half> %f)
     ret <8 x i100> %x
 }

_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to