[llvm-branch-commits] [llvm] e8f70c9 - Revert "[DAG] Enable bitcast STLF for Constant/Undef (#172523)"

via llvm-branch-commits Fri, 30 Jan 2026 03:48:05 -0800

Author: Alex Bradbury
Date: 2026-01-30T11:47:52Z
New Revision: e8f70c9350b50f4ea6705d532a3c4370fbe3827a


URL: https://github.com/llvm/llvm-project/commit/e8f70c9350b50f4ea6705d532a3c4370fbe3827a
DIFF: https://github.com/llvm/llvm-project/commit/e8f70c9350b50f4ea6705d532a3c4370fbe3827a.diff

LOG: Revert "[DAG] Enable bitcast STLF for Constant/Undef (#172523)"

This reverts commit d3c64633c3a17edd370280eb09668c8c3927383e.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
    llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
    llvm/test/CodeGen/AArch64/pr161013.ll
    llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
    llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
    llvm/test/CodeGen/AArch64/v3f-to-int.ll
    llvm/test/CodeGen/PowerPC/vsx-p9.ll
    llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll
    llvm/test/CodeGen/X86/avx512-shuffles/shuffle-chained-bf16.ll
    llvm/test/CodeGen/X86/pr30290.ll
    llvm/test/CodeGen/X86/pr38533.ll
    llvm/test/CodeGen/X86/vectorcall.ll

Removed: 
    llvm/test/CodeGen/AArch64/sve-stlf.ll
    llvm/test/CodeGen/RISCV/rvv/stlf.ll
    llvm/test/CodeGen/X86/dag-stlf-mismatch.ll


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d1de43318bd09..b8a61f0f63758 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -20747,32 +20747,9 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
     if (!isTypeLegal(LDMemType))
       break;
     if (STMemType != LDMemType) {
-      if (LdMemSize == StMemSize) {
-        if (TLI.isOperationLegal(ISD::BITCAST, LDMemType) &&
-            isTypeLegal(LDMemType) &&
-            TLI.isOperationLegal(ISD::BITCAST, STMemType) &&
-            isTypeLegal(STMemType) &&
-            TLI.isLoadBitCastBeneficial(LDMemType, STMemType, DAG,
-                                        *LD->getMemOperand()))
-          Val = DAG.getBitcast(LDMemType, Val);
-        else
-          break;
-      } else if (LDMemType.isVector()) {
-        EVT EltVT = LDMemType.getVectorElementType();
-        uint64_t EltSize = EltVT.getSizeInBits();
-
-        if (!StMemSize.isKnownMultipleOf(EltSize))
-          break;
-
-        EVT InterVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
-                                       StMemSize.divideCoefficientBy(EltSize));
-        if (!TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, InterVT))
-          break;
-
-        Val = DAG.getExtractSubvector(SDLoc(LD), LDMemType,
-                                      DAG.getBitcast(InterVT, Val), 0);
-      } else if (!STMemType.isVector() && !LDMemType.isVector() &&
-                 STMemType.isInteger() && LDMemType.isInteger())
+      // TODO: Support vectors? This requires extract_subvector/bitcast.
+      if (!STMemType.isVector() && !LDMemType.isVector() &&
+          STMemType.isInteger() && LDMemType.isInteger())
         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
       else
         break;

diff  --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll 
b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 2c1b735ffe28c..35ffc99f7a405 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -508,8 +508,8 @@ define <4 x i8> @small_vector(<4 x i8> %0) {
 ; CHECK-NEXT:     add  x29, sp, #176
 ; CHECK-NEXT:     .seh_add_fp  176
 ; CHECK-NEXT:     .seh_endprologue
-; CHECK-NEXT:     fmov s0, w0
 ; CHECK-NEXT:     str  w0, [sp, #12]
+; CHECK-NEXT:     ldr  s0, [sp, #12]
 ; CHECK-NEXT:     ushll        v0.8h, v0.8b, #0
 ; CHECK-NEXT:                                           // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:     blr  x9

diff  --git a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll 
b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
index 6fba6a3974574..dc352244deeef 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
@@ -477,8 +477,8 @@ declare <4 x i8> @small_vector(<4 x i8> %0) nounwind;
 ; CHECK-NEXT:     fmov w0, s0
 ; CHECK-NEXT:     stur s0, [x29, #-4]
 ; CHECK-NEXT:     blr  x16
-; CHECK-NEXT:     fmov s0, w8
 ; CHECK-NEXT:     stur w8, [x29, #-8]
+; CHECK-NEXT:     ldur s0, [x29, #-8]
 ; CHECK-NEXT:     ushll        v0.8h, v0.8b, #0
 ; CHECK-NEXT:                                           // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:     .seh_startepilogue

diff  --git a/llvm/test/CodeGen/AArch64/pr161013.ll 
b/llvm/test/CodeGen/AArch64/pr161013.ll
index c493a80ce2edb..d163914f1ac0e 100644
--- a/llvm/test/CodeGen/AArch64/pr161013.ll
+++ b/llvm/test/CodeGen/AArch64/pr161013.ll
@@ -6,7 +6,8 @@ define <16 x i4> @avir_v2i4_v16i4(<2 x i4> %arg) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
-; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    str d0, [sp, #8]
+; CHECK-NEXT:    ldr x8, [sp, #8]
 ; CHECK-NEXT:    and w10, w8, #0xf
 ; CHECK-NEXT:    ubfx w9, w8, #4, #4
 ; CHECK-NEXT:    fmov s0, w10

diff  --git a/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll 
b/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll
index 8fe8873ec3e0a..8620c9a34b5d6 100644
--- a/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll
+++ b/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll
@@ -62,6 +62,7 @@ define <vscale x 4 x i32> @sti64ldi32(ptr nocapture %P, 
<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: sti64ldi32:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str z0, [x0, #1, mul vl]
+; CHECK-NEXT:    ldr z0, [x0, #1, mul vl]
 ; CHECK-NEXT:    ret
 entry:
   %0 = bitcast ptr %P to ptr

diff  --git a/llvm/test/CodeGen/AArch64/sve-stlf.ll 
b/llvm/test/CodeGen/AArch64/sve-stlf.ll
deleted file mode 100644
index 7403da8298312..0000000000000
--- a/llvm/test/CodeGen/AArch64/sve-stlf.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 6
-; RUN: llc < %s -mtriple=aarch64 -mattr=+sve | FileCheck %s
-
-define <vscale x 4 x i32> @test_stlf_scalable(ptr %p, <vscale x 4 x i32> %v) {
-; CHECK-LABEL: test_stlf_scalable:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    str z0, [x0]
-; CHECK-NEXT:    ret
-  store <vscale x 4 x i32> %v, ptr %p
-  %res = load <vscale x 4 x i32>, ptr %p
-  ret <vscale x 4 x i32> %res
-}

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
index baaf8c3d906c9..39f8aa104f484 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll
@@ -639,7 +639,8 @@ define <1 x i64> @ctlz_v1i64(<1 x i64> %op) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    fmov x8, d0
 ; NONEON-NOSVE-NEXT:    clz x8, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x i64> @llvm.ctlz.v1i64(<1 x i64> %op)
@@ -2308,7 +2309,8 @@ define <1 x i64> @ctpop_v1i64(<1 x i64> %op) {
 ; NONEON-NOSVE-NEXT:    and x9, x9, #0xf0f0f0f0f0f0f0f
 ; NONEON-NOSVE-NEXT:    mul x8, x9, x8
 ; NONEON-NOSVE-NEXT:    lsr x8, x8, #56
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x i64> @llvm.ctpop.v1i64(<1 x i64> %op)
@@ -3187,7 +3189,8 @@ define <1 x i64> @cttz_v1i64(<1 x i64> %op) {
 ; NONEON-NOSVE-NEXT:    fmov x8, d0
 ; NONEON-NOSVE-NEXT:    rbit x8, x8
 ; NONEON-NOSVE-NEXT:    clz x8, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %op)

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
index 85b09facb3285..72ea2f9bf3d87 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
@@ -451,7 +451,8 @@ define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x 
double> %op2) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    fcmp d0, d1
 ; NONEON-NOSVE-NEXT:    csetm x8, eq
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %cmp = fcmp oeq <1 x double> %op1, %op2

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
index 4aea35dad369a..6c29666890bef 100644
--- 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
+++ 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-extend-trunc.ll
@@ -415,6 +415,8 @@ define void @fcvt_v1f16_v1f64(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    ldr h0, [x0]
 ; NONEON-NOSVE-NEXT:    fcvt d0, h0
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    str d0, [x1]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
index 3f6050c079f9b..fddd5df323e46 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
@@ -591,6 +591,8 @@ define <1 x double> @fma_v1f64(<1 x double> %op1, <1 x 
double> %op2, <1 x double
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    fmadd d0, d0, d1, d2
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %mul = fmul contract <1 x double> %op1, %op2

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
index 369b698f23eaf..e5266eb95f697 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
@@ -392,6 +392,8 @@ define <1 x double> @fmaxnm_v1f64(<1 x double> %op1, <1 x 
double> %op2) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    fmaxnm d0, d0, d1
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x double> @llvm.maxnum.v1f64(<1 x double> %op1, <1 x double> 
%op2)
@@ -851,6 +853,8 @@ define <1 x double> @fminnm_v1f64(<1 x double> %op1, <1 x 
double> %op2) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    fminnm d0, d0, d1
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x double> @llvm.minnum.v1f64(<1 x double> %op1, <1 x double> 
%op2)
@@ -1310,6 +1314,8 @@ define <1 x double> @fmax_v1f64(<1 x double> %op1, <1 x 
double> %op2) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    fmax d0, d0, d1
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x double> @llvm.maximum.v1f64(<1 x double> %op1, <1 x double> 
%op2)
@@ -1769,6 +1775,8 @@ define <1 x double> @fmin_v1f64(<1 x double> %op1, <1 x 
double> %op2) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    fmin d0, d0, d1
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x double> @llvm.minimum.v1f64(<1 x double> %op1, <1 x double> 
%op2)

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
index f278423f9f85a..443cb93aa8ca1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
@@ -355,6 +355,8 @@ define <1 x double> @frintp_v1f64(<1 x double> %op) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    frintp d0, d0
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op)
@@ -769,6 +771,8 @@ define <1 x double> @frintm_v1f64(<1 x double> %op) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    frintm d0, d0
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op)
@@ -1183,6 +1187,8 @@ define <1 x double> @frinti_v1f64(<1 x double> %op) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    frinti d0, d0
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op)
@@ -1597,6 +1603,8 @@ define <1 x double> @frintx_v1f64(<1 x double> %op) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    frintx d0, d0
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op)
@@ -2011,6 +2019,8 @@ define <1 x double> @frinta_v1f64(<1 x double> %op) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    frinta d0, d0
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x double> @llvm.round.v1f64(<1 x double> %op)
@@ -2425,6 +2435,8 @@ define <1 x double> @frintn_v1f64(<1 x double> %op) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    frintn d0, d0
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
@@ -2839,6 +2851,8 @@ define <1 x double> @frintz_v1f64(<1 x double> %op) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    frintz d0, d0
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op)

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
index 11446df7dbe4f..eced98dda447a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
@@ -379,6 +379,8 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x 
double> %op2, i1 %mask
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    tst w0, #0x1
 ; NONEON-NOSVE-NEXT:    fcsel d0, d0, d1, ne
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %sel = select i1 %mask, <1 x double> %op1, <1 x double> %op2

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index 76aa8e45ccda3..21a26921ab031 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -433,7 +433,8 @@ define <1 x i64> @fcvtzu_v1f16_v1i64(<1 x half> %op1) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvtzu x8, s0
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = fptoui <1 x half> %op1 to <1 x i64>
@@ -1638,7 +1639,8 @@ define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    fcvtzu x8, d0
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = fptoui <1 x double> %op1 to <1 x i64>
@@ -2131,7 +2133,8 @@ define <1 x i64> @fcvtzs_v1f16_v1i64(<1 x half> %op1) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    fcvt s0, h0
 ; NONEON-NOSVE-NEXT:    fcvtzs x8, s0
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = fptosi <1 x half> %op1 to <1 x i64>
@@ -3339,7 +3342,8 @@ define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    fcvtzs x8, d0
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = fptosi <1 x double> %op1 to <1 x i64>

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index 1030e96939852..a08b71ce83ec1 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -482,6 +482,8 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x 
double> %op2, <1 x i1>
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    tst w0, #0x1
 ; NONEON-NOSVE-NEXT:    fcsel d0, d0, d1, ne
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %sel = select <1 x i1> %mask, <1 x double> %op1, <1 x double> %op2

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
index 5f21c80c2fdd0..ad00e99b704dd 100644
--- 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
+++ 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
@@ -426,7 +426,8 @@ define <1 x i64> @insertelement_v1i64(<1 x i64> %op1) {
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    mov w8, #5 // =0x5
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
     %r = insertelement <1 x i64> %op1, i64 5, i64 0
@@ -759,7 +760,9 @@ define <1 x double> @insertelement_v1f64(<1 x double> %op1) 
{
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT:    fmov d0, #5.00000000
+; NONEON-NOSVE-NEXT:    mov x8, #4617315517961601024 // =0x4014000000000000
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
     %r = insertelement <1 x double> %op1, double 5.0, i64 0

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
index 28980d3f08579..63b0242264678 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
@@ -658,7 +658,8 @@ define <1 x i64> @add_v1i64(<1 x i64> %op1, <1 x i64> %op2) 
{
 ; NONEON-NOSVE-NEXT:    fmov x8, d1
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    add x8, x9, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = add <1 x i64> %op1, %op2
@@ -1461,7 +1462,8 @@ define <1 x i64> @mul_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x8, d1
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    mul x8, x9, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = mul <1 x i64> %op1, %op2
@@ -2201,7 +2203,8 @@ define <1 x i64> @sub_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x8, d1
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    sub x8, x9, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = sub <1 x i64> %op1, %op2
@@ -2922,7 +2925,8 @@ define <1 x i64> @abs_v1i64(<1 x i64> %op1) {
 ; NONEON-NOSVE-NEXT:    fmov x8, d0
 ; NONEON-NOSVE-NEXT:    cmp x8, #0
 ; NONEON-NOSVE-NEXT:    cneg x8, x8, mi
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x i64> @llvm.abs.v1i64(<1 x i64> %op1, i1 false)

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
index d24f4144d4983..b5c8461bf8a78 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
@@ -723,7 +723,8 @@ define <1 x i64> @icmp_eq_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    cmp x9, x8
 ; NONEON-NOSVE-NEXT:    csetm x8, eq
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %cmp = icmp eq <1 x i64> %op1, %op2

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
index d22274e8312ca..7b99ffd79666f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -816,7 +816,8 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x8, d1
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    sdiv x8, x9, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = sdiv <1 x i64> %op1, %op2
@@ -1697,7 +1698,8 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x8, d1
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    udiv x8, x9, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = udiv <1 x i64> %op1, %op2

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
index 9e8f56d127149..3a6981118be2a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll
@@ -599,7 +599,8 @@ define <1 x i64> @and_v1i64(<1 x i64> %op1, <1 x i64> %op2) 
{
 ; NONEON-NOSVE-NEXT:    fmov x8, d1
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    and x8, x9, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = and <1 x i64> %op1, %op2
@@ -1267,7 +1268,8 @@ define <1 x i64> @or_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x8, d1
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    orr x8, x9, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = or <1 x i64> %op1, %op2
@@ -1935,7 +1937,8 @@ define <1 x i64> @xor_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x8, d1
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    eor x8, x9, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = xor <1 x i64> %op1, %op2

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
index 2118b61d2667f..1ff2ab9b249b4 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll
@@ -702,7 +702,8 @@ define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    cmp x9, x8
 ; NONEON-NOSVE-NEXT:    csel x8, x9, x8, gt
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
@@ -1479,7 +1480,8 @@ define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    cmp x9, x8
 ; NONEON-NOSVE-NEXT:    csel x8, x9, x8, lt
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %op1, <1 x i64> %op2)
@@ -2256,7 +2258,8 @@ define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    cmp x9, x8
 ; NONEON-NOSVE-NEXT:    csel x8, x9, x8, hi
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %op1, <1 x i64> %op2)
@@ -3033,7 +3036,8 @@ define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    cmp x9, x8
 ; NONEON-NOSVE-NEXT:    csel x8, x9, x8, lo
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %op1, <1 x i64> %op2)

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
index 05051a43484f5..951bc8b93c595 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll
@@ -1043,7 +1043,8 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x8, d0
 ; NONEON-NOSVE-NEXT:    fmov x9, d1
 ; NONEON-NOSVE-NEXT:    smulh x8, x8, x9
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %1 = sext <1 x i64> %op1 to <1 x i128>
@@ -1074,7 +1075,9 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    ldp x11, x10, [sp, #16]
 ; NONEON-NOSVE-NEXT:    smulh x8, x8, x10
 ; NONEON-NOSVE-NEXT:    smulh x9, x9, x11
-; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #64
 ; NONEON-NOSVE-NEXT:    ret
@@ -1122,8 +1125,12 @@ define void @smulh_v4i64(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    smulh x11, x11, x13
 ; NONEON-NOSVE-NEXT:    smulh x8, x8, x12
 ; NONEON-NOSVE-NEXT:    smulh x9, x9, x14
-; NONEON-NOSVE-NEXT:    stp x11, x10, [sp, #96]
-; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #112]
+; NONEON-NOSVE-NEXT:    stp x11, x10, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #80]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
+; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #128
@@ -2171,7 +2178,8 @@ define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x8, d0
 ; NONEON-NOSVE-NEXT:    fmov x9, d1
 ; NONEON-NOSVE-NEXT:    umulh x8, x8, x9
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %1 = zext <1 x i64> %op1 to <1 x i128>
@@ -2202,7 +2210,9 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    ldp x11, x10, [sp, #16]
 ; NONEON-NOSVE-NEXT:    umulh x8, x8, x10
 ; NONEON-NOSVE-NEXT:    umulh x9, x9, x11
-; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #48]
+; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #32]
+; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #64
 ; NONEON-NOSVE-NEXT:    ret
@@ -2250,8 +2260,12 @@ define void @umulh_v4i64(ptr %a, ptr %b) {
 ; NONEON-NOSVE-NEXT:    umulh x11, x11, x13
 ; NONEON-NOSVE-NEXT:    umulh x8, x8, x12
 ; NONEON-NOSVE-NEXT:    umulh x9, x9, x14
-; NONEON-NOSVE-NEXT:    stp x11, x10, [sp, #96]
-; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #112]
+; NONEON-NOSVE-NEXT:    stp x11, x10, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #80]
+; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #80]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
+; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #64]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
 ; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
 ; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #128

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
index cf8cd220c3287..48da301dbc37c 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll
@@ -937,7 +937,8 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    sdiv x10, x9, x8
 ; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = srem <1 x i64> %op1, %op2
@@ -1957,7 +1958,8 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    udiv x10, x9, x8
 ; NONEON-NOSVE-NEXT:    msub x8, x10, x8, x9
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = urem <1 x i64> %op1, %op2

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
index 1c637bbb31889..e91d78a75cb85 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll
@@ -718,7 +718,8 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> 
%op2, i1 %mask) {
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    tst w0, #0x1
 ; NONEON-NOSVE-NEXT:    csel x8, x9, x8, ne
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
index 49be29cdc143e..5d8466e31abff 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll
@@ -667,7 +667,8 @@ define <1 x i64> @ashr_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x8, d1
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    asr x8, x9, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = ashr <1 x i64> %op1, %op2
@@ -1402,7 +1403,8 @@ define <1 x i64> @lshr_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x8, d1
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    lsr x8, x9, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = lshr <1 x i64> %op1, %op2
@@ -2135,7 +2137,8 @@ define <1 x i64> @shl_v1i64(<1 x i64> %op1, <1 x i64> 
%op2) {
 ; NONEON-NOSVE-NEXT:    fmov x8, d1
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    lsl x8, x9, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = shl <1 x i64> %op1, %op2

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
index 1901f24ef8167..4379194b306dc 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll
@@ -404,7 +404,8 @@ define <1 x double> @ucvtf_v1i16_v1f64(<1 x i16> %op1) {
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ucvtf d0, w8
-; NONEON-NOSVE-NEXT:    add sp, sp, #16
+; NONEON-NOSVE-NEXT:    str d0, [sp]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp], #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = uitofp <1 x i16> %op1 to <1 x double>
   ret <1 x double> %res

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
index 509cac0248a0a..d5bbbfa9899da 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
@@ -950,7 +950,8 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> 
%op2, <1 x i1> %mask) {
 ; NONEON-NOSVE-NEXT:    fmov x9, d0
 ; NONEON-NOSVE-NEXT:    tst w0, #0x1
 ; NONEON-NOSVE-NEXT:    csel x8, x9, x8, ne
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %sel = select <1 x i1> %mask, <1 x i64> %op1, <1 x i64> %op2

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
index 1306331d2e44a..d74faa9f4fe09 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
@@ -81,7 +81,9 @@ define void @alloc_v6i8(ptr %st_ptr) nounwind {
 ; NONEON-NOSVE-NEXT:    mov x19, x0
 ; NONEON-NOSVE-NEXT:    add x0, sp, #24
 ; NONEON-NOSVE-NEXT:    bl def
-; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
+; NONEON-NOSVE-NEXT:    ldr x8, [sp, #24]
+; NONEON-NOSVE-NEXT:    str x8, [sp]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp]
 ; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #11]
 ; NONEON-NOSVE-NEXT:    strb w8, [sp, #21]

diff  --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
index 8588b7a456405..cdee37c66ea81 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
@@ -638,7 +638,8 @@ define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) {
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
 ; NONEON-NOSVE-NEXT:    fmov x8, d0
 ; NONEON-NOSVE-NEXT:    rbit x8, x8
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = call <1 x i64> @llvm.bitreverse.v1i64(<1 x i64> %op)

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
index 5ca23889cf2d8..1f6c346ecc90f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
@@ -847,7 +847,8 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) {
 ; NONEON-NOSVE-NEXT:    asr x9, x8, #63
 ; NONEON-NOSVE-NEXT:    add x8, x8, x9, lsr #59
 ; NONEON-NOSVE-NEXT:    asr x8, x8, #5
-; NONEON-NOSVE-NEXT:    fmov d0, x8
+; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %res = sdiv <1 x i64> %op1, shufflevector (<1 x i64> insertelement (<1 x i64> poison, i64 32, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer)

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
index d7b08e6fbd270..335a1f88ad3cf 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-splat-vector.ll
@@ -302,7 +302,8 @@ define <1 x i64> @splat_v1i64(i64 %a) {
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT:    fmov d0, x0
+; NONEON-NOSVE-NEXT:    str x0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %insert = insertelement <1 x i64> poison, i64 %a, i64 0
@@ -531,6 +532,8 @@ define <1 x double> @splat_v1f64(double %a, <1 x double> 
%op2) {
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
+; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   %insert = insertelement <1 x double> poison, double %a, i64 0

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
index d2e9fd79e50a9..17579d79896da 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-stores.ll
@@ -314,7 +314,9 @@ define void @store_v1i64(ptr %a) {
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT:    str xzr, [x0]
+; NONEON-NOSVE-NEXT:    str xzr, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    str d0, [x0]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   store <1 x i64> zeroinitializer, ptr %a
@@ -332,7 +334,9 @@ define void @store_v1f64(ptr %a) {
 ; NONEON-NOSVE:       // %bb.0:
 ; NONEON-NOSVE-NEXT:    sub sp, sp, #16
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
-; NONEON-NOSVE-NEXT:    str xzr, [x0]
+; NONEON-NOSVE-NEXT:    str xzr, [sp, #8]
+; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
+; NONEON-NOSVE-NEXT:    str d0, [x0]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #16
 ; NONEON-NOSVE-NEXT:    ret
   store <1 x double> zeroinitializer, ptr %a

diff  --git 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll 
b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
index 0eb805d580d73..ae2b0d238ebd7 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
@@ -138,11 +138,12 @@ define void @store_trunc_v2i256i64(ptr %ap, ptr %dest) {
 ;
 ; NONEON-NOSVE-LABEL: store_trunc_v2i256i64:
 ; NONEON-NOSVE:       // %bb.0:
-; NONEON-NOSVE-NEXT:    sub sp, sp, #32
+; NONEON-NOSVE-NEXT:    ldr x8, [x0, #32]
+; NONEON-NOSVE-NEXT:    ldr x9, [x0]
+; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #-32]!
 ; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
-; NONEON-NOSVE-NEXT:    ldr d0, [x0, #32]
-; NONEON-NOSVE-NEXT:    ldr d1, [x0]
-; NONEON-NOSVE-NEXT:    stp d1, d0, [sp, #16]
+; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp]
+; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
 ; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
 ; NONEON-NOSVE-NEXT:    str q0, [x1]
 ; NONEON-NOSVE-NEXT:    add sp, sp, #32

diff  --git a/llvm/test/CodeGen/AArch64/v3f-to-int.ll 
b/llvm/test/CodeGen/AArch64/v3f-to-int.ll
index 25fa4c35bcff8..6d4061fb02cff 100644
--- a/llvm/test/CodeGen/AArch64/v3f-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/v3f-to-int.ll
@@ -6,8 +6,11 @@ define void @convert_v3f32() {
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #16
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    str wzr, [sp, #12]
+; CHECK-NEXT:    ldr s0, [sp, #12]
 ; CHECK-NEXT:    strb wzr, [x8]
-; CHECK-NEXT:    strh wzr, [x8]
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    str h0, [x8]
 ; CHECK-NEXT:    add sp, sp, #16
 ; CHECK-NEXT:    ret
 entry:

diff  --git a/llvm/test/CodeGen/PowerPC/vsx-p9.ll 
b/llvm/test/CodeGen/PowerPC/vsx-p9.ll
index 0a62a284aa45f..b7598c6de4c10 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-p9.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-p9.ll
@@ -137,8 +137,8 @@ entry:
   tail call void (...) @sink(<2 x double> %add.i12)
 ; CHECK: lxv 0, 0(3)
 ; CHECK: lxv 1, 0(3)
-; CHECK: xvadddp {{[0-9]+}}, 0, 1
-; CHECK: stxv {{[0-9]+}},
+; CHECK: xvadddp 0, 0, 1
+; CHECK: stxv 0,
 ; CHECK: bl sink
   ret void
 }

diff  --git a/llvm/test/CodeGen/RISCV/rvv/stlf.ll 
b/llvm/test/CodeGen/RISCV/rvv/stlf.ll
deleted file mode 100644
index 171271a9b8967..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/stlf.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
-
-define <vscale x 4 x i32> @test_stlf_riscv_scalable(ptr %p, <vscale x 4 x i32> %v) {
-; CHECK-LABEL: test_stlf_riscv_scalable:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vs2r.v v8, (a0)
-; CHECK-NEXT:    ret
-  store <vscale x 4 x i32> %v, ptr %p
-  %res = load <vscale x 4 x i32>, ptr %p
-  ret <vscale x 4 x i32> %res
-}

diff  --git a/llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll 
b/llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll
index e7bbd91d98fab..e8b4d3f6812d2 100644
--- a/llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll
+++ b/llvm/test/CodeGen/X86/atomic-non-integer-fp128.ll
@@ -55,6 +55,8 @@ define fp128 @load_fp128(ptr %fptr) {
 ; X64-AVX-LABEL: load_fp128:
 ; X64-AVX:       # %bb.0:
 ; X64-AVX-NEXT:    vmovaps (%rdi), %xmm0
+; X64-AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
 ; X64-AVX-NEXT:    retq
   %v = load atomic fp128, ptr %fptr unordered, align 16
   ret fp128 %v

diff  --git a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-chained-bf16.ll 
b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-chained-bf16.ll
index c7200850af699..12ce721b8c5d5 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-chained-bf16.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/shuffle-chained-bf16.ll
@@ -37,8 +37,11 @@ define <2 x bfloat> @shuffle_chained_v16bf16(<16 x bfloat> 
%a) {
 ; CHECK-NEXT:    .cfi_def_cfa_register %rbp
 ; CHECK-NEXT:    andq $-32, %rsp
 ; CHECK-NEXT:    subq $96, %rsp
-; CHECK-NEXT:    vmovdqa %ymm0, (%rsp)
+; CHECK-NEXT:    vmovaps %ymm0, (%rsp)
+; CHECK-NEXT:    vmovdqa (%rsp), %xmm0
 ; CHECK-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; CHECK-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0
 ; CHECK-NEXT:    movq %rbp, %rsp
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa %rsp, 8

diff  --git a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll 
b/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
deleted file mode 100644
index a1ee713b32032..0000000000000
--- a/llvm/test/CodeGen/X86/dag-stlf-mismatch.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s
-
-%struct.Data = type { float }
-
-define float @test_stlf_integer(ptr %p, float %v) {
-; CHECK-LABEL: test_stlf_integer:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl $0, (%rdi)
-; CHECK-NEXT:    xorps %xmm1, %xmm1
-; CHECK-NEXT:    mulss %xmm1, %xmm0
-; CHECK-NEXT:    retq
-  store i32 0, ptr %p, align 4
-  %f = load float, ptr %p, align 4
-  %r = fmul float %f, %v
-  ret float %r
-}
-
-define float @test_stlf_vector(ptr %p, float %v) {
-; CHECK-LABEL: test_stlf_vector:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorps %xmm1, %xmm1
-; CHECK-NEXT:    movups %xmm1, (%rdi)
-; CHECK-NEXT:    mulss (%rdi), %xmm0
-; CHECK-NEXT:    retq
-  store <4 x float> zeroinitializer, ptr %p, align 4
-  %f = load float, ptr %p, align 4
-  %r = fmul float %f, %v
-  ret float %r
-}
-
-define float @test_stlf_bitcast(ptr %p, float %v) {
-; CHECK-LABEL: test_stlf_bitcast:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorps %xmm1, %xmm1
-; CHECK-NEXT:    movups %xmm1, (%rdi)
-; CHECK-NEXT:    mulss (%rdi), %xmm0
-; CHECK-NEXT:    retq
-  store <2 x i64> zeroinitializer, ptr %p, align 4
-  %f = load float, ptr %p, align 4
-  %r = fmul float %f, %v
-  ret float %r
-}
-
-declare void @ext_func(ptr byval(%struct.Data) align 4 %p)
-define void @test_stlf_late_byval(ptr %ptr) nounwind {
-; CHECK-LABEL: test_stlf_late_byval:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    movl $0, (%rdi)
-; CHECK-NEXT:    movl $0, (%rsp)
-; CHECK-NEXT:    callq ext_func@PLT
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    retq
-  store i32 0, ptr %ptr, align 4
-  call void @ext_func(ptr byval(%struct.Data) align 4 %ptr)
-  ret void
-}
-
-define float @test_stlf_variable(ptr %p, i32 %val, float %v) {
-; CHECK-LABEL: test_stlf_variable:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movd %esi, %xmm1
-; CHECK-NEXT:    movl %esi, (%rdi)
-; CHECK-NEXT:    mulss %xmm1, %xmm0
-; CHECK-NEXT:    retq
-  store i32 %val, ptr %p, align 4
-  %f = load float, ptr %p, align 4
-  %r = fmul float %f, %v
-  ret float %r
-}

diff  --git a/llvm/test/CodeGen/X86/pr30290.ll 
b/llvm/test/CodeGen/X86/pr30290.ll
index 1cf0947e03ec6..74e553191331f 100644
--- a/llvm/test/CodeGen/X86/pr30290.ll
+++ b/llvm/test/CodeGen/X86/pr30290.ll
@@ -23,8 +23,9 @@ define void @foo(ptr byval(%struct.face) nocapture align 8) 
local_unnamed_addr {
 ; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,1,1]
 ; CHECK-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
 ; CHECK-NEXT:    movl $1, {{[0-9]+}}(%rsp)
-; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm1
-; CHECK-NEXT:    vmovups %xmm1, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
+; CHECK-NEXT:    vmovups %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0
 ; CHECK-NEXT:    vmovups %xmm0, (%rsp)
 ; CHECK-NEXT:    callq bar@PLT
 ; CHECK-NEXT:    addq $40, %rsp

diff  --git a/llvm/test/CodeGen/X86/pr38533.ll 
b/llvm/test/CodeGen/X86/pr38533.ll
index 11db6bfa99207..f1bbb2ffdffd0 100644
--- a/llvm/test/CodeGen/X86/pr38533.ll
+++ b/llvm/test/CodeGen/X86/pr38533.ll
@@ -7,20 +7,23 @@
 define void @constant_fold_vector_to_half() {
 ; SSE2-LABEL: constant_fold_vector_to_half:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    pinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; SSE2-NEXT:    movw $16384, -{{[0-9]+}}(%rsp) # imm = 0x4000
+; SSE2-NEXT:    pinsrw $0, -{{[0-9]+}}(%rsp), %xmm0
 ; SSE2-NEXT:    pextrw $0, %xmm0, %eax
 ; SSE2-NEXT:    movw %ax, (%rax)
 ; SSE2-NEXT:    retq
 ;
 ; AVX512-LABEL: constant_fold_vector_to_half:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vpinsrw $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT:    movw $16384, -{{[0-9]+}}(%rsp) # imm = 0x4000
+; AVX512-NEXT:    vpinsrw $0, -{{[0-9]+}}(%rsp), %xmm0, %xmm0
 ; AVX512-NEXT:    vpextrw $0, %xmm0, (%rax)
 ; AVX512-NEXT:    retq
 ;
 ; AVX512FP16-LABEL: constant_fold_vector_to_half:
 ; AVX512FP16:       # %bb.0:
-; AVX512FP16-NEXT:    vmovsh {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; AVX512FP16-NEXT:    movw $16384, -{{[0-9]+}}(%rsp) # imm = 0x4000
+; AVX512FP16-NEXT:    vmovsh -{{[0-9]+}}(%rsp), %xmm0
 ; AVX512FP16-NEXT:    vmovsh %xmm0, (%rax)
 ; AVX512FP16-NEXT:    retq
   store volatile half bitcast (<4 x i4> <i4 0, i4 0, i4 0, i4 4> to half), ptr undef

diff  --git a/llvm/test/CodeGen/X86/vectorcall.ll 
b/llvm/test/CodeGen/X86/vectorcall.ll
index 9a7d002fc3178..07446c6a7bfa4 100644
--- a/llvm/test/CodeGen/X86/vectorcall.ll
+++ b/llvm/test/CodeGen/X86/vectorcall.ll
@@ -152,10 +152,10 @@ entry:
 
 define x86_vectorcallcc %struct.HVA4 @test_mixed_6(%struct.HVA4 inreg %a, ptr %b) {
 ; CHECK-LABEL: test_mixed_6
-; CHECK-DAG:       movaps      (%{{.*}}), %xmm0
-; CHECK-DAG:       movaps      16(%{{.*}}), %xmm1
-; CHECK-DAG:       movaps      32(%{{.*}}), %xmm2
-; CHECK-DAG:       movaps      48(%{{.*}}), %xmm3
+; CHECK:       movaps  (%{{[re]}}sp), %xmm0
+; CHECK:       movaps  16(%{{[re]}}sp), %xmm1
+; CHECK:       movaps  32(%{{[re]}}sp), %xmm2
+; CHECK:       movaps  48(%{{[re]}}sp), %xmm3
 ; CHECK:       ret{{[ql]}}
 entry:
   %retval = alloca %struct.HVA4, align 16


        
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] e8f70c9 - Revert "[DAG] Enable bitcast STLF for Constant/Undef (#172523)"

Reply via email to