https://github.com/folkertdev updated 
https://github.com/llvm/llvm-project/pull/204887

>From 6306970fa9266b0d386a51a477e7aefe958363d7 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <[email protected]>
Date: Fri, 19 Jun 2026 20:05:36 +0200
Subject: [PATCH 1/2] [X86][Windows] Return `fp128` on the stack

This is in line with mingw64 GCC and follows the Win64 calling
convention more closely than before.
---
 clang/lib/CodeGen/Targets/X86.cpp             |  14 +-
 clang/test/CodeGen/win-fp128.c                |   4 +-
 llvm/lib/Target/X86/X86ISelLoweringCall.cpp   |  14 +
 .../test/CodeGen/X86/fp128-libcalls-strict.ll | 451 +++++++++++++-----
 llvm/test/CodeGen/X86/fp128-libcalls.ll       | 251 +++++++---
 llvm/test/CodeGen/X86/i128-fp128-abi.ll       | 132 +++--
 6 files changed, 624 insertions(+), 242 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp
index dbe4d656aabc5..77c912b021604 100644
--- a/clang/lib/CodeGen/Targets/X86.cpp
+++ b/clang/lib/CodeGen/Targets/X86.cpp
@@ -3437,8 +3437,6 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
     case BuiltinType::Int128:
     case BuiltinType::UInt128:
     case BuiltinType::Float128:
-      // 128-bit float and integer types share the same ABI.
-
       // If it's a parameter type, the normal ABI rule is that arguments larger
       // than 8 bytes are passed indirectly. GCC follows it. We follow it too,
       // even though it isn't particularly efficient.
@@ -3449,10 +3447,14 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
 
       // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
       // Clang matches them for compatibility.
-      // NOTE: GCC actually returns f128 indirectly but will hopefully change.
-      // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054#c8.
-      return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
-          llvm::Type::getInt64Ty(getVMContext()), 2));
+      if (BT->getKind() == BuiltinType::Int128 ||
+          BT->getKind() == BuiltinType::UInt128)
+        return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
+            llvm::Type::getInt64Ty(getVMContext()), 2));
+
+      // Mingw64 GCC returns f128 via sret. Clang matches that for
+      // compatibility.
+      break;
 
     default:
       break;
diff --git a/clang/test/CodeGen/win-fp128.c b/clang/test/CodeGen/win-fp128.c
index 58e203d4fc8ed..dc144f899fa4f 100644
--- a/clang/test/CodeGen/win-fp128.c
+++ b/clang/test/CodeGen/win-fp128.c
@@ -3,10 +3,10 @@
 // __float128 is unsupported on MSVC
 
 __float128 fp128_ret(void) { return 0; }
-// CHECK-GNU64: define dso_local <2 x i64>  @fp128_ret()
+// CHECK-GNU64: define dso_local fp128 @fp128_ret()
 
 __float128 fp128_args(__float128 a, __float128 b) { return a * b; }
-// CHECK-GNU64: define dso_local <2 x i64> @fp128_args(ptr noundef dead_on_return %0, ptr noundef dead_on_return %1)
+// CHECK-GNU64: define dso_local fp128 @fp128_args(ptr noundef dead_on_return %0, ptr noundef dead_on_return %1)
 
 void fp128_vararg(int a, ...) {
   // CHECK-GNU64-LABEL: define dso_local void @fp128_vararg
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index 7c068115df481..bce581ad7a48b 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -670,6 +670,20 @@ bool X86TargetLowering::CanLowerReturn(
     CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
     const Type *RetTy) const {
+  // Mingw64 GCC returns f128 via sret, which matches the documentation of the
+  // Windows x64 calling convention:
+  //
+  // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170#return-values
+  //
+  // > Otherwise, the caller must allocate memory for the return value and pass
+  // > a pointer to it as the first argument.
+  //
+  // Return false, which will perform sret demotion.
+  if (Subtarget.isCallingConvWin64(CallConv) &&
+      llvm::any_of(
+          Outs, [](const ISD::OutputArg &Out) { return Out.VT == MVT::f128; }))
+    return false;
+
   SmallVector<CCValAssign, 16> RVLocs;
   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
   return CCInfo.CheckReturn(Outs, RetCC_X86);
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
index ad2d690fd7ed0..dfff88d30bcd4 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll
@@ -79,15 +79,22 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp {
 ;
 ; WIN-LABEL: add:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
-; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $80, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
+; WIN-NEXT:    movaps (%r8), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq __addtf3
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $80, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: add:
@@ -201,15 +208,22 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp {
 ;
 ; WIN-LABEL: sub:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
-; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $80, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
+; WIN-NEXT:    movaps (%r8), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq __subtf3
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $80, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: sub:
@@ -323,15 +337,22 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp {
 ;
 ; WIN-LABEL: mul:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
-; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $80, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
+; WIN-NEXT:    movaps (%r8), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq __multf3
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $80, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: mul:
@@ -445,15 +466,22 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp {
 ;
 ; WIN-LABEL: div:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
-; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $80, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
+; WIN-NEXT:    movaps (%r8), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq __divtf3
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $80, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: div:
@@ -568,18 +596,25 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp {
 ;
 ; WIN-LABEL: fma:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $88, %rsp
-; WIN-NEXT:    movaps (%r8), %xmm0
-; WIN-NEXT:    movaps (%rcx), %xmm1
-; WIN-NEXT:    movaps (%rdx), %xmm2
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $96, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%r9), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps (%r8), %xmm2
 ; WIN-NEXT:    movaps %xmm2, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r9
 ; WIN-NEXT:    callq fmal
-; WIN-NEXT:    addq $88, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $96, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: fma:
@@ -694,15 +729,22 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp {
 ;
 ; WIN-LABEL: frem:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
-; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $80, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
+; WIN-NEXT:    movaps (%r8), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq fmodl
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $80, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: frem:
@@ -797,12 +839,19 @@ define fp128 @ceil(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: ceil:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq ceill
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: ceil:
@@ -887,12 +936,19 @@ define fp128 @acos(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: acos:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq acosl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: acos:
@@ -977,12 +1033,19 @@ define fp128 @cos(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: cos:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq cosl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: cos:
@@ -1067,12 +1130,19 @@ define fp128 @cosh(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: cosh:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq coshl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: cosh:
@@ -1157,12 +1227,19 @@ define fp128 @exp(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: exp:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq expl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: exp:
@@ -1247,12 +1324,19 @@ define fp128 @exp2(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: exp2:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq exp2l
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: exp2:
@@ -1337,12 +1421,19 @@ define fp128 @floor(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: floor:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq floorl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: floor:
@@ -1427,12 +1518,19 @@ define fp128 @log(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: log:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq logl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: log:
@@ -1517,12 +1615,19 @@ define fp128 @log10(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: log10:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq log10l
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: log10:
@@ -1607,12 +1712,19 @@ define fp128 @log2(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: log2:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq log2l
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: log2:
@@ -1709,15 +1821,22 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp {
 ;
 ; WIN-LABEL: maxnum:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
-; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $80, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
+; WIN-NEXT:    movaps (%r8), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq fmaxl
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $80, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: maxnum:
@@ -1824,15 +1943,22 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp {
 ;
 ; WIN-LABEL: minnum:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
-; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $80, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
+; WIN-NEXT:    movaps (%r8), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq fminl
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $80, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: minnum:
@@ -1927,12 +2053,19 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: nearbyint:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq nearbyintl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: nearbyint:
@@ -2029,15 +2162,22 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp {
 ;
 ; WIN-LABEL: pow:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
-; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $80, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
+; WIN-NEXT:    movaps (%r8), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq powl
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $80, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: pow:
@@ -2143,12 +2283,19 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp {
 ;
 ; WIN-LABEL: powi:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq __powitf2
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: powi:
@@ -2237,12 +2384,19 @@ define fp128 @rint(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: rint:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq rintl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: rint:
@@ -2327,12 +2481,19 @@ define fp128 @round(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: round:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq roundl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: round:
@@ -2417,12 +2578,19 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: roundeven:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq roundevenl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: roundeven:
@@ -2507,12 +2675,19 @@ define fp128 @asin(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: asin:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq asinl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: asin:
@@ -2597,12 +2772,19 @@ define fp128 @sin(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: sin:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq sinl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: sin:
@@ -2687,12 +2869,19 @@ define fp128 @sinh(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: sinh:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq sinhl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: sinh:
@@ -2777,12 +2966,19 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: sqrt:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq sqrtl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: sqrt:
@@ -2867,12 +3063,19 @@ define fp128 @atan(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: atan:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq atanl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: atan:
@@ -2969,15 +3172,22 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp {
 ;
 ; WIN-LABEL: atan2:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
-; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $80, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
+; WIN-NEXT:    movaps (%r8), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq atan2l
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $80, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: atan2:
@@ -3072,12 +3282,19 @@ define fp128 @tan(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: tan:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq tanl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: tan:
@@ -3162,12 +3379,19 @@ define fp128 @tanh(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: tanh:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq tanhl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: tanh:
@@ -3252,12 +3476,19 @@ define fp128 @trunc(fp128 %x) nounwind strictfp {
 ;
 ; WIN-LABEL: trunc:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq truncl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: trunc:
diff --git a/llvm/test/CodeGen/X86/fp128-libcalls.ll b/llvm/test/CodeGen/X86/fp128-libcalls.ll
index 4b0449fd7502e..c594b15ef1cbe 100644
--- a/llvm/test/CodeGen/X86/fp128-libcalls.ll
+++ b/llvm/test/CodeGen/X86/fp128-libcalls.ll
@@ -78,16 +78,18 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind {
 ;
 ; WIN-LABEL: Test128Add:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    subq $88, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps (%rdx), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq __addtf3
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Add:
@@ -207,16 +209,18 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128_1Add:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    subq $88, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps vf128(%rip), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq __addtf3
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128_1Add:
@@ -331,16 +335,18 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind {
 ;
 ; WIN-LABEL: Test128Sub:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    subq $88, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps (%rdx), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq __subtf3
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Sub:
@@ -460,16 +466,18 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128_1Sub:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    subq $88, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps vf128(%rip), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq __subtf3
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128_1Sub:
@@ -584,16 +592,18 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind {
 ;
 ; WIN-LABEL: Test128Mul:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    subq $88, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps (%rdx), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq __multf3
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Mul:
@@ -713,16 +723,18 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128_1Mul:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    subq $88, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps vf128(%rip), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq __multf3
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128_1Mul:
@@ -837,16 +849,18 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind {
 ;
 ; WIN-LABEL: Test128Div:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    subq $88, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps (%rdx), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq __divtf3
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Div:
@@ -966,16 +980,18 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128_1Div:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    subq $88, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps vf128(%rip), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq __divtf3
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128_1Div:
@@ -1082,16 +1098,18 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind {
 ;
 ; WIN-LABEL: Test128Rem:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    subq $88, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps (%rdx), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq fmodl
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Rem:
@@ -1201,16 +1219,18 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128_1Rem:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $72, %rsp
+; WIN-NEXT:    subq $88, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps vf128(%rip), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq fmodl
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    addq $88, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128_1Rem:
@@ -1303,13 +1323,15 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128Sqrt:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    subq $72, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq sqrtl
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Sqrt:
@@ -1390,13 +1412,15 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128Sin:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    subq $72, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq sinl
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Sin:
@@ -1477,13 +1501,15 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128Cos:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    subq $72, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq cosl
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Cos:
@@ -1564,13 +1590,15 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128Ceil:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    subq $72, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq ceill
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Ceil:
@@ -1651,13 +1679,15 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128Floor:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    subq $72, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq floorl
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Floor:
@@ -1738,13 +1768,15 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128Trunc:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    subq $72, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq truncl
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Trunc:
@@ -1825,13 +1857,15 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128Nearbyint:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    subq $72, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq nearbyintl
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Nearbyint:
@@ -1912,13 +1946,15 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128Rint:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    subq $72, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq rintl
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Rint:
@@ -1999,13 +2035,15 @@ define dso_local void @Test128Round(fp128 %d1) nounwind {
 ;
 ; WIN-LABEL: Test128Round:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $56, %rsp
+; WIN-NEXT:    subq $72, %rsp
 ; WIN-NEXT:    movaps (%rcx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq roundl
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 ; WIN-NEXT:    movaps %xmm0, vf128(%rip)
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    addq $72, %rsp
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Round:
@@ -2102,18 +2140,25 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind {
 ;
 ; WIN-LABEL: Test128FMA:
 ; WIN:       # %bb.0: # %entry
-; WIN-NEXT:    subq $88, %rsp
-; WIN-NEXT:    movaps (%r8), %xmm0
-; WIN-NEXT:    movaps (%rcx), %xmm1
-; WIN-NEXT:    movaps (%rdx), %xmm2
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $96, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%r9), %xmm0
+; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    movaps (%r8), %xmm2
 ; WIN-NEXT:    movaps %xmm2, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r9
 ; WIN-NEXT:    callq fmal
-; WIN-NEXT:    addq $88, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $96, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128FMA:
@@ -2211,12 +2256,19 @@ define fp128 @Test128Acos(fp128 %a) nounwind {
 ;
 ; WIN-LABEL: Test128Acos:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq acosl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Acos:
@@ -2294,12 +2346,19 @@ define fp128 @Test128Asin(fp128 %a) nounwind {
 ;
 ; WIN-LABEL: Test128Asin:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq asinl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Asin:
@@ -2377,12 +2436,19 @@ define fp128 @Test128Atan(fp128 %a) nounwind {
 ;
 ; WIN-LABEL: Test128Atan:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq atanl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Atan:
@@ -2472,15 +2538,22 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind {
 ;
 ; WIN-LABEL: Test128Atan2:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $72, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
-; WIN-NEXT:    movaps (%rdx), %xmm1
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $80, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
+; WIN-NEXT:    movaps (%r8), %xmm1
 ; WIN-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; WIN-NEXT:    callq atan2l
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $80, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Atan2:
@@ -2568,12 +2641,19 @@ define fp128 @Test128Cosh(fp128 %a) nounwind {
 ;
 ; WIN-LABEL: Test128Cosh:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq coshl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Cosh:
@@ -2651,12 +2731,19 @@ define fp128 @Test128Sinh(fp128 %a) nounwind {
 ;
 ; WIN-LABEL: Test128Sinh:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq sinhl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Sinh:
@@ -2734,12 +2821,19 @@ define fp128 @Test128Tan(fp128 %a) nounwind {
 ;
 ; WIN-LABEL: Test128Tan:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq tanl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Tan:
@@ -2817,12 +2911,19 @@ define fp128 @Test128Tanh(fp128 %a) nounwind {
 ;
 ; WIN-LABEL: Test128Tanh:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $56, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq tanhl
-; WIN-NEXT:    addq $56, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Tanh:
@@ -2916,14 +3017,20 @@ define { fp128, fp128 } @Test128Modf(fp128 %a) nounwind {
 ;
 ; WIN-LABEL: Test128Modf:
 ; WIN:       # %bb.0:
-; WIN-NEXT:    subq $72, %rsp
-; WIN-NEXT:    movaps (%rcx), %xmm0
+; WIN-NEXT:    pushq %rsi
+; WIN-NEXT:    subq $64, %rsp
+; WIN-NEXT:    movq %rcx, %rsi
+; WIN-NEXT:    movaps (%rdx), %xmm0
 ; WIN-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN-NEXT:    leaq 16(%rcx), %r8
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
 ; WIN-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
 ; WIN-NEXT:    callq modfl
-; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
-; WIN-NEXT:    addq $72, %rsp
+; WIN-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
+; WIN-NEXT:    movaps %xmm0, (%rsi)
+; WIN-NEXT:    movq %rsi, %rax
+; WIN-NEXT:    addq $64, %rsp
+; WIN-NEXT:    popq %rsi
 ; WIN-NEXT:    retq
 ;
 ; WIN-X86-LABEL: Test128Modf:
diff --git a/llvm/test/CodeGen/X86/i128-fp128-abi.ll b/llvm/test/CodeGen/X86/i128-fp128-abi.ll
index 2174d5056e6ce..9f385ee2faf4e 100644
--- a/llvm/test/CodeGen/X86/i128-fp128-abi.ll
+++ b/llvm/test/CodeGen/X86/i128-fp128-abi.ll
@@ -190,7 +190,9 @@ define PrimTy @return(ptr %p) nounwind {
 ;
 ; CHECK-MSVC64-F128-LABEL: return:
 ; CHECK-MSVC64-F128:       # %bb.0:
-; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MSVC64-F128-NEXT:    movq %rcx, %rax
+; CHECK-MSVC64-F128-NEXT:    movaps (%rdx), %xmm0
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, (%rcx)
 ; CHECK-MSVC64-F128-NEXT:    retq
 ;
 ; CHECK-MSVC64-I128-LABEL: return:
@@ -201,7 +203,9 @@ define PrimTy @return(ptr %p) nounwind {
 ;
 ; CHECK-MINGW-F128-LABEL: return:
 ; CHECK-MINGW-F128:       # %bb.0:
-; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MINGW-F128-NEXT:    movq %rcx, %rax
+; CHECK-MINGW-F128-NEXT:    movaps (%rdx), %xmm0
+; CHECK-MINGW-F128-NEXT:    movaps %xmm0, (%rcx)
 ; CHECK-MINGW-F128-NEXT:    retq
 ;
 ; CHECK-MINGW-I128-LABEL: return:
@@ -262,7 +266,9 @@ define PrimTy @first_arg(PrimTy %x) nounwind {
 ;
 ; CHECK-MSVC64-F128-LABEL: first_arg:
 ; CHECK-MSVC64-F128:       # %bb.0:
-; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MSVC64-F128-NEXT:    movq %rcx, %rax
+; CHECK-MSVC64-F128-NEXT:    movaps (%rdx), %xmm0
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, (%rcx)
 ; CHECK-MSVC64-F128-NEXT:    retq
 ;
 ; CHECK-MSVC64-I128-LABEL: first_arg:
@@ -272,7 +278,9 @@ define PrimTy @first_arg(PrimTy %x) nounwind {
 ;
 ; CHECK-MINGW-F128-LABEL: first_arg:
 ; CHECK-MINGW-F128:       # %bb.0:
-; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MINGW-F128-NEXT:    movq %rcx, %rax
+; CHECK-MINGW-F128-NEXT:    movaps (%rdx), %xmm0
+; CHECK-MINGW-F128-NEXT:    movaps %xmm0, (%rcx)
 ; CHECK-MINGW-F128-NEXT:    retq
 ;
 ; CHECK-MINGW-I128-LABEL: first_arg:
@@ -338,8 +346,10 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw
 ;
 ; CHECK-MSVC64-F128-LABEL: leading_args:
 ; CHECK-MSVC64-F128:       # %bb.0:
-; CHECK-MSVC64-F128-NEXT:    movq 40(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT:    movaps (%rax), %xmm0
+; CHECK-MSVC64-F128-NEXT:    movq %rcx, %rax
+; CHECK-MSVC64-F128-NEXT:    movq 48(%rsp), %rcx
+; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, (%rax)
 ; CHECK-MSVC64-F128-NEXT:    retq
 ;
 ; CHECK-MSVC64-I128-LABEL: leading_args:
@@ -350,8 +360,10 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw
 ;
 ; CHECK-MINGW-F128-LABEL: leading_args:
 ; CHECK-MINGW-F128:       # %bb.0:
-; CHECK-MINGW-F128-NEXT:    movq 40(%rsp), %rax
-; CHECK-MINGW-F128-NEXT:    movaps (%rax), %xmm0
+; CHECK-MINGW-F128-NEXT:    movq %rcx, %rax
+; CHECK-MINGW-F128-NEXT:    movq 48(%rsp), %rcx
+; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MINGW-F128-NEXT:    movaps %xmm0, (%rax)
 ; CHECK-MINGW-F128-NEXT:    retq
 ;
 ; CHECK-MINGW-I128-LABEL: leading_args:
@@ -418,8 +430,10 @@ define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, Pr
 ;
 ; CHECK-MSVC64-F128-LABEL: many_leading_args:
 ; CHECK-MSVC64-F128:       # %bb.0:
-; CHECK-MSVC64-F128-NEXT:    movq 56(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT:    movaps (%rax), %xmm0
+; CHECK-MSVC64-F128-NEXT:    movq %rcx, %rax
+; CHECK-MSVC64-F128-NEXT:    movq 64(%rsp), %rcx
+; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, (%rax)
 ; CHECK-MSVC64-F128-NEXT:    retq
 ;
 ; CHECK-MSVC64-I128-LABEL: many_leading_args:
@@ -430,8 +444,10 @@ define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, Pr
 ;
 ; CHECK-MINGW-F128-LABEL: many_leading_args:
 ; CHECK-MINGW-F128:       # %bb.0:
-; CHECK-MINGW-F128-NEXT:    movq 56(%rsp), %rax
-; CHECK-MINGW-F128-NEXT:    movaps (%rax), %xmm0
+; CHECK-MINGW-F128-NEXT:    movq %rcx, %rax
+; CHECK-MINGW-F128-NEXT:    movq 64(%rsp), %rcx
+; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MINGW-F128-NEXT:    movaps %xmm0, (%rax)
 ; CHECK-MINGW-F128-NEXT:    retq
 ;
 ; CHECK-MINGW-I128-LABEL: many_leading_args:
@@ -496,8 +512,10 @@ define PrimTy @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy
 ;
 ; CHECK-MSVC64-F128-LABEL: trailing_arg:
 ; CHECK-MSVC64-F128:       # %bb.0:
-; CHECK-MSVC64-F128-NEXT:    movq 48(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT:    movaps (%rax), %xmm0
+; CHECK-MSVC64-F128-NEXT:    movq %rcx, %rax
+; CHECK-MSVC64-F128-NEXT:    movq 56(%rsp), %rcx
+; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, (%rax)
 ; CHECK-MSVC64-F128-NEXT:    retq
 ;
 ; CHECK-MSVC64-I128-LABEL: trailing_arg:
@@ -508,8 +526,10 @@ define PrimTy @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy
 ;
 ; CHECK-MINGW-F128-LABEL: trailing_arg:
 ; CHECK-MINGW-F128:       # %bb.0:
-; CHECK-MINGW-F128-NEXT:    movq 48(%rsp), %rax
-; CHECK-MINGW-F128-NEXT:    movaps (%rax), %xmm0
+; CHECK-MINGW-F128-NEXT:    movq %rcx, %rax
+; CHECK-MINGW-F128-NEXT:    movq 56(%rsp), %rcx
+; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
+; CHECK-MINGW-F128-NEXT:    movaps %xmm0, (%rax)
 ; CHECK-MINGW-F128-NEXT:    retq
 ;
 ; CHECK-MINGW-I128-LABEL: trailing_arg:
@@ -578,12 +598,13 @@ define void @call_first_arg(PrimTy %x) nounwind {
 ;
 ; CHECK-MSVC64-F128-LABEL: call_first_arg:
 ; CHECK-MSVC64-F128:       # %bb.0:
-; CHECK-MSVC64-F128-NEXT:    subq $56, %rsp
+; CHECK-MSVC64-F128-NEXT:    subq $72, %rsp
 ; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, 32(%rsp)
-; CHECK-MSVC64-F128-NEXT:    leaq 32(%rsp), %rcx
+; CHECK-MSVC64-F128-NEXT:    leaq 48(%rsp), %rcx
+; CHECK-MSVC64-F128-NEXT:    leaq 32(%rsp), %rdx
 ; CHECK-MSVC64-F128-NEXT:    callq first_arg
-; CHECK-MSVC64-F128-NEXT:    addq $56, %rsp
+; CHECK-MSVC64-F128-NEXT:    addq $72, %rsp
 ; CHECK-MSVC64-F128-NEXT:    retq
 ;
 ; CHECK-MSVC64-I128-LABEL: call_first_arg:
@@ -595,12 +616,13 @@ define void @call_first_arg(PrimTy %x) nounwind {
 ;
 ; CHECK-MINGW-F128-LABEL: call_first_arg:
 ; CHECK-MINGW-F128:       # %bb.0:
-; CHECK-MINGW-F128-NEXT:    subq $56, %rsp
+; CHECK-MINGW-F128-NEXT:    subq $72, %rsp
 ; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-MINGW-F128-NEXT:    movaps %xmm0, 32(%rsp)
-; CHECK-MINGW-F128-NEXT:    leaq 32(%rsp), %rcx
+; CHECK-MINGW-F128-NEXT:    leaq 48(%rsp), %rcx
+; CHECK-MINGW-F128-NEXT:    leaq 32(%rsp), %rdx
 ; CHECK-MINGW-F128-NEXT:    callq first_arg
-; CHECK-MINGW-F128-NEXT:    addq $56, %rsp
+; CHECK-MINGW-F128-NEXT:    addq $72, %rsp
 ; CHECK-MINGW-F128-NEXT:    retq
 ;
 ; CHECK-MINGW-I128-LABEL: call_first_arg:
@@ -682,17 +704,18 @@ define void @call_leading_args(PrimTy %x) nounwind {
 ;
 ; CHECK-MSVC64-F128-LABEL: call_leading_args:
 ; CHECK-MSVC64-F128:       # %bb.0:
-; CHECK-MSVC64-F128-NEXT:    subq $72, %rsp
+; CHECK-MSVC64-F128-NEXT:    subq $88, %rsp
 ; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, 48(%rsp)
 ; CHECK-MSVC64-F128-NEXT:    leaq 48(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT:    movq %rax, 32(%rsp)
-; CHECK-MSVC64-F128-NEXT:    xorl %ecx, %ecx
+; CHECK-MSVC64-F128-NEXT:    movq %rax, 40(%rsp)
+; CHECK-MSVC64-F128-NEXT:    movq $0, 32(%rsp)
+; CHECK-MSVC64-F128-NEXT:    leaq 64(%rsp), %rcx
 ; CHECK-MSVC64-F128-NEXT:    xorl %edx, %edx
 ; CHECK-MSVC64-F128-NEXT:    xorl %r8d, %r8d
 ; CHECK-MSVC64-F128-NEXT:    xorl %r9d, %r9d
 ; CHECK-MSVC64-F128-NEXT:    callq leading_args
-; CHECK-MSVC64-F128-NEXT:    addq $72, %rsp
+; CHECK-MSVC64-F128-NEXT:    addq $88, %rsp
 ; CHECK-MSVC64-F128-NEXT:    retq
 ;
 ; CHECK-MSVC64-I128-LABEL: call_leading_args:
@@ -710,17 +733,18 @@ define void @call_leading_args(PrimTy %x) nounwind {
 ;
 ; CHECK-MINGW-F128-LABEL: call_leading_args:
 ; CHECK-MINGW-F128:       # %bb.0:
-; CHECK-MINGW-F128-NEXT:    subq $72, %rsp
+; CHECK-MINGW-F128-NEXT:    subq $88, %rsp
 ; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-MINGW-F128-NEXT:    movaps %xmm0, 48(%rsp)
 ; CHECK-MINGW-F128-NEXT:    leaq 48(%rsp), %rax
-; CHECK-MINGW-F128-NEXT:    movq %rax, 32(%rsp)
-; CHECK-MINGW-F128-NEXT:    xorl %ecx, %ecx
+; CHECK-MINGW-F128-NEXT:    movq %rax, 40(%rsp)
+; CHECK-MINGW-F128-NEXT:    movq $0, 32(%rsp)
+; CHECK-MINGW-F128-NEXT:    leaq 64(%rsp), %rcx
 ; CHECK-MINGW-F128-NEXT:    xorl %edx, %edx
 ; CHECK-MINGW-F128-NEXT:    xorl %r8d, %r8d
 ; CHECK-MINGW-F128-NEXT:    xorl %r9d, %r9d
 ; CHECK-MINGW-F128-NEXT:    callq leading_args
-; CHECK-MINGW-F128-NEXT:    addq $72, %rsp
+; CHECK-MINGW-F128-NEXT:    addq $88, %rsp
 ; CHECK-MINGW-F128-NEXT:    retq
 ;
 ; CHECK-MINGW-I128-LABEL: call_leading_args:
@@ -831,21 +855,22 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
 ;
 ; CHECK-MSVC64-F128-LABEL: call_many_leading_args:
 ; CHECK-MSVC64-F128:       # %bb.0:
-; CHECK-MSVC64-F128-NEXT:    subq $88, %rsp
+; CHECK-MSVC64-F128-NEXT:    subq $120, %rsp
 ; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-MSVC64-F128-NEXT:    xorps %xmm1, %xmm1
-; CHECK-MSVC64-F128-NEXT:    movaps %xmm1, 64(%rsp)
-; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, 48(%rsp)
-; CHECK-MSVC64-F128-NEXT:    leaq 48(%rsp), %rax
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm1, 80(%rsp)
+; CHECK-MSVC64-F128-NEXT:    leaq 80(%rsp), %rax
 ; CHECK-MSVC64-F128-NEXT:    movq %rax, 40(%rsp)
+; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, 64(%rsp)
 ; CHECK-MSVC64-F128-NEXT:    leaq 64(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT:    movq %rax, 32(%rsp)
-; CHECK-MSVC64-F128-NEXT:    xorl %ecx, %ecx
+; CHECK-MSVC64-F128-NEXT:    movq %rax, 48(%rsp)
+; CHECK-MSVC64-F128-NEXT:    movq $0, 32(%rsp)
+; CHECK-MSVC64-F128-NEXT:    leaq 96(%rsp), %rcx
 ; CHECK-MSVC64-F128-NEXT:    xorl %edx, %edx
 ; CHECK-MSVC64-F128-NEXT:    xorl %r8d, %r8d
 ; CHECK-MSVC64-F128-NEXT:    xorl %r9d, %r9d
 ; CHECK-MSVC64-F128-NEXT:    callq many_leading_args
-; CHECK-MSVC64-F128-NEXT:    addq $88, %rsp
+; CHECK-MSVC64-F128-NEXT:    addq $120, %rsp
 ; CHECK-MSVC64-F128-NEXT:    retq
 ;
 ; CHECK-MSVC64-I128-LABEL: call_many_leading_args:
@@ -865,21 +890,22 @@ define void @call_many_leading_args(PrimTy %x) nounwind {
 ;
 ; CHECK-MINGW-F128-LABEL: call_many_leading_args:
 ; CHECK-MINGW-F128:       # %bb.0:
-; CHECK-MINGW-F128-NEXT:    subq $88, %rsp
+; CHECK-MINGW-F128-NEXT:    subq $120, %rsp
 ; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-MINGW-F128-NEXT:    xorps %xmm1, %xmm1
-; CHECK-MINGW-F128-NEXT:    movaps %xmm1, 64(%rsp)
-; CHECK-MINGW-F128-NEXT:    movaps %xmm0, 48(%rsp)
-; CHECK-MINGW-F128-NEXT:    leaq 48(%rsp), %rax
+; CHECK-MINGW-F128-NEXT:    movaps %xmm1, 80(%rsp)
+; CHECK-MINGW-F128-NEXT:    leaq 80(%rsp), %rax
 ; CHECK-MINGW-F128-NEXT:    movq %rax, 40(%rsp)
+; CHECK-MINGW-F128-NEXT:    movaps %xmm0, 64(%rsp)
 ; CHECK-MINGW-F128-NEXT:    leaq 64(%rsp), %rax
-; CHECK-MINGW-F128-NEXT:    movq %rax, 32(%rsp)
-; CHECK-MINGW-F128-NEXT:    xorl %ecx, %ecx
+; CHECK-MINGW-F128-NEXT:    movq %rax, 48(%rsp)
+; CHECK-MINGW-F128-NEXT:    movq $0, 32(%rsp)
+; CHECK-MINGW-F128-NEXT:    leaq 96(%rsp), %rcx
 ; CHECK-MINGW-F128-NEXT:    xorl %edx, %edx
 ; CHECK-MINGW-F128-NEXT:    xorl %r8d, %r8d
 ; CHECK-MINGW-F128-NEXT:    xorl %r9d, %r9d
 ; CHECK-MINGW-F128-NEXT:    callq many_leading_args
-; CHECK-MINGW-F128-NEXT:    addq $88, %rsp
+; CHECK-MINGW-F128-NEXT:    addq $120, %rsp
 ; CHECK-MINGW-F128-NEXT:    retq
 ;
 ; CHECK-MINGW-I128-LABEL: call_many_leading_args:
@@ -993,17 +1019,18 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
 ;
 ; CHECK-MSVC64-F128-LABEL: call_trailing_arg:
 ; CHECK-MSVC64-F128:       # %bb.0:
-; CHECK-MSVC64-F128-NEXT:    subq $72, %rsp
+; CHECK-MSVC64-F128-NEXT:    subq $88, %rsp
 ; CHECK-MSVC64-F128-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-MSVC64-F128-NEXT:    movaps %xmm0, 48(%rsp)
 ; CHECK-MSVC64-F128-NEXT:    leaq 48(%rsp), %rax
-; CHECK-MSVC64-F128-NEXT:    movq %rax, 32(%rsp)
-; CHECK-MSVC64-F128-NEXT:    xorl %ecx, %ecx
+; CHECK-MSVC64-F128-NEXT:    movq %rax, 40(%rsp)
+; CHECK-MSVC64-F128-NEXT:    movq $0, 32(%rsp)
+; CHECK-MSVC64-F128-NEXT:    leaq 64(%rsp), %rcx
 ; CHECK-MSVC64-F128-NEXT:    xorl %edx, %edx
 ; CHECK-MSVC64-F128-NEXT:    xorl %r8d, %r8d
 ; CHECK-MSVC64-F128-NEXT:    xorl %r9d, %r9d
 ; CHECK-MSVC64-F128-NEXT:    callq trailing_arg
-; CHECK-MSVC64-F128-NEXT:    addq $72, %rsp
+; CHECK-MSVC64-F128-NEXT:    addq $88, %rsp
 ; CHECK-MSVC64-F128-NEXT:    retq
 ;
 ; CHECK-MSVC64-I128-LABEL: call_trailing_arg:
@@ -1021,17 +1048,18 @@ define void @call_trailing_arg(PrimTy %x) nounwind {
 ;
 ; CHECK-MINGW-F128-LABEL: call_trailing_arg:
 ; CHECK-MINGW-F128:       # %bb.0:
-; CHECK-MINGW-F128-NEXT:    subq $72, %rsp
+; CHECK-MINGW-F128-NEXT:    subq $88, %rsp
 ; CHECK-MINGW-F128-NEXT:    movaps (%rcx), %xmm0
 ; CHECK-MINGW-F128-NEXT:    movaps %xmm0, 48(%rsp)
 ; CHECK-MINGW-F128-NEXT:    leaq 48(%rsp), %rax
-; CHECK-MINGW-F128-NEXT:    movq %rax, 32(%rsp)
-; CHECK-MINGW-F128-NEXT:    xorl %ecx, %ecx
+; CHECK-MINGW-F128-NEXT:    movq %rax, 40(%rsp)
+; CHECK-MINGW-F128-NEXT:    movq $0, 32(%rsp)
+; CHECK-MINGW-F128-NEXT:    leaq 64(%rsp), %rcx
 ; CHECK-MINGW-F128-NEXT:    xorl %edx, %edx
 ; CHECK-MINGW-F128-NEXT:    xorl %r8d, %r8d
 ; CHECK-MINGW-F128-NEXT:    xorl %r9d, %r9d
 ; CHECK-MINGW-F128-NEXT:    callq trailing_arg
-; CHECK-MINGW-F128-NEXT:    addq $72, %rsp
+; CHECK-MINGW-F128-NEXT:    addq $88, %rsp
 ; CHECK-MINGW-F128-NEXT:    retq
 ;
 ; CHECK-MINGW-I128-LABEL: call_trailing_arg:

>From 70e8db9f599fa870339d87619dba4227ac2043d0 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <[email protected]>
Date: Sat, 20 Jun 2026 12:46:08 +0200
Subject: [PATCH 2/2] add more nuance to GCC compat comment

---
 llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index bce581ad7a48b..ba177c0cc63a1 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -670,13 +670,24 @@ bool X86TargetLowering::CanLowerReturn(
     CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
     const Type *RetTy) const {
-  // Mingw64 GCC returns f128 via sret, which matches the documentation of the
-  // Windows x64 calling convention:
+  // Mingw64 GCC returns f128 via sret, and LLVM matches it for compatibility.
+  //
+  //
+  // Using sret is a reasonable implementation of the Windows x64 calling
+  // convention:
   //
   // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170#return-values
   //
   // > Otherwise, the caller must allocate memory for the return value and pass
-  // a pointer to it as the first argument.
+  // > a pointer to it as the first argument.
+  //
+  // Although it is not the only reasonable interpretation:
+  //
+  // > Nonscalar types including floats, doubles, and vector types such as
+  // > __m128, __m128i, __m128d are returned in XMM0.
+  //
+  // For now, we prefer compatibility with GCC. If official guidelines are ever
+  // published, this can be revisited.
   //
   // Return false, which will perform sret demotion.
   if (Subtarget.isCallingConvWin64(CallConv) &&

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to