https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/165817

The availability of the sincos_stret libcall should be treated as a
property of the program, not as a static property of the subtarget, so
the legality rules for FSINCOS no longer depend on it. The one known
regression is the 3-element vector case: a combine happens to replace
the original undef value with a non-undef value, so the 4th component
is never eliminated. Trying to avoid that particular case hits other
combine regressions, so that is left for later.

From b9d0a05c767424df2698618f131bba9a6357f95f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <[email protected]>
Date: Mon, 27 Oct 2025 14:34:43 -0700
Subject: [PATCH] AArch64: Stop changing legality rules based on sincos_stret
 availability

This should be treated like a program property and not a static property
of the subtarget. The regression is the 3 element vector case; a combine
happens to replace the original undef value with non-undef, so the 4th
component is never eliminated. Trying to avoid that particular case
hits other combine regressions, so leave that for later.
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 25 +++--
 llvm/test/CodeGen/AArch64/llvm.sincos.ll      | 97 ++++++++++++-------
 2 files changed, 73 insertions(+), 49 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 60aa61e993b26..6324d9d18e31b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1052,15 +1052,9 @@ AArch64TargetLowering::AArch64TargetLowering(const 
TargetMachine &TM,
   // Lower READCYCLECOUNTER using an mrs from CNTVCT_EL0.
   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
 
-  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
-      getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
     // Issue __sincos_stret if available.
-    setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
-    setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
-  } else {
-    setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
-    setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
-  }
+  setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+  setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
 
   // Make floating-point constants legal for the large code model, so they 
don't
   // become loads from the constant pool.
@@ -5353,20 +5347,23 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
   SDLoc DL(Op);
   SDValue Arg = Op.getOperand(0);
   EVT ArgVT = Arg.getValueType();
+  RTLIB::Libcall LC = RTLIB::getSINCOS_STRET(ArgVT);
+  RTLIB::LibcallImpl SincosStret = getLibcallImpl(LC);
+  if (SincosStret == RTLIB::Unsupported)
+    return SDValue();
+
   Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
 
   ArgListTy Args;
   Args.emplace_back(Arg, ArgTy);
 
-  RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
-                                        : RTLIB::SINCOS_STRET_F32;
-  const char *LibcallName = getLibcallName(LC);
-  SDValue Callee =
-      DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
+  StringRef LibcallImplName = getLibcallImplName(SincosStret);
+  SDValue Callee = DAG.getExternalSymbol(LibcallImplName.data(),
+                                         getPointerTy(DAG.getDataLayout()));
 
   StructType *RetTy = StructType::get(ArgTy, ArgTy);
   TargetLowering::CallLoweringInfo CLI(DAG);
-  CallingConv::ID CC = getLibcallCallingConv(LC);
+  CallingConv::ID CC = getLibcallImplCallingConv(SincosStret);
   CLI.setDebugLoc(DL)
       .setChain(DAG.getEntryNode())
       .setLibCallee(CC, RetTy, Callee, std::move(Args));
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
index 21da8645b9b16..fa3ada6212894 100644
--- a/llvm/test/CodeGen/AArch64/llvm.sincos.ll
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
@@ -374,84 +374,111 @@ define { float, float } @test_sincos_f32(float %a) 
nounwind {
 define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) 
nounwind {
 ; CHECK-LABEL: test_sincos_v3f32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #80
-; CHECK-NEXT:    add x0, sp, #20
-; CHECK-NEXT:    add x1, sp, #16
-; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    sub sp, sp, #112
+; CHECK-NEXT:    add x0, sp, #60
+; CHECK-NEXT:    add x1, sp, #56
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #96] // 16-byte Folded Spill
 ; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    add x0, sp, #28
-; CHECK-NEXT:    add x1, sp, #24
-; CHECK-NEXT:    add x19, sp, #28
-; CHECK-NEXT:    add x20, sp, #24
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    add x19, sp, #44
+; CHECK-NEXT:    add x20, sp, #40
 ; CHECK-NEXT:    mov s0, v0.s[1]
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT:    add x0, sp, #44
-; CHECK-NEXT:    add x1, sp, #40
-; CHECK-NEXT:    add x21, sp, #44
-; CHECK-NEXT:    add x22, sp, #40
+; CHECK-NEXT:    add x0, sp, #36
+; CHECK-NEXT:    add x1, sp, #32
+; CHECK-NEXT:    add x21, sp, #36
+; CHECK-NEXT:    add x22, sp, #32
 ; CHECK-NEXT:    mov s0, v0.s[2]
 ; CHECK-NEXT:    bl sincosf
-; CHECK-NEXT:    ldp s1, s0, [sp, #16]
-; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    add x23, sp, #28
+; CHECK-NEXT:    add x24, sp, #24
+; CHECK-NEXT:    mov s0, v0.s[3]
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldp s1, s0, [sp, #56]
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
 ; CHECK-NEXT:    ld1 { v0.s }[1], [x19]
 ; CHECK-NEXT:    ld1 { v1.s }[1], [x20]
-; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ldp x20, x19, [sp, #96] // 16-byte Folded Reload
 ; CHECK-NEXT:    ld1 { v0.s }[2], [x21]
 ; CHECK-NEXT:    ld1 { v1.s }[2], [x22]
-; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT:    ld1 { v0.s }[3], [x23]
+; CHECK-NEXT:    ld1 { v1.s }[3], [x24]
+; CHECK-NEXT:    ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #112
 ; CHECK-NEXT:    ret
 ;
 ; NO-LIBCALL-LABEL: test_sincos_v3f32:
 ; NO-LIBCALL:       // %bb.0:
 ; NO-LIBCALL-NEXT:    sub sp, sp, #80
-; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #56] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    mov s8, v0.s[1]
-; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str d10, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #72] // 8-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov s0, s8
 ; NO-LIBCALL-NEXT:    bl sinf
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
-; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; NO-LIBCALL-NEXT:    bl sinf
-; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
-; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    mov s9, v0.s[2]
 ; NO-LIBCALL-NEXT:    fmov s0, s9
 ; NO-LIBCALL-NEXT:    bl sinf
-; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v1.s[2], v0.s[0]
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov s10, v0.s[3]
+; NO-LIBCALL-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s10
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    mov v1.s[3], v0.s[0]
 ; NO-LIBCALL-NEXT:    fmov s0, s8
-; NO-LIBCALL-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    bl cosf
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
-; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
 ; NO-LIBCALL-NEXT:    bl cosf
 ; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
 ; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
 ; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
-; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
 ; NO-LIBCALL-NEXT:    fmov s0, s9
 ; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    mov v1.s[2], v0.s[0]
+; NO-LIBCALL-NEXT:    fmov s0, s10
+; NO-LIBCALL-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    bl cosf
 ; NO-LIBCALL-NEXT:    fmov s2, s0
-; NO-LIBCALL-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
-; NO-LIBCALL-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; NO-LIBCALL-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
-; NO-LIBCALL-NEXT:    mov v1.s[2], v2.s[0]
+; NO-LIBCALL-NEXT:    ldp q1, q0, [sp, #16] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.s[3], v2.s[0]
 ; NO-LIBCALL-NEXT:    add sp, sp, #80
 ; NO-LIBCALL-NEXT:    ret
   %result = call { <3 x float>, <3 x float> } @llvm.sincos.v3f32(<3 x float> 
%a)

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to