[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

Albion Fung via Phabricator via cfe-commits Tue, 26 Jan 2021 10:35:31 -0800

Conanap created this revision.
Conanap added reviewers: nemanjai, saghir, PowerPC.
Conanap added projects: LLVM, clang, PowerPC.
Herald added a subscriber: kbarton.
Conanap requested review of this revision.


A related earlier differential (exploiting xxsplti32dx for vectors) is here:
https://reviews.llvm.org/D90173

This patch exploits the xxsplti32dx instruction, available on Power10, to 
materialize scalar floating-point constants instead of loading them from the 
constant pool in cases where xxspltidp cannot be used, usually because the 
immediate cannot be represented in 32 bits.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D95458

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCISelLowering.h
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/constant-pool.ll
  llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
  llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
  llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
  llvm/test/CodeGen/PowerPC/pcrel.ll

Index: llvm/test/CodeGen/PowerPC/pcrel.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/pcrel.ll
+++ llvm/test/CodeGen/PowerPC/pcrel.ll
@@ -8,13 +8,14 @@
 
 ; Constant Pool Index.
 ; CHECK-S-LABEL: ConstPool
-; CHECK-S:       plfd f1, .LCPI0_0@PCREL(0), 1
+; CHECK-S:       xxsplti32dx vs1, 0, 1081002676
+; CHECK-S-NEXT:       xxsplti32dx vs1, 1, 962072674
 ; CHECK-S:       blr
 
 ; CHECK-O-LABEL: ConstPool
-; CHECK-O:       plfd 1, 0(0), 1
-; CHECK-O-NEXT:  R_PPC64_PCREL34  .rodata.cst8
-; CHECK-O:       blr
+; CHECK-O:       xxsplti32dx 1, 0, 1081002676
+; CHECK-O-NEXT:  xxsplti32dx 1, 1, 962072674
+; CHECK-O-NEXT:  blr
 define dso_local double @ConstPool() local_unnamed_addr {
   entry:
     ret double 0x406ECAB439581062
Index: llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
@@ -35,6 +35,9 @@
 @FuncPtrOut = external local_unnamed_addr global void (...)*, align 8
 
 define dso_local void @ReadWrite8() local_unnamed_addr #0 {
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lbz r3, 0(r3)
+; which is defined between the pld and the stb.
 ; CHECK-LABEL: ReadWrite8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, input8@got@pcrel(0), 1
@@ -42,9 +45,6 @@
 ; CHECK-NEXT:    pld r4, output8@got@pcrel(0), 1
 ; CHECK-NEXT:    .reloc .Lpcrel0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel0-8)
 ; CHECK-NEXT:    lbz r3, 0(r3)
-; In this test the stb r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lbz r3, 0(r3)
-; which is defined between the pld and the stb.
 ; CHECK-NEXT:    stb r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
@@ -54,6 +54,9 @@
 }
 
 define dso_local void @ReadWrite16() local_unnamed_addr #0 {
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lhz r3, 0(r3)
+; which is defined between the pld and the sth.
 ; CHECK-LABEL: ReadWrite16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, input16@got@pcrel(0), 1
@@ -61,9 +64,6 @@
 ; CHECK-NEXT:    pld r4, output16@got@pcrel(0), 1
 ; CHECK-NEXT:    .reloc .Lpcrel1-8,R_PPC64_PCREL_OPT,.-(.Lpcrel1-8)
 ; CHECK-NEXT:    lhz r3, 0(r3)
-; In this test the sth r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lhz r3, 0(r3)
-; which is defined between the pld and the sth.
 ; CHECK-NEXT:    sth r3, 0(r4)
 ; CHECK-NEXT:    blr
 entry:
@@ -144,7 +144,8 @@
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, inputf64@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel5:
-; CHECK-NEXT:    plfd f1, .LCPI6_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1075524403
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 858993459
 ; CHECK-NEXT:    .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; CHECK-NEXT:    lfd f0, 0(r3)
 ; CHECK-NEXT:    pld r3, outputf64@got@pcrel(0), 1
@@ -286,8 +287,7 @@
 
 define dso_local void @FuncPtrCall() local_unnamed_addr #0 {
 ; CHECK-LABEL: FuncPtrCall:
-; CHECK:         .localentry FuncPtrCall, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, FuncPtrIn@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel10:
 ; CHECK-NEXT:    .reloc .Lpcrel10-8,R_PPC64_PCREL_OPT,.-(.Lpcrel10-8)
@@ -317,8 +317,7 @@
 
 define dso_local signext i32 @VecMultiUse() local_unnamed_addr #0 {
 ; CHECK-LABEL: VecMultiUse:
-; CHECK:         .localentry VecMultiUse, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r29, -24(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
@@ -355,8 +354,7 @@
 
 define dso_local signext i32 @UseAddr(i32 signext %a) local_unnamed_addr #0 {
 ; CHECK-LABEL: UseAddr:
-; CHECK:         .localentry UseAddr, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
 ; CHECK-NEXT:    std r0, 16(r1)
Index: llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
+++ llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
@@ -173,7 +173,9 @@
 ; CHECK-LARGE:     add r2, r2, r12
 ; CHECK-S-NOT:       .localentry
 ; CHECK-ALL:       # %bb.0: # %entry
-; CHECK-S-NEXT:    plfd f1, .LCPI7_0@PCREL(0), 1
+; CHECK-S-NEXT:    xxsplti32dx vs1, 0, 1078011044
+; CHECK-S-NEXT:    xxsplti32dx vs1, 1, -337824948
+; CHECK-S-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-S-NEXT:    blr
 entry:
   ret double 0x404124A4EBDD334C
Index: llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -26,7 +26,6 @@
 ; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI0_0@toc@l
 ; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
 ; CHECK-NOPCREL-NEXT:    blr
-
 entry:
   ret <2 x double> <double 3.423300e+02, double 3.423300e+02>
 }
@@ -43,7 +42,6 @@
 ; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI1_0@toc@l
 ; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
 ; CHECK-NOPCREL-NEXT:    blr
-
 entry:
   ret <2 x double> <double 0x380B38FB80000000, double 0x380B38FB80000000>
 }
@@ -60,7 +58,6 @@
 ; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI2_0@toc@l
 ; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
 ; CHECK-NOPCREL-NEXT:    blr
-
 entry:
   ret <2 x double> <double 0xFFFFFFFFFFFFFFF0, double 0xFFFFFFFFFFFFFFF0>
 }
@@ -68,15 +65,10 @@
 define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
 ; CHECK-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1081435463
+; CHECK-NEXT:    xxsplti32dx vs1, 1, -1374389535
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testDoubleNonRepresentableScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
-
 entry:
   ret double 3.423300e+02
 }
@@ -92,7 +84,6 @@
 ; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
 ; CHECK-NOPCREL-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
 ; CHECK-NOPCREL-NEXT:    blr
-
 entry:
   ret float 0x380B38FB80000000
 }
@@ -100,15 +91,10 @@
 define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr {
 ; CHECK-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testFloatDenormToDoubleScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
-
 entry:
   ret double 0x380B38FB80000000
 }
Index: llvm/test/CodeGen/PowerPC/constant-pool.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/constant-pool.ll
+++ llvm/test/CodeGen/PowerPC/constant-pool.ll
@@ -39,8 +39,10 @@
  define ppc_fp128 @LongDoubleConstantPool() {
 ; CHECK-LABEL: LongDoubleConstantPool:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f1, .LCPI2_0@PCREL(0), 1
-; CHECK-NEXT:    plfd f2, .LCPI2_1@PCREL(0), 1
+; CHECK-NEXT:    plfd f2, .LCPI2_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 56623104
+; CHECK-NEXT:    xxsplti32dx vs1, 1, -609716532
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: LongDoubleConstantPool:
@@ -185,9 +187,11 @@
 define double @two_constants(double %a) {
 ; CHECK-LABEL: two_constants:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f0, .LCPI11_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs0, 0, 1074446467
+; CHECK-NEXT:    xxsplti32dx vs0, 1, 309237645
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    plfd f1, .LCPI11_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1073922179
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 309237645
 ; CHECK-NEXT:    xsadddp f1, f0, f1
 ; CHECK-NEXT:    blr
 ;
@@ -212,11 +216,15 @@
 ; CHECK-NEXT:    cmplwi r3, 0
 ; CHECK-NEXT:    beq cr0, .LBB12_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    plfd f1, .LCPI12_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1074935889
+; CHECK-NEXT:    xxsplti32dx vs1, 1, -343597384
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ; CHECK-NEXT:  .LBB12_2: # %if.end
-; CHECK-NEXT:    plfd f0, .LCPI12_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs0, 0, 1076085391
+; CHECK-NEXT:    xxsplti32dx vs0, 1, 1546188227
 ; CHECK-NEXT:    xsadddp f1, f1, f0
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: two_constants_two_bb:
@@ -248,11 +256,14 @@
 define double @three_constants_f64(double %a, double %c) {
 ; CHECK-LABEL: three_constants_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f0, .LCPI13_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs0, 0, 1074446467
+; CHECK-NEXT:    xxsplti32dx vs0, 1, 309237645
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    plfd f1, .LCPI13_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1073922179
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 309237645
 ; CHECK-NEXT:    xsadddp f0, f0, f1
-; CHECK-NEXT:    plfd f1, .LCPI13_2@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1073948393
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 2027224564
 ; CHECK-NEXT:    xsadddp f1, f0, f1
 ; CHECK-NEXT:    blr
 ;
@@ -340,23 +351,29 @@
 
 define ppc_fp128 @three_constants_ppcf128(ppc_fp128 %a, ppc_fp128 %c) {
 ; CHECK-LABEL: three_constants_ppcf128:
-; CHECK:         .localentry three_constants_ppcf128, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r0, 16(r1)
-; CHECK-NEXT:    stdu r1, -32(r1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    plfd f3, .LCPI16_0@PCREL(0), 1
 ; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-NEXT:    xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT:    stxv vs3, 32(r1) # 16-byte Folded Spill
+; CHECK-NEXT:    xxsplti32dx vs3, 1, -343597384
+; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-NEXT:    plfd f3, .LCPI16_1@PCREL(0), 1
+; CHECK-NEXT:    lxv vs3, 32(r1) # 16-byte Folded Reload
 ; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-NEXT:    xxsplti32dx vs3, 1, -1719329096
+; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-NEXT:    plfd f3, .LCPI16_2@PCREL(0), 1
+; CHECK-NEXT:    lxv vs3, 32(r1) # 16-byte Folded Reload
 ; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-NEXT:    xxsplti32dx vs3, 1, 8724152
+; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-NEXT:    addi r1, r1, 32
+; CHECK-NEXT:    addi r1, r1, 48
 ; CHECK-NEXT:    ld r0, 16(r1)
 ; CHECK-NEXT:    mtlr r0
 ; CHECK-NEXT:    blr
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2603,6 +2603,21 @@
            (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
                              VSFRC)>;
 
+// Replace constant pool loads with XXSPLTI32DX for scalars.
+def : Pat<(f32 nzFPImmAsi64
+           : $A),
+          (COPY_TO_REGCLASS(XXSPLTI32DX(XXSPLTI32DX(IMPLICIT_DEF), 0,
+                                        (getFPAs64BitIntHi $A)),
+                                        1, (getFPAs64BitIntLo $A)),
+                            VSRC)>;
+
+def : Pat<(f64 nzFPImmAsi64
+           : $A),
+          (COPY_TO_REGCLASS(XXSPLTI32DX(XXSPLTI32DX(IMPLICIT_DEF), 0,
+                                        (getFPAs64BitIntHi $A)),
+                                        1, (getFPAs64BitIntLo $A)),
+                            VSRC)>;
+
   // Anonymous patterns for XXEVAL
   // AND
   // and(A, B, C)
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -398,6 +398,32 @@
                                    SDLoc(N), MVT::i32);
 }]>;
 
+// Match an FP immediate other than +0.0 that is not a denormal and cannot be
+// converted to a single precision immediate (which XXSPLTIDP could exploit).
+// Such values are materialized with XXSPLTI32DX instead.
+def nzFPImmAsi64 : PatLeaf<(fpimm), [{
+  APFloat APFloatOfN = N->getValueAPF();
+  return checkNonDenormCannotConvertToSingle(APFloatOfN) && !N->isExactlyValue(+0.0);
+}]>;
+
+// Get the high 32 bits of a 64-bit FP immediate.
+def getFPAs64BitIntHi : SDNodeXForm<fpimm, [{
+  APFloat APFloatOfN = N->getValueAPF();
+  checkNonDenormCannotConvertToSingle(APFloatOfN);
+  uint32_t Hi = (uint32_t)((APFloatOfN.bitcastToAPInt().getZExtValue() &
+                            0xFFFFFFFF00000000LL) >> 32);
+  return CurDAG->getTargetConstant(Hi, SDLoc(N), MVT::i32);
+}]>;
+
+// Get the low 32 bits of a 64-bit FP immediate.
+def getFPAs64BitIntLo : SDNodeXForm<fpimm, [{
+  APFloat APFloatOfN = N->getValueAPF();
+  checkNonDenormCannotConvertToSingle(APFloatOfN);
+  uint32_t Lo = (uint32_t)(APFloatOfN.bitcastToAPInt().getZExtValue() &
+                           0xFFFFFFFF);
+  return CurDAG->getTargetConstant(Lo, SDLoc(N), MVT::i32);
+}]>;
+
 def SHL32 : SDNodeXForm<imm, [{
   // Transformation function: 31 - imm
   return getI32Imm(31 - N->getZExtValue(), SDLoc(N));
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1318,6 +1318,8 @@
 
   bool convertToNonDenormSingle(APInt &ArgAPInt);
   bool convertToNonDenormSingle(APFloat &ArgAPFloat);
+  bool checkNonDenormCannotConvertToSingle(APInt &ArgAPInt);
+  bool checkNonDenormCannotConvertToSingle(APFloat &ArgAPFloat);
 
 } // end namespace llvm
 
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8583,6 +8583,29 @@
   return Success;
 }
 
+// Return true if the APFloat is not a denormal and cannot be exploited with
+// XXSPLTIDP, which can only exploit values that fit in 32 bits. Such values
+// can be exploited with XXSPLTI32DX instead.
+bool llvm::checkNonDenormCannotConvertToSingle(APFloat &ArgAPFloat) {
+  // Bail out if the value converts to a non-denormal single, since XXSPLTIDP
+  // should handle that case.
+  if (convertToNonDenormSingle(ArgAPFloat))
+    return false;
+
+  if (!ArgAPFloat.isDenormal())
+    return true;
+  return false;
+}
+
+// Return true if the APInt, interpreted as the bits of a double, is not a
+// denormal and cannot be exploited with XXSPLTIDP, which can only exploit
+// values that fit in 32 bits. Such values can use XXSPLTI32DX instead.
+bool llvm::checkNonDenormCannotConvertToSingle(APInt &ArgAPInt) {
+  double DpValue = ArgAPInt.bitsToDouble();
+  APFloat APFloatDp(DpValue);
+  return checkNonDenormCannotConvertToSingle(APFloatDp);
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it.  If we CAN select this case, and if it
 // selects to a single instruction, return Op.  Otherwise, if we can codegen
@@ -15804,7 +15827,8 @@
       // With prefixed instructions, we can materialize anything that can be
       // represented with a 32-bit immediate, not just positive zero.
       APFloat APFloatOfImm = Imm;
-      return convertToNonDenormSingle(APFloatOfImm);
+      return convertToNonDenormSingle(APFloatOfImm) ||
+             checkNonDenormCannotConvertToSingle(APFloatOfImm);
     }
     LLVM_FALLTHROUGH;
   case MVT::ppcf128:

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D95458: [PowerPC] Exploit xxsplti32dx (constant materialization) for scalars

Reply via email to