[llvm-branch-commits] [llvm] 719b563 - [PowerPC][Power10] Exploit splat instruction xxsplti32dx in Power10

Albion Fung via llvm-branch-commits Wed, 20 Jan 2021 10:00:36 -0800

Author: Albion Fung
Date: 2021-01-20T12:55:52-05:00
New Revision: 719b563ecf6851136e4c1e6a5ff6c407522dd024


URL: 
https://github.com/llvm/llvm-project/commit/719b563ecf6851136e4c1e6a5ff6c407522dd024
DIFF: 
https://github.com/llvm/llvm-project/commit/719b563ecf6851136e4c1e6a5ff6c407522dd024.diff

LOG: [PowerPC][Power10] Exploit splat instruction xxsplti32dx in Power10

Exploits the instruction xxsplti32dx.

It can be used to materialize any 64 bit scalar/vector splat by using two 
instances, one for the upper 32 bits and the other for the lower 32 bits. It 
should not materialize the cases which can be materialized by using the 
instruction xxspltidp.

Differential Revision: https://https://reviews.llvm.org/D90173

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/lib/Target/PowerPC/PPCInstrPrefix.td
    llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
    llvm/test/CodeGen/PowerPC/p10-splatImm32.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp 
b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index d6dd70fb1a0c..b37ac7d72dc2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8604,14 +8604,41 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
 
   // If it is a splat of a double, check if we can shrink it to a 32 bit
   // non-denormal float which when converted back to double gives us the same
-  // double. This is to exploit the XXSPLTIDP instruction.
-  if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
-      (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
-      convertToNonDenormSingle(APSplatBits)) {
-    SDValue SplatNode = DAG.getNode(
-        PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
-        DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
-    return DAG.getBitcast(Op.getValueType(), SplatNode);
+  // double. This is to exploit the XXSPLTIDP instruction.+  // If we lose 
precision, we use XXSPLTI32DX.
+  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
+      Subtarget.hasPrefixInstrs()) {
+    if (convertToNonDenormSingle(APSplatBits) &&
+        (Op->getValueType(0) == MVT::v2f64)) {
+      SDValue SplatNode = DAG.getNode(
+          PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
+          DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
+      return DAG.getBitcast(Op.getValueType(), SplatNode);
+    } else { // We may lose precision, so we have to use XXSPLTI32DX.
+
+      uint32_t Hi =
+          (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 
32);
+      uint32_t Lo =
+          (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
+      SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
+
+      if (!Hi || !Lo)
+        // If either load is 0, then we should generate XXLXOR to set to 0.
+        SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
+
+      if (Hi)
+        SplatNode = DAG.getNode(
+            PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
+            DAG.getTargetConstant(0, dl, MVT::i32),
+            DAG.getTargetConstant(Hi, dl, MVT::i32));
+
+      if (Lo)
+        SplatNode =
+            DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
+                        DAG.getTargetConstant(1, dl, MVT::i32),
+                        DAG.getTargetConstant(Lo, dl, MVT::i32));
+
+      return DAG.getBitcast(Op.getValueType(), SplatNode);
+    }
   }
 
   if (!BVNIsConstantSplat || SplatBitSize > 32) {

diff  --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td 
b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index b6e9562dd0f6..b9eb3b3b7d37 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2533,6 +2533,9 @@ let Predicates = [IsISA3_1] in {
 
   def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
             (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
+
+  def : Pat <(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)),
+             (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>;
 }
 
 let Predicates = [IsISA3_1, HasVSX] in {

diff  --git a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll 
b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
index 0cb8fafe7a3c..0836c4cb7bbe 100644
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -1,114 +1,216 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
-; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
+; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s 
--check-prefixes=CHECK-LE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
 ; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \
-; RUN:     --check-prefix=CHECK-NOPCREL
+; RUN:     --check-prefixes=CHECK-NOPCREL-BE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
 ; RUN:     -mattr=-pcrelative-memops -ppc-asm-full-reg-names -mcpu=pwr10 < %s 
| \
-; RUN:     FileCheck %s --check-prefix=CHECK-NOPCREL
+; RUN:     FileCheck %s --check-prefixes=CHECK-NOPCREL-LE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
 ; RUN:     -mattr=-prefix-instrs -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN:     FileCheck %s --check-prefix=CHECK-NOPCREL
+; RUN:     FileCheck %s --check-prefixes=CHECK-NOPREFIX
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
 ; RUN:     -ppc-asm-full-reg-names -target-abi=elfv2 -mcpu=pwr10 < %s | \
-; RUN:     FileCheck %s
+; RUN:     FileCheck %s --check-prefixes=CHECK-BE
 
 define dso_local <2 x double> @testDoubleToDoubleFail() local_unnamed_addr {
-; CHECK-LABEL: testDoubleToDoubleFail:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv vs34, .LCPI0_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testDoubleToDoubleFail:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testDoubleToDoubleFail:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 1081435463
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testDoubleToDoubleFail:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 0, 1081435463
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testDoubleToDoubleFail:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 0, 1081435463
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testDoubleToDoubleFail:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-NOPREFIX-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testDoubleToDoubleFail:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, 1081435463
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-BE-NEXT:    blr
 entry:
   ret <2 x double> <double 3.423300e+02, double 3.423300e+02>
 }
 
 define dso_local <2 x double> @testFloatDenormToDouble() local_unnamed_addr {
-; CHECK-LABEL: testFloatDenormToDouble:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv vs34, .LCPI1_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testFloatDenormToDouble:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testFloatDenormToDouble:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 940259579
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -2147483648
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDouble:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 0, 940259579
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 1, -2147483648
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDouble:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 0, 940259579
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 1, -2147483648
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testFloatDenormToDouble:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK-NOPREFIX-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testFloatDenormToDouble:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, 940259579
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, -2147483648
+; CHECK-BE-NEXT:    blr
 entry:
   ret <2 x double> <double 0x380B38FB80000000, double 0x380B38FB80000000>
 }
 
 define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr {
-; CHECK-LABEL: testDoubleToDoubleNaNFail:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv vs34, .LCPI2_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testDoubleToDoubleNaNFail:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, -1
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -16
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 0, -1
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 1, -16
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 0, -1
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 1, -16
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; CHECK-NOPREFIX-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, -1
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, -16
+; CHECK-BE-NEXT:    blr
 entry:
   ret <2 x double> <double 0xFFFFFFFFFFFFFFF0, double 0xFFFFFFFFFFFFFFF0>
 }
 
 define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr 
{
-; CHECK-LABEL: testDoubleNonRepresentableScalar:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testDoubleNonRepresentableScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-NOPCREL-BE-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-NOPCREL-LE-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-BE-NEXT:    blr
 entry:
   ret double 3.423300e+02
 }
 
 define dso_local float @testFloatDenormScalar() local_unnamed_addr {
-; CHECK-LABEL: testFloatDenormScalar:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testFloatDenormScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testFloatDenormScalar:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testFloatDenormScalar:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NOPCREL-BE-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testFloatDenormScalar:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NOPCREL-LE-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testFloatDenormScalar:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testFloatDenormScalar:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
+; CHECK-BE-NEXT:    blr
 entry:
   ret float 0x380B38FB80000000
 }
 
 define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr {
-; CHECK-LABEL: testFloatDenormToDoubleScalar:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
-; CHECK-NEXT:    blr
-;
-; CHECK-NOPCREL-LABEL: testFloatDenormToDoubleScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
-
+; CHECK-LE-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-NOPCREL-BE:       # %bb.0: # %entry
+; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NOPCREL-BE-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    blr
+;
+; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-NOPCREL-LE:       # %bb.0: # %entry
+; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NOPCREL-LE-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    blr
+;
+; CHECK-NOPREFIX-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPREFIX-NEXT:    blr
+;
+; CHECK-BE-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-BE-NEXT:    blr
 entry:
   ret double 0x380B38FB80000000
 }

diff  --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll 
b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
index d610bd260fc9..420a96dc1495 100644
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
@@ -1,22 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
 ; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN:     FileCheck --check-prefix=CHECK-LE %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
-; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN:     FileCheck --check-prefix=CHECK-BE %s
+; RUN:     FileCheck %s
 
 ; Function Attrs: norecurse nounwind readnone
 define  <4 x i32> @test_xxsplti32dx_1(<4 x i32> %a) {
-; CHECK-LE-LABEL: test_xxsplti32dx_1:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 566
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_1:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, 566
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 566
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> %a, <4 x i32> <i32 undef, i32 566, i32 
undef, i32 566>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
@@ -24,15 +16,10 @@ entry:
 
 ; Function Attrs: norecurse nounwind readnone
 define  <4 x i32> @test_xxsplti32dx_2(<4 x i32> %a) {
-; CHECK-LE-LABEL: test_xxsplti32dx_2:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, 33
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_2:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, 33
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 1, 33
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> <i32 33, i32 undef, i32 33, i32 undef>, 
<4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
@@ -40,15 +27,10 @@ entry:
 
 ; Function Attrs: norecurse nounwind readnone
 define  <4 x i32> @test_xxsplti32dx_3(<4 x i32> %a) {
-; CHECK-LE-LABEL: test_xxsplti32dx_3:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 12
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_3:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, 12
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 12
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> %a, <4 x i32> <i32 undef, i32 12, i32 
undef, i32 12>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
@@ -56,15 +38,10 @@ entry:
 
 ; Function Attrs: norecurse nounwind readnone
 define  <4 x i32> @test_xxsplti32dx_4(<4 x i32> %a) {
-; CHECK-LE-LABEL: test_xxsplti32dx_4:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -683
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_4:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, -683
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 1, -683
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> <i32 -683, i32 undef, i32 -683, i32 
undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
@@ -72,15 +49,10 @@ entry:
 
 ; Function Attrs: nounwind
 define  <4 x float> @test_xxsplti32dx_5(<4 x float> %vfa) {
-; CHECK-LE-LABEL: test_xxsplti32dx_5:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 0, 1065353216
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_5:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 1, 1065353216
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 1065353216
+; CHECK-NEXT:    blr
 entry:
   %vecins3.i = shufflevector <4 x float> %vfa, <4 x float> <float undef, float 
1.000000e+00, float undef, float 1.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, 
i32 7>
   ret <4 x float> %vecins3.i
@@ -88,15 +60,10 @@ entry:
 
 ; Function Attrs: nounwind
 define  <4 x float> @test_xxsplti32dx_6(<4 x float> %vfa) {
-; CHECK-LE-LABEL: test_xxsplti32dx_6:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, 1073741824
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_6:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, 1073741824
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_6:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 1, 1073741824
+; CHECK-NEXT:    blr
 entry:
   %vecins3.i = shufflevector <4 x float> <float 2.000000e+00, float undef, 
float 2.000000e+00, float undef>, <4 x float> %vfa, <4 x i32> <i32 0, i32 5, 
i32 2, i32 7>
   ret <4 x float> %vecins3.i
@@ -105,16 +72,31 @@ entry:
 ; Function Attrs: norecurse nounwind readnone
 ; Test to illustrate when the splat is narrower than 32-bits.
 define dso_local <4 x i32> @test_xxsplti32dx_7(<4 x i32> %a) 
local_unnamed_addr #0 {
-; CHECK-LE-LABEL: test_xxsplti32dx_7:
-; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -1414812757
-; CHECK-LE-NEXT:    blr
-;
-; CHECK-BE-LABEL: test_xxsplti32dx_7:
-; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, -1414812757
-; CHECK-BE-NEXT:    blr
+; CHECK-LABEL: test_xxsplti32dx_7:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 1, -1414812757
+; CHECK-NEXT:    blr
 entry:
   %vecins1 = shufflevector <4 x i32> <i32 -1414812757, i32 undef, i32 
-1414812757, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
 }
+
+define dso_local <2 x double> @test_xxsplti32dx_8() {
+; CHECK-LABEL: test_xxsplti32dx_8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 1082660167
+; CHECK-NEXT:    xxsplti32dx vs34, 1, -1374389535
+; CHECK-NEXT:    blr
+entry:
+  ret <2 x double> <double 0x40881547AE147AE1, double 0x40881547AE147AE1>
+}
+
+define dso_local <8 x i16> @test_xxsplti32dx_9() {
+; CHECK-LABEL: test_xxsplti32dx_9:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxsplti32dx vs34, 0, 23855277
+; CHECK-NEXT:    xxsplti32dx vs34, 1, 65827
+; CHECK-NEXT:    blr
+entry:
+  ret <8 x i16> <i16 291, i16 undef, i16 undef, i16 364, i16 undef, i16 1, i16 
173, i16 undef>
+}


        
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] 719b563 - [PowerPC][Power10] Exploit splat instruction xxsplti32dx in Power10

Reply via email to