Author: Albion Fung Date: 2021-01-20T12:55:52-05:00 New Revision: 719b563ecf6851136e4c1e6a5ff6c407522dd024
URL: https://github.com/llvm/llvm-project/commit/719b563ecf6851136e4c1e6a5ff6c407522dd024 DIFF: https://github.com/llvm/llvm-project/commit/719b563ecf6851136e4c1e6a5ff6c407522dd024.diff LOG: [PowerPC][Power10] Exploit splat instruction xxsplti32dx in Power10 Exploits the instruction xxsplti32dx. It can be used to materialize any 64 bit scalar/vector splat by using two instances, one for the upper 32 bits and the other for the lower 32 bits. It should not materialize the cases which can be materialized by using the instruction xxspltidp. Differential Revision: https://reviews.llvm.org/D90173 Added: Modified: llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCInstrPrefix.td llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll llvm/test/CodeGen/PowerPC/p10-splatImm32.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d6dd70fb1a0c..b37ac7d72dc2 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8604,14 +8604,41 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // If it is a splat of a double, check if we can shrink it to a 32 bit // non-denormal float which when converted back to double gives us the same - // double. This is to exploit the XXSPLTIDP instruction. - if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() && - (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) && - convertToNonDenormSingle(APSplatBits)) { - SDValue SplatNode = DAG.getNode( - PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64, - DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32)); - return DAG.getBitcast(Op.getValueType(), SplatNode); + // double. This is to exploit the XXSPLTIDP instruction.+ // If we lose precision, we use XXSPLTI32DX. 
+ if (BVNIsConstantSplat && (SplatBitSize == 64) && + Subtarget.hasPrefixInstrs()) { + if (convertToNonDenormSingle(APSplatBits) && + (Op->getValueType(0) == MVT::v2f64)) { + SDValue SplatNode = DAG.getNode( + PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64, + DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32)); + return DAG.getBitcast(Op.getValueType(), SplatNode); + } else { // We may lose precision, so we have to use XXSPLTI32DX. + + uint32_t Hi = + (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32); + uint32_t Lo = + (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF); + SDValue SplatNode = DAG.getUNDEF(MVT::v2i64); + + if (!Hi || !Lo) + // If either load is 0, then we should generate XXLXOR to set to 0. + SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64); + + if (Hi) + SplatNode = DAG.getNode( + PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode, + DAG.getTargetConstant(0, dl, MVT::i32), + DAG.getTargetConstant(Hi, dl, MVT::i32)); + + if (Lo) + SplatNode = + DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode, + DAG.getTargetConstant(1, dl, MVT::i32), + DAG.getTargetConstant(Lo, dl, MVT::i32)); + + return DAG.getBitcast(Op.getValueType(), SplatNode); + } } if (!BVNIsConstantSplat || SplatBitSize > 32) { diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index b6e9562dd0f6..b9eb3b3b7d37 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -2533,6 +2533,9 @@ let Predicates = [IsISA3_1] in { def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)), (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>; + + def : Pat <(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)), + (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>; } let Predicates = [IsISA3_1, HasVSX] in { diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll index 0cb8fafe7a3c..0836c4cb7bbe 100644 --- 
a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll +++ b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll @@ -1,114 +1,216 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ -; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s +; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s --check-prefixes=CHECK-LE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ ; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \ -; RUN: --check-prefix=CHECK-NOPCREL +; RUN: --check-prefixes=CHECK-NOPCREL-BE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ ; RUN: -mattr=-pcrelative-memops -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-NOPCREL +; RUN: FileCheck %s --check-prefixes=CHECK-NOPCREL-LE ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ ; RUN: -mattr=-prefix-instrs -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-NOPCREL +; RUN: FileCheck %s --check-prefixes=CHECK-NOPREFIX ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ ; RUN: -ppc-asm-full-reg-names -target-abi=elfv2 -mcpu=pwr10 < %s | \ -; RUN: FileCheck %s +; RUN: FileCheck %s --check-prefixes=CHECK-BE define dso_local <2 x double> @testDoubleToDoubleFail() local_unnamed_addr { -; CHECK-LABEL: testDoubleToDoubleFail: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI0_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testDoubleToDoubleFail: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3 -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testDoubleToDoubleFail: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxsplti32dx vs34, 0, 1081435463 +; 
CHECK-LE-NEXT: xxsplti32dx vs34, 1, -1374389535 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testDoubleToDoubleFail: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, 1081435463 +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 1, -1374389535 +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testDoubleToDoubleFail: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 0, 1081435463 +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, -1374389535 +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testDoubleToDoubleFail: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-NOPREFIX-NEXT: lxvx vs34, 0, r3 +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testDoubleToDoubleFail: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 1081435463 +; CHECK-BE-NEXT: xxsplti32dx vs34, 1, -1374389535 +; CHECK-BE-NEXT: blr entry: ret <2 x double> <double 3.423300e+02, double 3.423300e+02> } define dso_local <2 x double> @testFloatDenormToDouble() local_unnamed_addr { -; CHECK-LABEL: testFloatDenormToDouble: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI1_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testFloatDenormToDouble: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI1_0@toc@ha -; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI1_0@toc@l -; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3 -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testFloatDenormToDouble: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxsplti32dx vs34, 0, 940259579 +; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -2147483648 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDouble: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, 940259579 +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 1, -2147483648 +; CHECK-NOPCREL-BE-NEXT: blr +; +; 
CHECK-NOPCREL-LE-LABEL: testFloatDenormToDouble: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 0, 940259579 +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, -2147483648 +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testFloatDenormToDouble: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-NOPREFIX-NEXT: lxvx vs34, 0, r3 +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testFloatDenormToDouble: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 940259579 +; CHECK-BE-NEXT: xxsplti32dx vs34, 1, -2147483648 +; CHECK-BE-NEXT: blr entry: ret <2 x double> <double 0x380B38FB80000000, double 0x380B38FB80000000> } define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr { -; CHECK-LABEL: testDoubleToDoubleNaNFail: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plxv vs34, .LCPI2_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testDoubleToDoubleNaNFail: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI2_0@toc@ha -; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI2_0@toc@l -; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3 -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testDoubleToDoubleNaNFail: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: xxsplti32dx vs34, 0, -1 +; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -16 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testDoubleToDoubleNaNFail: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, -1 +; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 1, -16 +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testDoubleToDoubleNaNFail: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 0, -1 +; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, -16 +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testDoubleToDoubleNaNFail: +; CHECK-NOPREFIX: # %bb.0: # %entry +; 
CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-NOPREFIX-NEXT: lxvx vs34, 0, r3 +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testDoubleToDoubleNaNFail: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxsplti32dx vs34, 0, -1 +; CHECK-BE-NEXT: xxsplti32dx vs34, 1, -16 +; CHECK-BE-NEXT: blr entry: ret <2 x double> <double 0xFFFFFFFFFFFFFFF0, double 0xFFFFFFFFFFFFFFF0> } define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr { -; CHECK-LABEL: testDoubleNonRepresentableScalar: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plfd f1, .LCPI3_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testDoubleNonRepresentableScalar: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; CHECK-NOPCREL-NEXT: lfd f1, .LCPI3_0@toc@l(r3) -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testDoubleNonRepresentableScalar: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: plfd f1, .LCPI3_0@PCREL(0), 1 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testDoubleNonRepresentableScalar: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-NOPCREL-BE-NEXT: lfd f1, .LCPI3_0@toc@l(r3) +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testDoubleNonRepresentableScalar: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-NOPCREL-LE-NEXT: lfd f1, .LCPI3_0@toc@l(r3) +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testDoubleNonRepresentableScalar: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-NOPREFIX-NEXT: lfd f1, .LCPI3_0@toc@l(r3) +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testDoubleNonRepresentableScalar: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: plfd f1, .LCPI3_0@PCREL(0), 1 +; CHECK-BE-NEXT: blr entry: ret double 3.423300e+02 } define dso_local float @testFloatDenormScalar() 
local_unnamed_addr { -; CHECK-LABEL: testFloatDenormScalar: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plfs f1, .LCPI4_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testFloatDenormScalar: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-NOPCREL-NEXT: lfs f1, .LCPI4_0@toc@l(r3) -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testFloatDenormScalar: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: plfs f1, .LCPI4_0@PCREL(0), 1 +; CHECK-LE-NEXT: blr +; +; CHECK-NOPCREL-BE-LABEL: testFloatDenormScalar: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-NOPCREL-BE-NEXT: lfs f1, .LCPI4_0@toc@l(r3) +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testFloatDenormScalar: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-NOPCREL-LE-NEXT: lfs f1, .LCPI4_0@toc@l(r3) +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testFloatDenormScalar: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-NOPREFIX-NEXT: lfs f1, .LCPI4_0@toc@l(r3) +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testFloatDenormScalar: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: plfs f1, .LCPI4_0@PCREL(0), 1 +; CHECK-BE-NEXT: blr entry: ret float 0x380B38FB80000000 } define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr { -; CHECK-LABEL: testFloatDenormToDoubleScalar: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: plfs f1, .LCPI5_0@PCREL(0), 1 -; CHECK-NEXT: blr -; -; CHECK-NOPCREL-LABEL: testFloatDenormToDoubleScalar: -; CHECK-NOPCREL: # %bb.0: # %entry -; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI5_0@toc@ha -; CHECK-NOPCREL-NEXT: lfs f1, .LCPI5_0@toc@l(r3) -; CHECK-NOPCREL-NEXT: blr - +; CHECK-LE-LABEL: testFloatDenormToDoubleScalar: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: plfs f1, .LCPI5_0@PCREL(0), 1 +; CHECK-LE-NEXT: blr +; +; 
CHECK-NOPCREL-BE-LABEL: testFloatDenormToDoubleScalar: +; CHECK-NOPCREL-BE: # %bb.0: # %entry +; CHECK-NOPCREL-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-NOPCREL-BE-NEXT: lfs f1, .LCPI5_0@toc@l(r3) +; CHECK-NOPCREL-BE-NEXT: blr +; +; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDoubleScalar: +; CHECK-NOPCREL-LE: # %bb.0: # %entry +; CHECK-NOPCREL-LE-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-NOPCREL-LE-NEXT: lfs f1, .LCPI5_0@toc@l(r3) +; CHECK-NOPCREL-LE-NEXT: blr +; +; CHECK-NOPREFIX-LABEL: testFloatDenormToDoubleScalar: +; CHECK-NOPREFIX: # %bb.0: # %entry +; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-NOPREFIX-NEXT: lfs f1, .LCPI5_0@toc@l(r3) +; CHECK-NOPREFIX-NEXT: blr +; +; CHECK-BE-LABEL: testFloatDenormToDoubleScalar: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: plfs f1, .LCPI5_0@PCREL(0), 1 +; CHECK-BE-NEXT: blr entry: ret double 0x380B38FB80000000 } diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll index d610bd260fc9..420a96dc1495 100644 --- a/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll +++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32.ll @@ -1,22 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \ ; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ -; RUN: FileCheck --check-prefix=CHECK-LE %s -; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \ -; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ -; RUN: FileCheck --check-prefix=CHECK-BE %s +; RUN: FileCheck %s ; Function Attrs: norecurse nounwind readnone define <4 x i32> @test_xxsplti32dx_1(<4 x i32> %a) { -; CHECK-LE-LABEL: test_xxsplti32dx_1: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 0, 566 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_1: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 1, 566 -; CHECK-BE-NEXT: blr +; CHECK-LABEL: 
test_xxsplti32dx_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 0, 566 +; CHECK-NEXT: blr entry: %vecins1 = shufflevector <4 x i32> %a, <4 x i32> <i32 undef, i32 566, i32 undef, i32 566>, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x i32> %vecins1 @@ -24,15 +16,10 @@ entry: ; Function Attrs: norecurse nounwind readnone define <4 x i32> @test_xxsplti32dx_2(<4 x i32> %a) { -; CHECK-LE-LABEL: test_xxsplti32dx_2: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 33 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_2: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 33 -; CHECK-BE-NEXT: blr +; CHECK-LABEL: test_xxsplti32dx_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 1, 33 +; CHECK-NEXT: blr entry: %vecins1 = shufflevector <4 x i32> <i32 33, i32 undef, i32 33, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x i32> %vecins1 @@ -40,15 +27,10 @@ entry: ; Function Attrs: norecurse nounwind readnone define <4 x i32> @test_xxsplti32dx_3(<4 x i32> %a) { -; CHECK-LE-LABEL: test_xxsplti32dx_3: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 0, 12 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_3: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 1, 12 -; CHECK-BE-NEXT: blr +; CHECK-LABEL: test_xxsplti32dx_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 0, 12 +; CHECK-NEXT: blr entry: %vecins1 = shufflevector <4 x i32> %a, <4 x i32> <i32 undef, i32 12, i32 undef, i32 12>, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x i32> %vecins1 @@ -56,15 +38,10 @@ entry: ; Function Attrs: norecurse nounwind readnone define <4 x i32> @test_xxsplti32dx_4(<4 x i32> %a) { -; CHECK-LE-LABEL: test_xxsplti32dx_4: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -683 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_4: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 0, -683 
-; CHECK-BE-NEXT: blr +; CHECK-LABEL: test_xxsplti32dx_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 1, -683 +; CHECK-NEXT: blr entry: %vecins1 = shufflevector <4 x i32> <i32 -683, i32 undef, i32 -683, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x i32> %vecins1 @@ -72,15 +49,10 @@ entry: ; Function Attrs: nounwind define <4 x float> @test_xxsplti32dx_5(<4 x float> %vfa) { -; CHECK-LE-LABEL: test_xxsplti32dx_5: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 0, 1065353216 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_5: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 1, 1065353216 -; CHECK-BE-NEXT: blr +; CHECK-LABEL: test_xxsplti32dx_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 0, 1065353216 +; CHECK-NEXT: blr entry: %vecins3.i = shufflevector <4 x float> %vfa, <4 x float> <float undef, float 1.000000e+00, float undef, float 1.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x float> %vecins3.i @@ -88,15 +60,10 @@ entry: ; Function Attrs: nounwind define <4 x float> @test_xxsplti32dx_6(<4 x float> %vfa) { -; CHECK-LE-LABEL: test_xxsplti32dx_6: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 1073741824 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_6: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 1073741824 -; CHECK-BE-NEXT: blr +; CHECK-LABEL: test_xxsplti32dx_6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 1, 1073741824 +; CHECK-NEXT: blr entry: %vecins3.i = shufflevector <4 x float> <float 2.000000e+00, float undef, float 2.000000e+00, float undef>, <4 x float> %vfa, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x float> %vecins3.i @@ -105,16 +72,31 @@ entry: ; Function Attrs: norecurse nounwind readnone ; Test to illustrate when the splat is narrower than 32-bits. 
define dso_local <4 x i32> @test_xxsplti32dx_7(<4 x i32> %a) local_unnamed_addr #0 { -; CHECK-LE-LABEL: test_xxsplti32dx_7: -; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -1414812757 -; CHECK-LE-NEXT: blr -; -; CHECK-BE-LABEL: test_xxsplti32dx_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxsplti32dx vs34, 0, -1414812757 -; CHECK-BE-NEXT: blr +; CHECK-LABEL: test_xxsplti32dx_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 1, -1414812757 +; CHECK-NEXT: blr entry: %vecins1 = shufflevector <4 x i32> <i32 -1414812757, i32 undef, i32 -1414812757, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7> ret <4 x i32> %vecins1 } + +define dso_local <2 x double> @test_xxsplti32dx_8() { +; CHECK-LABEL: test_xxsplti32dx_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 0, 1082660167 +; CHECK-NEXT: xxsplti32dx vs34, 1, -1374389535 +; CHECK-NEXT: blr +entry: + ret <2 x double> <double 0x40881547AE147AE1, double 0x40881547AE147AE1> +} + +define dso_local <8 x i16> @test_xxsplti32dx_9() { +; CHECK-LABEL: test_xxsplti32dx_9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxsplti32dx vs34, 0, 23855277 +; CHECK-NEXT: xxsplti32dx vs34, 1, 65827 +; CHECK-NEXT: blr +entry: + ret <8 x i16> <i16 291, i16 undef, i16 undef, i16 364, i16 undef, i16 1, i16 173, i16 undef> +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits