Author: Stefan Pintilie
Date: 2021-11-26T14:27:46-06:00
New Revision: 050c335ab42f96acfdfa6572cad1310e685cd199
URL: https://github.com/llvm/llvm-project/commit/050c335ab42f96acfdfa6572cad1310e685cd199 DIFF: https://github.com/llvm/llvm-project/commit/050c335ab42f96acfdfa6572cad1310e685cd199.diff LOG: [PowerPC] Replace MFVSRLD with MFVSRD when the vector is symmetrical The MFVSRD is faster than the MFVSRLD instruction and if the input vector is symmetrical then both instructions produce the same result and we should prefer the faster one. WIP. Added: Modified: llvm/lib/Target/PowerPC/PPCMIPeephole.cpp llvm/test/CodeGen/PowerPC/vector-reduce-add.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp index d12a9b806fd03..ac1204f230656 100644 --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -1064,6 +1064,49 @@ bool PPCMIPeephole::simplifyCode(void) { Simplified = true; break; } + case PPC::MFVSRLD: + // It is more efficient to use MFVSRD instead of MFVSRLD in cases where + // it is known that the two doublewords of the vector are identical. + MachineOperand &VecInput = MI.getOperand(1); + MachineInstr *DefVecReg = getVRegDefOrNull(&VecInput, MRI); + if (DefVecReg && DefVecReg->getOpcode() == PPC::VADDUDM) { + MachineOperand &AddInput1 = DefVecReg->getOperand(1); + MachineOperand &AddInput2 = DefVecReg->getOperand(2); + MachineInstr *DefAdd1 = getVRegDefOrNull(&AddInput1, MRI); + MachineInstr *DefAdd2 = getVRegDefOrNull(&AddInput2, MRI); + + if (!DefAdd1 || !DefAdd2) + break; + + // Check if one of the inputs to the vector add is a permute. + MachineInstr *PermInstr = nullptr; + Register NonPermReg; + if (DefAdd1->getOpcode() == PPC::XXPERMDI) { + PermInstr = DefAdd1; + NonPermReg = AddInput2.getReg(); + } else if (DefAdd2->getOpcode() == PPC::XXPERMDI) { + PermInstr = DefAdd2; + NonPermReg = AddInput1.getReg(); + } + + // Abort if there is no permute instruction. 
+ if (!PermInstr) + break; + + MachineOperand &PermInput1 = PermInstr->getOperand(1); + MachineOperand &PermInput2 = PermInstr->getOperand(2); + unsigned PermType = PermInstr->getOperand(3).getImm(); + if (PermInput1.getReg() == PermInput2.getReg() && PermType == 2 && + PermInput1.getReg() == NonPermReg) { + Register VecInputReg = VecInput.getReg(); + BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::MFVSRD), + MI.getOperand(0).getReg()) + .addReg(VecInputReg, 0, PPC::sub_64); + ToErase = &MI; + Simplified = true; + } + } + break; } } diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll index 2ba113f59da3c..b2ecce6777e05 100644 --- a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll +++ b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll @@ -1245,7 +1245,7 @@ define dso_local i64 @v2i64(<2 x i64> %a) local_unnamed_addr #0 { ; PWR9LE: # %bb.0: # %entry ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vaddudm v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v2i64: @@ -1259,7 +1259,7 @@ define dso_local i64 @v2i64(<2 x i64> %a) local_unnamed_addr #0 { ; PWR10LE: # %bb.0: # %entry ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vaddudm v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v2i64: @@ -1279,7 +1279,7 @@ define dso_local i64 @v4i64(<4 x i64> %a) local_unnamed_addr #0 { ; PWR9LE-NEXT: vaddudm v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vaddudm v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v4i64: @@ -1295,7 +1295,7 @@ define dso_local i64 @v4i64(<4 x i64> %a) local_unnamed_addr #0 { ; PWR10LE-NEXT: vaddudm v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vaddudm v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v4i64: @@ -1318,7 +1318,7 @@ define 
dso_local i64 @v8i64(<8 x i64> %a) local_unnamed_addr #0 { ; PWR9LE-NEXT: vaddudm v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vaddudm v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v8i64: @@ -1338,7 +1338,7 @@ define dso_local i64 @v8i64(<8 x i64> %a) local_unnamed_addr #0 { ; PWR10LE-NEXT: vaddudm v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vaddudm v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v8i64: @@ -1367,7 +1367,7 @@ define dso_local i64 @v16i64(<16 x i64> %a) local_unnamed_addr #0 { ; PWR9LE-NEXT: vaddudm v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vaddudm v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v16i64: @@ -1395,7 +1395,7 @@ define dso_local i64 @v16i64(<16 x i64> %a) local_unnamed_addr #0 { ; PWR10LE-NEXT: vaddudm v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vaddudm v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v16i64: @@ -1468,7 +1468,7 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 { ; PWR9LE-NEXT: vaddudm v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vaddudm v2, v2, v3 -; PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v16i8tov16i64_sign: @@ -1560,7 +1560,7 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 { ; PWR10LE-NEXT: vaddudm v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vaddudm v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v16i8tov16i64_sign: @@ -1667,7 +1667,7 @@ define dso_local i64 @v16i8tov16i64_zero(<16 x i8> %a) local_unnamed_addr #0 { ; PWR9LE-NEXT: vaddudm v2, v2, v3 ; PWR9LE-NEXT: xxswapd v3, v2 ; PWR9LE-NEXT: vaddudm v2, v2, v3 -; 
PWR9LE-NEXT: mfvsrld r3, v2 +; PWR9LE-NEXT: mfvsrd r3, v2 ; PWR9LE-NEXT: blr ; ; PWR9BE-LABEL: v16i8tov16i64_zero: @@ -1745,7 +1745,7 @@ define dso_local i64 @v16i8tov16i64_zero(<16 x i8> %a) local_unnamed_addr #0 { ; PWR10LE-NEXT: vaddudm v2, v2, v3 ; PWR10LE-NEXT: xxswapd v3, v2 ; PWR10LE-NEXT: vaddudm v2, v2, v3 -; PWR10LE-NEXT: mfvsrld r3, v2 +; PWR10LE-NEXT: mfvsrd r3, v2 ; PWR10LE-NEXT: blr ; ; PWR10BE-LABEL: v16i8tov16i64_zero: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits