https://gcc.gnu.org/g:b5906edb9d2e7dcc3c1c70030133ef0a3f5fb5b3

commit b5906edb9d2e7dcc3c1c70030133ef0a3f5fb5b3
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Sun Nov 17 18:13:28 2024 -0500

    PR target/108958 -- use mtvsrdd to zero extend GPR DImode to VSX TImode
    
    Previously GCC would zero extend a DImode GPR value to TImode by first zero
    extending the DImode value into a GPR TImode value, and then do a MTVSRDD to
    move this value to a VSX register.
    
    This patch does the move directly, since if the middle argument to MTVSRDD
    is 0, it does the zero extend.
    
    This patch also generates LXVRDX if the DImode value is in memory.
    
    Finally, if the DImode value is already in a vector register, it does a
    XXSPLTIB and XXPERMDI to get the value into the bottom 64 bits of the
    register.
    
    I have built GCC with the patches in this patch set applied on both little
    and big endian PowerPC systems and there were no regressions.  Can I apply
    this patch to GCC 15?
    
    2024-11-17  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/rs6000.md (zero_extendditi2): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/pr108958.c: New test.

Diff:
---
 gcc/config/rs6000/rs6000.md                 | 47 +++++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr108958.c | 58 +++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index d266f93ff2e4..e3ac69430f39 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -1026,6 +1026,65 @@
    (set_attr "dot" "yes")
    (set_attr "length" "4,8")])
 
+;; Zero extend DImode to TImode.  Alternatives:
+;;   1-2: GPR destination; split after reload into a DImode move of the
+;;        low word and a load of 0 into the high word.
+;;   3:   GPR source, VSX destination; MTVSRDD with RA=0 (ISA 3.0).
+;;   4:   memory source, VSX destination; LXVRDX (power10).
+;;   5:   VSX source and destination; split after reload into clearing
+;;        the destination and a VEC_CONCAT of the source with the zero.
+(define_insn_and_split "zero_extendditi2"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=r,r,wa,wa,?&wa")
+       (zero_extend:TI
+        (match_operand:DI 1 "reg_or_mem_operand" "r,m,r,Z,wa")))]
+  "TARGET_DIRECT_MOVE_64BIT"
+  "@
+  #
+  #
+  mtvsrdd %x0,0,%1
+  lxvrdx %x0,%y1
+  #"
+  "&& reload_completed
+   && (int_reg_operand (operands[0], TImode)
+       || (vsx_register_operand (operands[0], TImode)
+           && vsx_register_operand (operands[1], DImode)))"
+  [(set (match_dup 2)
+       (match_dup 3))
+   (set (match_dup 4)
+       (match_dup 5))]
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+
+  if (int_reg_operand (op0, TImode))
+    {
+      /* GPR pair: move the value into the low word, clear the high word.  */
+      operands[2] = gen_lowpart (DImode, op0);
+      operands[3] = op1;
+      operands[4] = gen_highpart (DImode, op0);
+      operands[5] = const0_rtx;
+    }
+  else
+    {
+      /* VSX to VSX: zero the whole destination, then combine the zeroed
+         doubleword with the source.  The destination is early-clobber
+         because it is written before the source is read.  */
+      int op0_r = reg_or_subregno (op0);
+      rtx op0_di = gen_rtx_REG (DImode, op0_r);
+      rtx op0_v2di = gen_rtx_REG (V2DImode, op0_r);
+      rtx lo = WORDS_BIG_ENDIAN ? op0_di : op1;
+      rtx hi = WORDS_BIG_ENDIAN ? op1 : op0_di;
+
+      operands[2] = op0_v2di;
+      operands[3] = CONST0_RTX (V2DImode);
+      operands[4] = op0_v2di;
+      operands[5] = gen_rtx_VEC_CONCAT (V2DImode, hi, lo);
+    }
+}
+  ;; mtvsrdd needs ISA 3.0 (p9v); lxvrdx needs power10.
+  [(set_attr "type" "*,load,mtvsr,vecload,vecperm")
+   (set_attr "length" "8,8,*,*,8")
+   (set_attr "isa" "*,*,p9v,p10,*")])
 
 (define_insn "extendqi<mode>2"
   [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,?*v")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr108958.c b/gcc/testsuite/gcc.target/powerpc/pr108958.c
new file mode 100644
index 000000000000..80155cff0b9e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr108958.c
@@ -0,0 +1,58 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* PR target/108958, use mtvsrdd to zero extend gpr to vsx register.  */
+
+void
+arg_to_vsx (unsigned long long x, __uint128_t *p)
+{
+  /* Expect: mtvsrdd vsx,0,gpr (the asm forces the value into a VSX reg).  */
+  __uint128_t y = x;
+  __asm__ (" # %x0" : "+wa" (y));
+  *p = y;
+}
+
+void
+mem_to_vsx (unsigned long long *p, __uint128_t *q)
+{
+  /* Expect: lxvrdx vsx,0,ptr (the asm forces the value into a VSX reg).  */
+  __uint128_t y = *p;
+  __asm__ (" # %x0" : "+wa" (y));
+  *q = y;
+}
+
+
+void
+vsx_to_vsx (double d, __uint128_t *p)
+{
+  /* Expect: xxspltib + xxpermdi (source presumably already in a VSX reg).  */
+  unsigned long long ull = d;
+  __uint128_t x = ull;
+  __asm__ (" # %x0" : "+wa" (x));
+  *p = x;
+}
+
+void
+arg_to_gpr (unsigned long long x, __uint128_t *p)
+{
+  /* Expect: mr gpr1_lo,gpr2; li gpr1_hi,0 (the asm forces a GPR pair).  */
+  __uint128_t y = x;
+  __asm__ (" # %0" : "+r" (y));
+  *p = y;
+}
+
+void
+mem_to_gpr (unsigned long long *p, __uint128_t *q)
+{
+  /* Expect: ld gpr1_lo,addr; li gpr1_hi,0 (the asm forces a GPR pair).  */
+  __uint128_t y = *p;
+  __asm__ (" # %0" : "+r" (y));
+  *q = y;
+}
+
+/* { dg-final { scan-assembler-times {\mmtvsrdd .*,0,.*\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mlxvrdx\M}          1 } } */
+/* { dg-final { scan-assembler-times {\mxxpermdi\M}        1 } } */

Reply via email to