117251: Add PowerPC XXEVAL support to speed up SHA3 calculations

Michael Meissner via Gcc-cvs Tue, 10 Jun 2025 15:26:54 -0700

https://gcc.gnu.org/g:742340663eeb49c659420603e1d5a73579316004


commit 742340663eeb49c659420603e1d5a73579316004
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Tue Jun 10 15:45:21 2025 -0400

    PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations
    
    2025-06-10  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/
    
            PR target/117251
            * config/rs6000/fusion.md: Regenerate.
            * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to
            generate vector/vector and/and fusion if XXEVAL is supported.
            * config/rs6000/predicates.md (vector_fusion_operand): New
            predicate.
            * config/rs6000/rs6000.h (TARGET_XXEVAL): New macro.
            * config/rs6000/rs6000.md (isa attribute): Add xxeval.
            (enabled attribute): Add XXEVAL support.

Diff:
---
 gcc/config/rs6000/fusion.md     | 15 ++++++-----
 gcc/config/rs6000/genfusion.pl  | 58 ++++++++++++++++++++++++++++++++++++++---
 gcc/config/rs6000/predicates.md | 12 +++++++++
 gcc/config/rs6000/rs6000.h      |  4 +++
 gcc/config/rs6000/rs6000.md     |  7 ++++-
 5 files changed, 85 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
index 621b346f9eb9..d24837d68d83 100644
--- a/gcc/config/rs6000/fusion.md
+++ b/gcc/config/rs6000/fusion.md
@@ -1871,20 +1871,23 @@
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vand -> vand
 (define_insn "*fuse_vand_vand"
-  [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v")
-        (and:VM (and:VM (match_operand:VM 0 "altivec_register_operand" 
"v,v,v,v")
-                          (match_operand:VM 1 "altivec_register_operand" 
"%v,v,v,v"))
-                 (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))
-   (clobber (match_scratch:VM 4 "=X,X,X,&v"))]
+  [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v")
+        (and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" 
"v,v,v,wa,v")
+                          (match_operand:VM 1 "vector_fusion_operand" 
"%v,v,v,wa,v"))
+                 (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v")))
+   (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))]
   "(TARGET_P10_FUSION)"
   "@
    vand %3,%1,%0\;vand %3,%3,%2
    vand %3,%1,%0\;vand %3,%3,%2
    vand %3,%1,%0\;vand %3,%3,%2
+   xxeval %x3,%x2,%x1,%x0,1
    vand %4,%1,%0\;vand %3,%4,%2"
   [(set_attr "type" "fused_vector")
    (set_attr "cost" "6")
-   (set_attr "length" "8")])
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
 
 ;; logical-logical fusion pattern generated by gen_logical_addsubf
 ;; vector vandc -> vand
diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl
index e5d3b1ee449d..351a4d914a4a 100755
--- a/gcc/config/rs6000/genfusion.pl
+++ b/gcc/config/rs6000/genfusion.pl
@@ -211,25 +211,33 @@ sub gen_logical_addsubf
        $inner_comp, $inner_inv, $inner_rtl, $inner_op, $both_commute, $c4,
        $bc, $inner_arg0, $inner_arg1, $inner_exp, $outer_arg2, $outer_exp,
        $ftype, $insn, $is_subf, $is_rsubf, $outer_32, $outer_42,$outer_name,
-       $fuse_type);
-  KIND: foreach $kind ('scalar','vector') {
+       $fuse_type, $xxeval, $c5, $vect_pred, $vect_inner_arg0, 
$vect_inner_arg1,
+       $vect_inner_exp, $vect_outer_arg2, $vect_outer_exp);
+
+    my %xxeval_fusions = (
+      "vand_vand"   =>   1,
+    );
+
+    KIND: foreach $kind ('scalar','vector') {
       @outer_ops = @logicals;
       if ( $kind eq 'vector' ) {
          $vchr = "v";
          $mode = "VM";
          $pred = "altivec_register_operand";
+         $vect_pred = "vector_fusion_operand";
          $constraint = "v";
          $fuse_type = "fused_vector";
       } else {
          $vchr = "";
          $mode = "GPR";
-         $pred = "gpc_reg_operand";
+         $vect_pred = $pred = "gpc_reg_operand";
          $constraint = "r";
          $fuse_type = "fused_arith_logical";
          push (@outer_ops, @addsub);
          push (@outer_ops, ( "rsubf" ));
       }
       $c4 = "${constraint},${constraint},${constraint},${constraint}";
+      $c5 = "${constraint},${constraint},${constraint},wa,${constraint}";
     OUTER: foreach $outer ( @outer_ops ) {
        $outer_name = "${vchr}${outer}";
        $is_subf = ( $outer eq "subf" );
@@ -263,23 +271,33 @@ sub gen_logical_addsubf
          $bc = ""; if ( $both_commute ) { $bc = "%"; }
          $inner_arg0 = "(match_operand:${mode} 0 \"${pred}\" \"${c4}\")";
          $inner_arg1 = "(match_operand:${mode} 1 \"${pred}\" \"${bc}${c4}\")";
+         $vect_inner_arg0 = "(match_operand:${mode} 0 \"${vect_pred}\" 
\"${c5}\")";
+         $vect_inner_arg1 = "(match_operand:${mode} 1 \"${vect_pred}\" 
\"${bc}${c5}\")";
          if ( ($inner_comp & 1) == 1 ) {
              $inner_arg0 = "(not:${mode} $inner_arg0)";
+             $vect_inner_arg0 = "(not:${mode} $vect_inner_arg0)";
          }
          if ( ($inner_comp & 2) == 2 ) {
              $inner_arg1 = "(not:${mode} $inner_arg1)";
+             $vect_inner_arg1 = "(not:${mode} $vect_inner_arg1)";
          }
          $inner_exp = "(${inner_rtl}:${mode} ${inner_arg0}
                           ${inner_arg1})";
+         $vect_inner_exp = "(${inner_rtl}:${mode} ${vect_inner_arg0}
+                          ${vect_inner_arg1})";
          if ( $inner_inv == 1 ) {
              $inner_exp = "(not:${mode} $inner_exp)";
+             $vect_inner_exp = "(not:${mode} $vect_inner_exp)";
          }
          $outer_arg2 = "(match_operand:${mode} 2 \"${pred}\" \"${c4}\")";
+         $vect_outer_arg2 = "(match_operand:${mode} 2 \"${vect_pred}\" 
\"${c5}\")";
          if ( ($outer_comp & 1) == 1 ) {
              $outer_arg2 = "(not:${mode} $outer_arg2)";
+             $vect_outer_arg2 = "(not:${mode} $vect_outer_arg2)";
          }
          if ( ($outer_comp & 2) == 2 ) {
              $inner_exp = "(not:${mode} $inner_exp)";
+             $vect_inner_exp = "(not:${mode} $vect_inner_exp)";
          }
          if ( $is_subf ) {
              $outer_32 = "%2,%3";
@@ -291,15 +309,23 @@ sub gen_logical_addsubf
          if ( $is_rsubf == 1 ) {
              $outer_exp = "(${outer_rtl}:${mode} ${outer_arg2}
                  ${inner_exp})";
+             $vect_outer_exp = "(${outer_rtl}:${mode} ${vect_outer_arg2}
+                 ${vect_inner_exp})";
          } else {
              $outer_exp = "(${outer_rtl}:${mode} ${inner_exp}
                  ${outer_arg2})";
+             $vect_outer_exp = "(${outer_rtl}:${mode} ${vect_inner_exp}
+                 ${vect_outer_arg2})";
          }
          if ( $outer_inv == 1 ) {
              $outer_exp = "(not:${mode} $outer_exp)";
+             $vect_outer_exp = "(not:${mode} $vect_outer_exp)";
          }
 
-         $insn =  <<"EOF";
+         # See if we can use xxeval on vector fusion
+         $xxeval = $xxeval_fusions{"${inner_op}_${outer_name}"};
+         if (!$xxeval) {
+             $insn =  <<"EOF";
 
 ;; $ftype fusion pattern generated by gen_logical_addsubf
 ;; $kind $inner_op -> $outer_name
@@ -318,6 +344,30 @@ sub gen_logical_addsubf
    (set_attr "length" "8")])
 EOF
 
+         } else {
+             $insn =  <<"EOF";
+
+;; $ftype fusion pattern generated by gen_logical_addsubf
+;; $kind $inner_op -> $outer_name
+(define_insn "*fuse_${inner_op}_${outer_name}"
+  [(set (match_operand:${mode} 3 "${vect_pred}" 
"=&0,&1,&${constraint},wa,${constraint}")
+        ${vect_outer_exp})
+   (clobber (match_scratch:${mode} 4 "=X,X,X,X,&${constraint}"))]
+  "(TARGET_P10_FUSION)"
+  "@
+   ${inner_op} %3,%1,%0\\;${outer_op} %3,${outer_32}
+   ${inner_op} %3,%1,%0\\;${outer_op} %3,${outer_32}
+   ${inner_op} %3,%1,%0\\;${outer_op} %3,${outer_32}
+   xxeval %x3,%x2,%x1,%x0,${xxeval}
+   ${inner_op} %4,%1,%0\\;${outer_op} %3,${outer_42}"
+  [(set_attr "type" "$fuse_type")
+   (set_attr "cost" "6")
+   (set_attr "length" "8")
+   (set_attr "prefixed" "*,*,*,yes,*")
+   (set_attr "isa" "*,*,*,xxeval,*")])
+EOF
+         }
+
          print $insn;
       }
     }
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 647e89afb6a7..a6010a8c8a81 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -119,6 +119,18 @@
   return VSX_REGNO_P (REGNO (op));
 })
 
+;; Return 1 if op is a register that can be used for vector fusion.  If XXEVAL
+;; is supported, return true for all VSX registers, otherwise the fusion is
+;; limited to Altivec registers since the machine only fuses Altivec
+;; operations.
+(define_predicate "vector_fusion_operand"
+  (match_operand 0 "register_operand")
+{
+  return (TARGET_XXEVAL
+         ? vsx_register_operand (op, mode)
+         : altivec_register_operand (op, mode));
+})
+
 ;; Return 1 if op is a vector register that operates on floating point vectors
 ;; (either altivec or VSX).
 (define_predicate "vfloat_operand"
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 9267612fbc9c..0430876eea82 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -500,6 +500,10 @@ extern int rs6000_vector_align[];
 #define TARGET_MINMAX  (TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT         \
                         && (TARGET_P9_MINMAX || !flag_trapping_math))
 
+/* Enable XXEVAL support if we support prefixed instructions and at least
+   power10.  */
+#define TARGET_XXEVAL  (TARGET_POWER10 && TARGET_PREFIXED)
+
 /* In the past we represented the various power cpus (power4, power5, power6,
    etc.) via ISA bits that highlighted a new instruction or we used an extra
    option to represent the hardware (i.e. -mpower8-internal or -mpower10).  Now
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4c2bc81caf56..3b876462ec32 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -369,7 +369,7 @@
   (const (symbol_ref "(enum attr_cpu) rs6000_tune")))
 
 ;; The ISA we implement.
-(define_attr "isa" "any,p5,p6,p7,p7v,p8,p8v,p9,p9v,p9kf,p9tf,p10"
+(define_attr "isa" "any,p5,p6,p7,p7v,p8,p8v,p9,p9v,p9kf,p9tf,p10,xxeval"
   (const_string "any"))
 
 ;; Is this alternative enabled for the current CPU/ISA/etc.?
@@ -421,6 +421,11 @@
      (and (eq_attr "isa" "p10")
          (match_test "TARGET_POWER10"))
      (const_int 1)
+
+     (and (eq_attr "isa" "xxeval")
+         (match_test "TARGET_PREFIXED && TARGET_XXEVAL"))
+     (const_int 1)
+
     ] (const_int 0)))
 
 ;; If this instruction is microcoded on the CELL processor

[gcc(refs/users/meissner/heads/work210-sha)] PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations

Reply via email to