https://gcc.gnu.org/g:742340663eeb49c659420603e1d5a73579316004
commit 742340663eeb49c659420603e1d5a73579316004 Author: Michael Meissner <meiss...@linux.ibm.com> Date: Tue Jun 10 15:45:21 2025 -0400 PR target/117251: Add PowerPC XXEVAL support to speed up SHA3 calculations 2025-06-10 Michael Meissner <meiss...@linux.ibm.com> gcc/ PR target/117251 * config/rs6000/fusion.md: Regenerate. * config/rs6000/genfusion.pl (gen_logical_addsubf): Add support to generate vector/vector and/and fusion if XXEVAL is supported. * config/rs6000/predicates.md (vector_fusion_operand): New predicate. * config/rs6000/rs6000.h (TARGET_XXEVAL): New macro. * config/rs6000/rs6000.md (isa attribute): Add xxeval. (enabled attribute): Add XXEVAL support. Diff: --- gcc/config/rs6000/fusion.md | 15 ++++++----- gcc/config/rs6000/genfusion.pl | 58 ++++++++++++++++++++++++++++++++++++++--- gcc/config/rs6000/predicates.md | 12 +++++++++ gcc/config/rs6000/rs6000.h | 4 +++ gcc/config/rs6000/rs6000.md | 7 ++++- 5 files changed, 85 insertions(+), 11 deletions(-) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 621b346f9eb9..d24837d68d83 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -1871,20 +1871,23 @@ ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vand -> vand (define_insn "*fuse_vand_vand" - [(set (match_operand:VM 3 "altivec_register_operand" "=&0,&1,&v,v") - (and:VM (and:VM (match_operand:VM 0 "altivec_register_operand" "v,v,v,v") - (match_operand:VM 1 "altivec_register_operand" "%v,v,v,v")) - (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) - (clobber (match_scratch:VM 4 "=X,X,X,&v"))] + [(set (match_operand:VM 3 "vector_fusion_operand" "=&0,&1,&v,wa,v") + (and:VM (and:VM (match_operand:VM 0 "vector_fusion_operand" "v,v,v,wa,v") + (match_operand:VM 1 "vector_fusion_operand" "%v,v,v,wa,v")) + (match_operand:VM 2 "vector_fusion_operand" "v,v,v,wa,v"))) + (clobber (match_scratch:VM 4 "=X,X,X,X,&v"))] "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vand %3,%3,%2 
vand %3,%1,%0\;vand %3,%3,%2 vand %3,%1,%0\;vand %3,%3,%2 + xxeval %x3,%x2,%x1,%x0,1 vand %4,%1,%0\;vand %3,%4,%2" [(set_attr "type" "fused_vector") (set_attr "cost" "6") - (set_attr "length" "8")]) + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) ;; logical-logical fusion pattern generated by gen_logical_addsubf ;; vector vandc -> vand diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index e5d3b1ee449d..351a4d914a4a 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -211,25 +211,33 @@ sub gen_logical_addsubf $inner_comp, $inner_inv, $inner_rtl, $inner_op, $both_commute, $c4, $bc, $inner_arg0, $inner_arg1, $inner_exp, $outer_arg2, $outer_exp, $ftype, $insn, $is_subf, $is_rsubf, $outer_32, $outer_42,$outer_name, - $fuse_type); - KIND: foreach $kind ('scalar','vector') { + $fuse_type, $xxeval, $c5, $vect_pred, $vect_inner_arg0, $vect_inner_arg1, + $vect_inner_exp, $vect_outer_arg2, $vect_outer_exp); + + my %xxeval_fusions = ( + "vand_vand" => 1, + ); + + KIND: foreach $kind ('scalar','vector') { @outer_ops = @logicals; if ( $kind eq 'vector' ) { $vchr = "v"; $mode = "VM"; $pred = "altivec_register_operand"; + $vect_pred = "vector_fusion_operand"; $constraint = "v"; $fuse_type = "fused_vector"; } else { $vchr = ""; $mode = "GPR"; - $pred = "gpc_reg_operand"; + $vect_pred = $pred = "gpc_reg_operand"; $constraint = "r"; $fuse_type = "fused_arith_logical"; push (@outer_ops, @addsub); push (@outer_ops, ( "rsubf" )); } $c4 = "${constraint},${constraint},${constraint},${constraint}"; + $c5 = "${constraint},${constraint},${constraint},wa,${constraint}"; OUTER: foreach $outer ( @outer_ops ) { $outer_name = "${vchr}${outer}"; $is_subf = ( $outer eq "subf" ); @@ -263,23 +271,33 @@ sub gen_logical_addsubf $bc = ""; if ( $both_commute ) { $bc = "%"; } $inner_arg0 = "(match_operand:${mode} 0 \"${pred}\" \"${c4}\")"; $inner_arg1 = "(match_operand:${mode} 1 \"${pred}\" 
\"${bc}${c4}\")"; + $vect_inner_arg0 = "(match_operand:${mode} 0 \"${vect_pred}\" \"${c5}\")"; + $vect_inner_arg1 = "(match_operand:${mode} 1 \"${vect_pred}\" \"${bc}${c5}\")"; if ( ($inner_comp & 1) == 1 ) { $inner_arg0 = "(not:${mode} $inner_arg0)"; + $vect_inner_arg0 = "(not:${mode} $vect_inner_arg0)"; } if ( ($inner_comp & 2) == 2 ) { $inner_arg1 = "(not:${mode} $inner_arg1)"; + $vect_inner_arg1 = "(not:${mode} $vect_inner_arg1)"; } $inner_exp = "(${inner_rtl}:${mode} ${inner_arg0} ${inner_arg1})"; + $vect_inner_exp = "(${inner_rtl}:${mode} ${vect_inner_arg0} + ${vect_inner_arg1})"; if ( $inner_inv == 1 ) { $inner_exp = "(not:${mode} $inner_exp)"; + $vect_inner_exp = "(not:${mode} $vect_inner_exp)"; } $outer_arg2 = "(match_operand:${mode} 2 \"${pred}\" \"${c4}\")"; + $vect_outer_arg2 = "(match_operand:${mode} 2 \"${vect_pred}\" \"${c5}\")"; if ( ($outer_comp & 1) == 1 ) { $outer_arg2 = "(not:${mode} $outer_arg2)"; + $vect_outer_arg2 = "(not:${mode} $vect_outer_arg2)"; } if ( ($outer_comp & 2) == 2 ) { $inner_exp = "(not:${mode} $inner_exp)"; + $vect_inner_exp = "(not:${mode} $vect_inner_exp)"; } if ( $is_subf ) { $outer_32 = "%2,%3"; @@ -291,15 +309,23 @@ sub gen_logical_addsubf if ( $is_rsubf == 1 ) { $outer_exp = "(${outer_rtl}:${mode} ${outer_arg2} ${inner_exp})"; + $vect_outer_exp = "(${outer_rtl}:${mode} ${vect_outer_arg2} + ${vect_inner_exp})"; } else { $outer_exp = "(${outer_rtl}:${mode} ${inner_exp} ${outer_arg2})"; + $vect_outer_exp = "(${outer_rtl}:${mode} ${vect_inner_exp} + ${vect_outer_arg2})"; } if ( $outer_inv == 1 ) { $outer_exp = "(not:${mode} $outer_exp)"; + $vect_outer_exp = "(not:${mode} $vect_outer_exp)"; } - $insn = <<"EOF"; + # See if we can use xxeval on vector fusion + $xxeval = $xxeval_fusions{"${inner_op}_${outer_name}"}; + if (!$xxeval) { + $insn = <<"EOF"; ;; $ftype fusion pattern generated by gen_logical_addsubf ;; $kind $inner_op -> $outer_name @@ -318,6 +344,30 @@ sub gen_logical_addsubf (set_attr "length" "8")]) EOF + } else { + 
$insn = <<"EOF"; + +;; $ftype fusion pattern generated by gen_logical_addsubf +;; $kind $inner_op -> $outer_name +(define_insn "*fuse_${inner_op}_${outer_name}" + [(set (match_operand:${mode} 3 "${vect_pred}" "=&0,&1,&${constraint},wa,${constraint}") + ${vect_outer_exp}) + (clobber (match_scratch:${mode} 4 "=X,X,X,X,&${constraint}"))] + "(TARGET_P10_FUSION)" + "@ + ${inner_op} %3,%1,%0\\;${outer_op} %3,${outer_32} + ${inner_op} %3,%1,%0\\;${outer_op} %3,${outer_32} + ${inner_op} %3,%1,%0\\;${outer_op} %3,${outer_32} + xxeval %x3,%x2,%x1,%x0,${xxeval} + ${inner_op} %4,%1,%0\\;${outer_op} %3,${outer_42}" + [(set_attr "type" "$fuse_type") + (set_attr "cost" "6") + (set_attr "length" "8") + (set_attr "prefixed" "*,*,*,yes,*") + (set_attr "isa" "*,*,*,xxeval,*")]) +EOF + } + print $insn; } } diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 647e89afb6a7..a6010a8c8a81 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -119,6 +119,18 @@ return VSX_REGNO_P (REGNO (op)); }) +;; Return 1 if op is a register that can be used for vector fusion. If XXEVAL +;; is supported, return true for all VSX registers, otherwise the fusion is +;; limited to Altivec registers since the machine only fuses Altivec +;; operations. +(define_predicate "vector_fusion_operand" + (match_operand 0 "register_operand") +{ + return (TARGET_XXEVAL + ? vsx_register_operand (op, mode) + : altivec_register_operand (op, mode)); +}) + ;; Return 1 if op is a vector register that operates on floating point vectors ;; (either altivec or VSX). 
(define_predicate "vfloat_operand" diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 9267612fbc9c..0430876eea82 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -500,6 +500,10 @@ extern int rs6000_vector_align[]; #define TARGET_MINMAX (TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT \ && (TARGET_P9_MINMAX || !flag_trapping_math)) +/* Enable XXEVAL support if we support prefixed instructions and at least + power10. */ +#define TARGET_XXEVAL (TARGET_POWER10 && TARGET_PREFIXED) + /* In the past we represented the various power cpus (power4, power5, power6, etc.) via ISA bits that highlighted a new instruction or we used an extra option to represent the hardware (i.e. -mpower8-internal or -mpower10). Now diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 4c2bc81caf56..3b876462ec32 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -369,7 +369,7 @@ (const (symbol_ref "(enum attr_cpu) rs6000_tune"))) ;; The ISA we implement. -(define_attr "isa" "any,p5,p6,p7,p7v,p8,p8v,p9,p9v,p9kf,p9tf,p10" +(define_attr "isa" "any,p5,p6,p7,p7v,p8,p8v,p9,p9v,p9kf,p9tf,p10,xxeval" (const_string "any")) ;; Is this alternative enabled for the current CPU/ISA/etc.? @@ -421,6 +421,11 @@ (and (eq_attr "isa" "p10") (match_test "TARGET_POWER10")) (const_int 1) + + (and (eq_attr "isa" "xxeval") + (match_test "TARGET_PREFIXED && TARGET_XXEVAL")) + (const_int 1) + ] (const_int 0))) ;; If this instruction is microcoded on the CELL processor