There are only a couple of uses of the old reduction optabs remaining
(the optabs producing a vector with only one element set). This migrates the
uses in gcc/config/rs6000/paired.md.

In the absence of a vec_extract pattern, I generate two subreg moves, the same
as usually produced by the midend when using the old pattern.

I don't have hardware to properly test this, but using a stage 1 compiler, I
have compiled all the tests in gcc.dg/vect, at -O2 -ftree-vectorize -mpaired -S
-mno-altivec -ffast-math -ffinite-math-only, on both
powerpc-none-linux-gnupaired and ppcel-none-linux-gnupaired.

The patterns were triggered on
fast-math-vect-reduc-5.c, fast-math-vect-reduc-8.c, no-fast-math-vect16.c,
vect-reduc-6.c.

no-fast-math-vect16.c exhibited some regalloc differences (regs 0 and 12 are
swapped):

        addi 9,1,136
+       lfs 12,136(29)
        psq_stx 0,0,9,0,0
-       lfs 0,136(29)
-       lfs 12,140(1)
-       fcmpu 7,12,0
+       lfs 0,140(1)
+       fcmpu 7,0,12

No other assembly was changed.

Is this OK for trunk? (stage 3?)

Cheers, Alan

gcc/ChangeLog:

        * config/rs6000/paired.md (reduc_smax_v2sf): Rename to...
        (reduc_smax_scal_v2sf): ...here, make result SFmode, extract element.
        (reduc_smin_v2sf): Rename to...
        (reduc_smin_scal_v2sf): ...here, make result SFmode, extract element.
        (reduc_splus_v2sf): Rename to...
        (reduc_plus_scal_v2sf): ...here, make result SFmode, extract element.
---
 gcc/config/rs6000/paired.md | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/paired.md b/gcc/config/rs6000/paired.md
index 5d094fb..c3f4d66 100644
--- a/gcc/config/rs6000/paired.md
+++ b/gcc/config/rs6000/paired.md
@@ -421,45 +421,62 @@
   DONE;
 })
 
-(define_expand "reduc_smax_v2sf"
-  [(match_operand:V2SF 0 "gpc_reg_operand" "=f")
+(define_expand "reduc_smax_scal_v2sf"
+  [(match_operand:SF 0 "gpc_reg_operand" "=f")
    (match_operand:V2SF 1 "gpc_reg_operand" "f")]
   "TARGET_PAIRED_FLOAT"
 {
   rtx tmp_swap = gen_reg_rtx (V2SFmode);
   rtx tmp = gen_reg_rtx (V2SFmode);
+  rtx vec_res = gen_reg_rtx (V2SFmode);
+  rtx di_res = gen_reg_rtx (DImode);
 
   emit_insn (gen_paired_merge10 (tmp_swap, operands[1], operands[1]));
   emit_insn (gen_subv2sf3 (tmp, operands[1], tmp_swap));
-  emit_insn (gen_selv2sf4 (operands[0], tmp, operands[1], tmp_swap, CONST0_RTX (SFmode)));
+  emit_insn (gen_selv2sf4 (vec_res, tmp, operands[1], tmp_swap,
+                          CONST0_RTX (SFmode)));
+  emit_move_insn (di_res, simplify_gen_subreg (DImode, vec_res, V2SFmode, 0));
+  emit_move_insn (operands[0], simplify_gen_subreg (SFmode, di_res, DImode,
+                                                   BYTES_BIG_ENDIAN ? 4 : 0));
 
   DONE;
 })
 
-(define_expand "reduc_smin_v2sf"
-  [(match_operand:V2SF 0 "gpc_reg_operand" "=f")
+(define_expand "reduc_smin_scal_v2sf"
+  [(match_operand:SF 0 "gpc_reg_operand" "=f")
    (match_operand:V2SF 1 "gpc_reg_operand" "f")]
   "TARGET_PAIRED_FLOAT"
 {
   rtx tmp_swap = gen_reg_rtx (V2SFmode);
   rtx tmp = gen_reg_rtx (V2SFmode);
+  rtx vec_res = gen_reg_rtx (V2SFmode);
+  rtx di_res = gen_reg_rtx (DImode);
 
   emit_insn (gen_paired_merge10 (tmp_swap, operands[1], operands[1]));
   emit_insn (gen_subv2sf3 (tmp, operands[1], tmp_swap));
-  emit_insn (gen_selv2sf4 (operands[0], tmp, tmp_swap, operands[1], CONST0_RTX (SFmode)));
+  emit_insn (gen_selv2sf4 (vec_res, tmp, tmp_swap, operands[1],
+                          CONST0_RTX (SFmode)));
+  emit_move_insn (di_res, simplify_gen_subreg (DImode, vec_res, V2SFmode, 0));
+  emit_move_insn (operands[0], simplify_gen_subreg (SFmode, di_res, DImode,
+                                                   BYTES_BIG_ENDIAN ? 4 : 0));
 
   DONE;
 })
 
-(define_expand "reduc_splus_v2sf"
-  [(set (match_operand:V2SF 0 "gpc_reg_operand" "=f")
+(define_expand "reduc_plus_scal_v2sf"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
         (match_operand:V2SF 1 "gpc_reg_operand" "f"))]
   "TARGET_PAIRED_FLOAT"
-  "
 {
-  emit_insn (gen_paired_sum1 (operands[0], operands[1], operands[1], operands[1]));
+  rtx vec_res = gen_reg_rtx (V2SFmode);
+  rtx di_res = gen_reg_rtx (DImode);
+
+  emit_insn (gen_paired_sum1 (vec_res, operands[1], operands[1], operands[1]));
+  emit_move_insn (di_res, simplify_gen_subreg (DImode, vec_res, V2SFmode, 0));
+  emit_move_insn (operands[0], simplify_gen_subreg (SFmode, di_res, DImode,
+                                                   BYTES_BIG_ENDIAN ? 4 : 0));
   DONE;
-}")
+})
 
 (define_expand "movmisalignv2sf"
   [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
-- 
1.9.1

Reply via email to