The following patch fixes the vec_shr expander to do a shift instead of
a rotate. CPU2006 benchmark 482.sphinx3 recently started failing due to
this issue. Bootstrapped and tested on ppc64/ppc64le with no new
regressions. Ok for trunk? And ok for 4.9/5 (with equivalent change to
vec_shl expander which exists in those releases) after bootstrap/regtest?
-Pat
2015-08-25 Pat Haugen pthau...@us.ibm.com
* config/rs6000/vector.md (vec_shr_<mode>): Fix to do a shift
instead of a rotate.
gcc/testsuite:
* gcc.target/powerpc/vec-shr.c: New.
Index: gcc/config/rs6000/vector.md
===
--- gcc/config/rs6000/vector.md (revision 227041)
+++ gcc/config/rs6000/vector.md (working copy)
@@ -977,6 +977,8 @@ (define_expand "movmisalign<mode>"
;; General shift amounts can be supported using vsro + vsr. We're
;; not expecting to see these yet (the vectorizer currently
;; generates only shifts by a whole number of vector elements).
+;; Note that the vec_shr operation is actually defined as
+;; 'shift toward element 0' so is a shr for LE and shl for BE.
(define_expand "vec_shr_<mode>"
[(match_operand:VEC_L 0 "vlogical_operand" "")
(match_operand:VEC_L 1 "vlogical_operand" "")
@@ -987,6 +989,7 @@ (define_expand "vec_shr_<mode>"
rtx bitshift = operands[2];
rtx shift;
rtx insn;
+ rtx zero_reg, op1, op2;
HOST_WIDE_INT bitshift_val;
HOST_WIDE_INT byteshift_val;
@@ -996,19 +999,29 @@ (define_expand "vec_shr_<mode>"
if (bitshift_val & 0x7)
FAIL;
byteshift_val = (bitshift_val >> 3);
+ zero_reg = gen_reg_rtx(<MODE>mode);
+ emit_move_insn (zero_reg, CONST0_RTX (<MODE>mode));
if (!BYTES_BIG_ENDIAN)
-byteshift_val = 16 - byteshift_val;
+{
+ byteshift_val = 16 - byteshift_val;
+ op1 = zero_reg;
+ op2 = operands[1];
+}
+ else
+{
+ op1 = operands[1];
+ op2 = zero_reg;
+}
+
if (TARGET_VSX && (byteshift_val & 0x3) == 0)
{
shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2);
- insn = gen_vsx_xxsldwi_<mode> (operands[0], operands[1], operands[1],
- shift);
+ insn = gen_vsx_xxsldwi_<mode> (operands[0], op1, op2, shift);
}
else
{
shift = gen_rtx_CONST_INT (QImode, byteshift_val);
- insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
- shift);
+ insn = gen_altivec_vsldoi_<mode> (operands[0], op1, op2, shift);
}
}
emit_insn (insn);
Index: gcc/testsuite/gcc.target/powerpc/vec-shr.c
===
--- gcc/testsuite/gcc.target/powerpc/vec-shr.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vec-shr.c (working copy)
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline" } */
+
+#include <stdlib.h>
+
+typedef struct { double r, i; } complex;
+#define LEN 30
+complex c[LEN];
+double d[LEN];
+
+void
+foo (complex *c, double *d, int len1)
+{
+ int i;
+ for (i = 0; i < len1; i++)
+{
+ c[i].r = d[i];
+ c[i].i = 0.0;
+}
+}
+
+int
+main (void)
+{
+ int i;
+ for (i = 0; i < LEN; i++)
+d[i] = (double) i;
+ foo (c, d, LEN);
+ for (i=0;i<LEN;i++)
+if ((c[i].r != (double) i) || (c[i].i != 0.0))
+ abort ();
+ return 0;
+}
+