The direction of VEC_RSHIFT_EXPR has been endian-dependent, contrary to the
general principle that the semantics of tree codes should not depend on the
target's endianness. This patch updates fold-const.c and the vectorizer (the
only place where such expressions are created) so that VEC_RSHIFT_EXPR always
shifts towards element 0.
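To make the new lane numbering concrete, here is a minimal stand-alone C model
(illustrative only, not GCC internals; the helper name vec_rshift_by_lanes is
made up) of what a VEC_RSHIFT_EXPR by one element now means on every target:

/* Model a 4-lane vector as an array indexed by GIMPLE lane number.
   A whole-vector right shift moves every lane towards index 0 and
   zero-fills the vacated high lanes, regardless of BYTES_BIG_ENDIAN.  */
#include <stdio.h>

#define LANES 4

static void
vec_rshift_by_lanes (int v[LANES], int nlanes)
{
  /* Writes at index i only read indices > i, so in-place is safe.  */
  for (int i = 0; i < LANES; i++)
    v[i] = (i + nlanes < LANES) ? v[i + nlanes] : 0;
}

int
main (void)
{
  int v[LANES] = { 10, 20, 30, 40 };
  vec_rshift_by_lanes (v, 1);
  /* Prints 20 30 40 0: the former element 1 is now element 0.  */
  for (int i = 0; i < LANES; i++)
    printf ("%d ", v[i]);
  printf ("\n");
  return 0;
}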
The tree code still maps directly onto vec_shr_optab, and so this patch
*will break any big-endian platform defining the vec_shr optab*.
--> For AArch64_be, a patch follows next in this series;
--> For PowerPC, I think patch/rfc 15 should fix this; please inspect;
--> For MIPS, I think patch/rfc 16 should fix this; please inspect.
gcc/ChangeLog:
* fold-const.c (const_binop): Make VEC_RSHIFT_EXPR always shift towards
element 0.
* tree-vect-loop.c (vect_create_epilog_for_reduction): Always extract
the result of a reduction with vector shifts from element 0.
* tree.def (VEC_RSHIFT_EXPR, VEC_LSHIFT_EXPR): Comment shift direction.
* doc/md.texi (vec_shr_m, vec_shl_m): Document shift direction.
Testing Done:
Bootstrap and check-gcc on x86_64-none-linux-gnu; check-gcc on aarch64-none-elf.
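For reference, a rough C model of the epilogue sequence that the
tree-vect-loop.c hunk below now produces for a plus-reduction (a sketch only,
assuming a 4-lane integer vector; reduce_plus is a made-up name, not a GCC
function): halve the live lanes with a whole-vector shift towards element 0,
add, repeat, then read the scalar result from element 0 on either endianness.

#include <stdio.h>

#define LANES 4

static int
reduce_plus (const int v_in[LANES])
{
  int v[LANES], tmp[LANES];
  for (int i = 0; i < LANES; i++)
    v[i] = v_in[i];

  for (int half = LANES / 2; half >= 1; half /= 2)
    {
      /* VEC_RSHIFT_EXPR by 'half' elements: shift towards element 0,
         zero-filling the vacated high lanes.  */
      for (int i = 0; i < LANES; i++)
        tmp[i] = (i + half < LANES) ? v[i + half] : 0;
      /* Element-wise add of the shifted copy back into the vector.  */
      for (int i = 0; i < LANES; i++)
        v[i] += tmp[i];
    }

  /* BIT_FIELD_REF at bit position 0: always element 0 now.  */
  return v[0];
}

int
main (void)
{
  int v[LANES] = { 1, 2, 3, 4 };
  printf ("%d\n", reduce_plus (v));   /* Prints 10.  */
  return 0;
}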
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index f94e0f62c622d43e2df0d0619fb1eba74c415165..a2e8f297fbdd69dfec23e6e0769a21917b06b5c7 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4885,7 +4885,7 @@ of a wider mode.)
@cindex @code{vec_shr_@var{m}} instruction pattern
@item @samp{vec_shr_@var{m}}
-Whole vector right shift in bits.
+Whole vector right shift in bits, i.e. towards element 0.
Operand 1 is a vector to be shifted.
Operand 2 is an integer shift amount in bits, which must be a multiple of the
element size.
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index bd4ba5f0c64c710df9fa36d4059f7b08e949fae0..2a4fafa1b0634edd7a56f2484dec3a51a4699222 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -1418,15 +1418,10 @@ const_binop (enum tree_code code, tree arg1, tree arg2)
if (shiftc >= outerc || (shiftc % innerc) != 0)
return NULL_TREE;
int offset = shiftc / innerc;
- /* The direction of VEC_RSHIFT_EXPR is endian dependent.
- For reductions, if !BYTES_BIG_ENDIAN then compiler picks first
- vector element, but last element if BYTES_BIG_ENDIAN. */
- if (BYTES_BIG_ENDIAN)
- offset = -offset;
tree zero = build_zero_cst (TREE_TYPE (type));
for (i = 0; i < count; i++)
{
- if (i + offset < 0 || i + offset >= count)
+ if (i + offset >= count)
elts[i] = zero;
else
elts[i] = VECTOR_CST_ELT (arg1, i + offset);
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index d0a29d312bfd9a7eb552d937e3c64cf9b30d558a..016e2c1fc839fc4d1c97caaa38064fb8bbb510d8 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -3860,7 +3860,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
gimple epilog_stmt = NULL;
enum tree_code code = gimple_assign_rhs_code (stmt);
gimple exit_phi;
- tree bitsize, bitpos;
+ tree bitsize;
tree adjustment_def = NULL;
tree vec_initial_def = NULL;
tree reduction_op, expr, def;
@@ -4371,14 +4371,8 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
dump_printf_loc (MSG_NOTE, vect_location,
"extract scalar result\n");
- if (BYTES_BIG_ENDIAN)
- bitpos = size_binop (MULT_EXPR,
- bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
- TYPE_SIZE (scalar_type));
- else
- bitpos = bitsize_zero_node;
-
- rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
+ rhs = build3 (BIT_FIELD_REF, scalar_type,
+ new_temp, bitsize, bitsize_zero_node);
epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
gimple_assign_set_lhs (epilog_stmt, new_temp);
diff --git a/gcc/tree.def b/gcc/tree.def
index ff56bfc18bc00e8dac2dfc072fd4fa878a0f2a04..90bc27fde303e1606baac858738a7a86a517573b 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1238,7 +1238,7 @@ DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2)
before adding operand three. */
DEFTREECODE (FMA_EXPR, "fma_expr", tcc_expression, 3)
-/* Whole vector right shift in bits.
+/* Whole vector right shift in bits, i.e. towards element 0.
Operand 0 is a vector to be shifted.
Operand 1 is an integer shift amount in bits, which must be a multiple of the
element size. */