The direction of VEC_RSHIFT_EXPR has been endian-dependent, contrary to the general principles of the tree representation. This patch updates fold-const and the vectorizer (the only place where such expressions are created) so that VEC_RSHIFT_EXPR always shifts towards element 0.

The tree code still maps directly onto the vec_shr_optab, and so this patch *will break any big-endian platform defining the vec_shr optab*.
--> For AArch64_be, the fix follows as the next patch in this series;
--> For PowerPC, I think patch/RFC 15 should fix this; please inspect;
--> For MIPS, I think patch/RFC 16 should fix this; please inspect.

gcc/ChangeLog:

        * fold-const.c (const_binop): VEC_RSHIFT_EXPR always shifts towards
        element 0.

        * tree-vect-loop.c (vect_create_epilog_for_reduction): Always extract
        the result of a reduction with vector shifts from element 0.

        * tree.def (VEC_RSHIFT_EXPR, VEC_LSHIFT_EXPR): Comment shift direction.

        * doc/md.texi (vec_shr_m, vec_shl_m): Document shift direction.

Testing Done:

Bootstrap and check-gcc on x86_64-none-linux-gnu; check-gcc on aarch64-none-elf.
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index f94e0f62c622d43e2df0d0619fb1eba74c415165..a2e8f297fbdd69dfec23e6e0769a21917b06b5c7 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4885,7 +4885,7 @@ of a wider mode.)
 
 @cindex @code{vec_shr_@var{m}} instruction pattern
 @item @samp{vec_shr_@var{m}}
-Whole vector right shift in bits.
+Whole vector right shift in bits, i.e. towards element 0.
 Operand 1 is a vector to be shifted.
 Operand 2 is an integer shift amount in bits, which must be a multiple of the
 element size.
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index bd4ba5f0c64c710df9fa36d4059f7b08e949fae0..2a4fafa1b0634edd7a56f2484dec3a51a4699222 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -1418,15 +1418,10 @@ const_binop (enum tree_code code, tree arg1, tree arg2)
 	  if (shiftc >= outerc || (shiftc % innerc) != 0)
 	    return NULL_TREE;
 	  int offset = shiftc / innerc;
-	  /* The direction of VEC_RSHIFT_EXPR is endian dependent.
-	     For reductions, if !BYTES_BIG_ENDIAN then compiler picks first
-	     vector element, but last element if BYTES_BIG_ENDIAN.  */
-	  if (BYTES_BIG_ENDIAN)
-	    offset = -offset;
 	  tree zero = build_zero_cst (TREE_TYPE (type));
 	  for (i = 0; i < count; i++)
 	    {
-	      if (i + offset < 0 || i + offset >= count)
+	      if (i + offset >= count)
 		elts[i] = zero;
 	      else
 		elts[i] = VECTOR_CST_ELT (arg1, i + offset);
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index d0a29d312bfd9a7eb552d937e3c64cf9b30d558a..016e2c1fc839fc4d1c97caaa38064fb8bbb510d8 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -3860,7 +3860,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
   gimple epilog_stmt = NULL;
   enum tree_code code = gimple_assign_rhs_code (stmt);
   gimple exit_phi;
-  tree bitsize, bitpos;
+  tree bitsize;
   tree adjustment_def = NULL;
   tree vec_initial_def = NULL;
   tree reduction_op, expr, def;
@@ -4371,14 +4371,8 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
         dump_printf_loc (MSG_NOTE, vect_location,
 			 "extract scalar result\n");
 
-      if (BYTES_BIG_ENDIAN)
-        bitpos = size_binop (MULT_EXPR,
-                             bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
-                             TYPE_SIZE (scalar_type));
-      else
-        bitpos = bitsize_zero_node;
-
-      rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
+      rhs = build3 (BIT_FIELD_REF, scalar_type,
+		    new_temp, bitsize, bitsize_zero_node);
       epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
       new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
       gimple_assign_set_lhs (epilog_stmt, new_temp);
diff --git a/gcc/tree.def b/gcc/tree.def
index ff56bfc18bc00e8dac2dfc072fd4fa878a0f2a04..90bc27fde303e1606baac858738a7a86a517573b 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1238,7 +1238,7 @@ DEFTREECODE (WIDEN_LSHIFT_EXPR, "widen_lshift_expr", tcc_binary, 2)
    before adding operand three.  */
 DEFTREECODE (FMA_EXPR, "fma_expr", tcc_expression, 3)
 
-/* Whole vector right shift in bits.
+/* Whole vector right shift in bits, i.e. towards element 0.
    Operand 0 is a vector to be shifted.
    Operand 1 is an integer shift amount in bits, which must be a multiple of the
    element size.  */

Reply via email to