Hi Richard,
> -----Original Message-----
> From: Richard Biener [mailto:[email protected]]
> Sent: Tuesday, August 04, 2015 4:07 PM
> To: Kumar, Venkataramanan
> Cc: Jeff Law; Jakub Jelinek; [email protected]
> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with
> power 2 integer constant.
>
> On Tue, Aug 4, 2015 at 10:52 AM, Kumar, Venkataramanan
> <[email protected]> wrote:
> > Hi Jeff,
> >
> >> -----Original Message-----
> >> From: [email protected] [mailto:gcc-patches-
> >> [email protected]] On Behalf Of Jeff Law
> >> Sent: Monday, August 03, 2015 11:42 PM
> >> To: Kumar, Venkataramanan; Jakub Jelinek
> >> Cc: Richard Beiner ([email protected]);
> >> [email protected]
> >> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult
> >> expr with power 2 integer constant.
> >>
> >> On 08/02/2015 05:03 AM, Kumar, Venkataramanan wrote:
> >> > Hi Jakub,
> >> >
> >> > Thank you for reviewing the patch.
> >> >
> >> > I have incorporated your comments in the attached patch.
> >> Note Jakub is on PTO for the next 3 weeks.
> >
> > Thank you for this information.
> >
> >>
> >>
> >> >
> >> >
> >> >
> >> > vectorize_mults_via_shift.diff.txt
> >> >
> >> >
> >> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
> >> > b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
> >> Jakub would probably like more testcases :-)
> >>
> >> The most obvious thing to test would be other shift factors.
> >>
> >> A negative test to verify we don't try to turn a multiply by
> >> non-constant or multiply by a constant that is not a power of 2 into
> >> shifts.
> >
> > I have added negative test in the attached patch.
> >
> >
> >>
> >> [ Would it make sense, for example, to turn a multiply by 3 into a
> >> shift-add sequence? As Jakub said, choose_mult_variant can be your
> >> friend. ]
> >
> > Yes I will do that in a follow up patch.
> >
> > The new change log becomes
> >
> > gcc/ChangeLog
> > 2015-08-04 Venkataramanan Kumar
> <[email protected]>
> > * tree-vect-patterns.c (vect_recog_mult_pattern): New function for
> vectorizing
> > multiplication patterns.
> > * tree-vectorizer.h: Adjust the number of patterns.
> >
> > gcc/testsuite/ChangeLog
> > 2015-08-04 Venkataramanan Kumar
> <[email protected]>
> > * gcc.dg/vect/vect-mult-pattern-1.c: New
> > * gcc.dg/vect/vect-mult-pattern-2.c: New
> >
> > Bootstrapped and reg tested on aarch64-unknown-linux-gnu.
> >
> > Ok for trunk ?
>
> + if (TREE_CODE (oprnd0) != SSA_NAME
> + || TREE_CODE (oprnd1) != INTEGER_CST
> + || TREE_CODE (itype) != INTEGER_TYPE
>
> INTEGRAL_TYPE_P (itype)
>
> + optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector); if
> + (!optab
> + || optab_handler (optab, TYPE_MODE (vectype)) ==
> CODE_FOR_nothing)
> + return NULL;
> +
>
> indent of the return stmt looks wrong
>
> + /* Handle constant operands that are postive or negative powers of 2.
> + */ if ( wi::exact_log2 (oprnd1) != -1 ||
> + wi::exact_log2 (wi::neg (oprnd1)) != -1)
>
> no space after (, || goes to the next line.
>
> + {
> + tree shift;
> +
> + if (wi::exact_log2 (oprnd1) != -1)
>
> please cache wi::exact_log2
>
> in fact the first if () looks redundant if you simply put an else return NULL
> after a else if (wi::exact_log2 (wi::neg (oprnd1)) != -1)
>
> Note that the issue with INT_MIN is that wi::neg (INT_MIN) is INT_MIN
> again, but it seems that wi::exact_log2 returns -1 in that case so you are
> fine
> (and in fact not handling this case).
>
I have updated your review comments in the attached patch.
For the INT_MIN case, I am getting vectorized output with the patch. I
believe x86_64 also vectorizes but does not negates the results.
#include <limits.h>
unsigned long int __attribute__ ((aligned (64)))arr[100];
int i;
#if 1
void test_vector_shifts()
{
for(i=0; i<=99;i++)
arr[i]=arr[i] * INT_MIN;
}
#endif
void test_vectorshift_via_mul()
{
for(i=0; i<=99;i++)
arr[i]=arr[i]*(-INT_MIN);
}
Before
---------
ldr x1, [x0]
neg x1, x1, lsl 31
str x1, [x0], 8
cmp x0, x2
After
-------
ldr q0, [x0]
shl v0.2d, v0.2d, 31
neg v0.2d, v0.2d
str q0, [x0], 16
cmp x1, x0
is this fine ?
> Thanks,
> Richard.
>
> >>
> >>
> >>
> >> > @@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern
> >> (vec<gimple> *stmts,
> >> > return pattern_stmt;
> >> > }
> >> >
> >> > +/* Detect multiplication by constant which are postive or
> >> > +negatives of power 2,
> >> s/postive/positive/
> >>
> >>
> >> Jeff
> >
> > Regards,
> > Venkat.
> >
diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-1.c
b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-1.c
new file mode 100644
index 0000000..764d0e3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+unsigned long int __attribute__ ((aligned (64)))arr[100];
+int i;
+
+void test_for_vectorshifts_via_mul_with_power2_const ()
+{
+ for (i=0; i<=99; i++)
+ arr[i] = arr[i] * 4;
+}
+
+void test_for_vectorshifts_via_mul_with_negative_power2_const ()
+{
+ for (i=0; i<=99; i++)
+ arr[i] = arr[i] * (-4);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" {target {
! { vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_mult_pattern: detected" 2
"vect" {target { ! { vect_int_mult } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-2.c
b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-2.c
new file mode 100644
index 0000000..77e8cff
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-2.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+unsigned long int __attribute__ ((aligned (64)))arr[100];
+int i;
+
+void negative_test_for_vectorshifts_via_mul_with_const ()
+{
+ for (i=0; i<=99; i++)
+ arr[i] = arr[i] * 123;
+}
+
+void negative_test_for_vectorshifts_via_mul_with_negative_const ()
+{
+ for (i=0; i<=99; i++)
+ arr[i] = arr[i] * (-123);
+}
+
+void negative_test_for_vectorshifts_via_mul_with_varable (int x)
+{
+ for (i=0; i<=99; i++)
+ arr[i] = arr[i] * x;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 3 "vect" {target {
! { vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-not "vect_recog_mult_pattern: detected" "vect"
{target { ! { vect_int_mult } } } } } */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index f034635..bc3117d 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -76,6 +76,10 @@ static gimple vect_recog_vector_vector_shift_pattern
(vec<gimple> *,
tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
tree *, tree *);
+
+static gimple vect_recog_mult_pattern (vec<gimple> *,
+ tree *, tree *);
+
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
@@ -90,6 +94,7 @@ static vect_recog_func_ptr
vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
vect_recog_rotate_pattern,
vect_recog_vector_vector_shift_pattern,
vect_recog_divmod_pattern,
+ vect_recog_mult_pattern,
vect_recog_mixed_size_cond_pattern,
vect_recog_bool_pattern};
@@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern (vec<gimple>
*stmts,
return pattern_stmt;
}
+/* Detect multiplication by constant which are postive or negatives of power 2,
+ and convert them to shift patterns.
+
+ Mult with constants that are postive power of two.
+ type a_t;
+ type b_t
+ S1: b_t = a_t * n
+
+ or
+
+ Mult with constants that are negative power of two.
+ S2: b_t = a_t * -n
+
+ Input/Output:
+
+ STMTS: Contains a stmt from which the pattern search begins,
+ i.e. the mult stmt. Convert the mult operation to LSHIFT if
+ constant operand is a power of 2.
+ type a_t, b_t
+ S1': b_t = a_t << log2 (n)
+
+ Convert the mult operation to LSHIFT and followed by a NEGATE
+ if constant operand is a negative power of 2.
+ type a_t, b_t, res_T;
+ S2': b_t = a_t << log2 (n)
+ S3': res_T = - (b_t)
+
+ Output:
+
+ * TYPE_IN: The type of the input arguments to the pattern.
+
+ * TYPE_OUT: The type of the output of this pattern.
+
+ * Return value: A new stmt that will be used to replace the multiplication
+ S1 or S2 stmt. */
+
+static gimple
+vect_recog_mult_pattern (vec<gimple> *stmts,
+ tree *type_in, tree *type_out)
+{
+ gimple last_stmt = stmts->pop ();
+ tree oprnd0, oprnd1, vectype, itype;
+ gimple pattern_stmt, def_stmt;
+ optab optab;
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+ int power2_val, power2_neg_val;
+ tree shift;
+
+ if (!is_gimple_assign (last_stmt))
+ return NULL;
+
+ if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
+ return NULL;
+
+ oprnd0 = gimple_assign_rhs1 (last_stmt);
+ oprnd1 = gimple_assign_rhs2 (last_stmt);
+ itype = TREE_TYPE (oprnd0);
+
+ if (TREE_CODE (oprnd0) != SSA_NAME
+ || TREE_CODE (oprnd1) != INTEGER_CST
+ || !INTEGRAL_TYPE_P (itype)
+ || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
+ return NULL;
+
+ vectype = get_vectype_for_scalar_type (itype);
+ if (vectype == NULL_TREE)
+ return NULL;
+
+ /* If the target can handle vectorized multiplication natively,
+ don't attempt to optimize this. */
+ optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
+ if (optab != unknown_optab)
+ {
+ machine_mode vec_mode = TYPE_MODE (vectype);
+ int icode = (int) optab_handler (optab, vec_mode);
+ if (icode != CODE_FOR_nothing)
+ return NULL;
+ }
+
+ /* If target cannot handle vector left shift then we cannot
+ optimize and bail out. */
+ optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
+ if (!optab
+ || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+ return NULL;
+
+ power2_val = wi::exact_log2 (oprnd1);
+ power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
+
+ /* Handle constant operands that are postive or negative powers of 2. */
+ if (power2_val != -1)
+ {
+ shift = build_int_cst (itype, power2_val);
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+ LSHIFT_EXPR, oprnd0, shift);
+ }
+ else if (power2_neg_val != -1)
+ {
+ /* If the target cannot handle vector NEGATE then we cannot
+ do the optimization. */
+ optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
+ if (!optab
+ || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+ return NULL;
+
+ shift = build_int_cst (itype, power2_neg_val);
+ def_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+ LSHIFT_EXPR, oprnd0, shift);
+ new_pattern_def_seq (stmt_vinfo, def_stmt);
+ pattern_stmt
+ = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+ NEGATE_EXPR, gimple_assign_lhs (def_stmt));
+ }
+ else
+ return NULL;
+
+ /* Pattern detected. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vect_recog_mult_pattern: detected:\n");
+
+ if (dump_enabled_p ())
+ dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
+ pattern_stmt,0);
+
+ stmts->safe_push (last_stmt);
+ *type_in = vectype;
+ *type_out = vectype;
+
+ return pattern_stmt;
+}
+
/* Detect a signed division by a constant that wouldn't be
otherwise vectorized:
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index dfa8795..b490af4 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1132,7 +1132,7 @@ extern void vect_slp_transform_bb (basic_block);
Additional pattern recognition functions can (and will) be added
in the future. */
typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *);
-#define NUM_PATTERNS 12
+#define NUM_PATTERNS 13
void vect_pattern_recog (loop_vec_info, bb_vec_info);
/* In tree-vectorizer.c. */