Hi Richard,
> -----Original Message----- > From: Richard Biener [mailto:richard.guent...@gmail.com] > Sent: Tuesday, August 04, 2015 4:07 PM > To: Kumar, Venkataramanan > Cc: Jeff Law; Jakub Jelinek; gcc-patches@gcc.gnu.org > Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with > power 2 integer constant. > > On Tue, Aug 4, 2015 at 10:52 AM, Kumar, Venkataramanan > <venkataramanan.ku...@amd.com> wrote: > > Hi Jeff, > > > >> -----Original Message----- > >> From: gcc-patches-ow...@gcc.gnu.org [mailto:gcc-patches- > >> ow...@gcc.gnu.org] On Behalf Of Jeff Law > >> Sent: Monday, August 03, 2015 11:42 PM > >> To: Kumar, Venkataramanan; Jakub Jelinek > >> Cc: Richard Beiner (richard.guent...@gmail.com); > >> gcc-patches@gcc.gnu.org > >> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult > >> expr with power 2 integer constant. > >> > >> On 08/02/2015 05:03 AM, Kumar, Venkataramanan wrote: > >> > Hi Jakub, > >> > > >> > Thank you for reviewing the patch. > >> > > >> > I have incorporated your comments in the attached patch. > >> Note Jakub is on PTO for the next 3 weeks. > > > > Thank you for this information. > > > >> > >> > >> > > >> > > >> > > >> > vectorize_mults_via_shift.diff.txt > >> > > >> > > >> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c > >> > b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c > >> Jakub would probably like more testcases :-) > >> > >> The most obvious thing to test would be other shift factors. > >> > >> A negative test to verify we don't try to turn a multiply by > >> non-constant or multiply by a constant that is not a power of 2 into > >> shifts. > > > > I have added negative test in the attached patch. > > > > > >> > >> [ Would it make sense, for example, to turn a multiply by 3 into a > >> shift-add sequence? As Jakub said, choose_mult_variant can be your > >> friend. ] > > > > Yes I will do that in a follow up patch. > > > > The new change log becomes > > > > gcc/ChangeLog > > 2015-08-04 Venkataramanan Kumar > <venkataramanan.ku...@amd.com> > > * tree-vect-patterns.c (vect_recog_mult_pattern): New function for > vectorizing > > multiplication patterns. > > * tree-vectorizer.h: Adjust the number of patterns. > > > > gcc/testsuite/ChangeLog > > 2015-08-04 Venkataramanan Kumar > <venkataramanan.ku...@amd.com> > > * gcc.dg/vect/vect-mult-pattern-1.c: New > > * gcc.dg/vect/vect-mult-pattern-2.c: New > > > > Bootstrapped and reg tested on aarch64-unknown-linux-gnu. > > > > Ok for trunk ? > > + if (TREE_CODE (oprnd0) != SSA_NAME > + || TREE_CODE (oprnd1) != INTEGER_CST > + || TREE_CODE (itype) != INTEGER_TYPE > > INTEGRAL_TYPE_P (itype) > > + optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector); if > + (!optab > + || optab_handler (optab, TYPE_MODE (vectype)) == > CODE_FOR_nothing) > + return NULL; > + > > indent of the return stmt looks wrong > > + /* Handle constant operands that are postive or negative powers of 2. > + */ if ( wi::exact_log2 (oprnd1) != -1 || > + wi::exact_log2 (wi::neg (oprnd1)) != -1) > > no space after (, || goes to the next line. > > + { > + tree shift; > + > + if (wi::exact_log2 (oprnd1) != -1) > > please cache wi::exact_log2 > > in fact the first if () looks redundant if you simply put an else return NULL > after a else if (wi::exact_log2 (wi::neg (oprnd1)) != -1) > > Note that the issue with INT_MIN is that wi::neg (INT_MIN) is INT_MIN > again, but it seems that wi::exact_log2 returns -1 in that case so you are > fine > (and in fact not handling this case). > I have updated your review comments in the attached patch. For the INT_MIN case, I am getting vectorized output with the patch. I believe x86_64 also vectorizes but does not negates the results. #include <limits.h> unsigned long int __attribute__ ((aligned (64)))arr[100]; int i; #if 1 void test_vector_shifts() { for(i=0; i<=99;i++) arr[i]=arr[i] * INT_MIN; } #endif void test_vectorshift_via_mul() { for(i=0; i<=99;i++) arr[i]=arr[i]*(-INT_MIN); } Before --------- ldr x1, [x0] neg x1, x1, lsl 31 str x1, [x0], 8 cmp x0, x2 After ------- ldr q0, [x0] shl v0.2d, v0.2d, 31 neg v0.2d, v0.2d str q0, [x0], 16 cmp x1, x0 is this fine ? > Thanks, > Richard. > > >> > >> > >> > >> > @@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern > >> (vec<gimple> *stmts, > >> > return pattern_stmt; > >> > } > >> > > >> > +/* Detect multiplication by constant which are postive or > >> > +negatives of power 2, > >> s/postive/positive/ > >> > >> > >> Jeff > > > > Regards, > > Venkat. > >
diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-1.c b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-1.c new file mode 100644 index 0000000..764d0e3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-1.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_shift } */ + +unsigned long int __attribute__ ((aligned (64)))arr[100]; +int i; + +void test_for_vectorshifts_via_mul_with_power2_const () +{ + for (i=0; i<=99; i++) + arr[i] = arr[i] * 4; +} + +void test_for_vectorshifts_via_mul_with_negative_power2_const () +{ + for (i=0; i<=99; i++) + arr[i] = arr[i] * (-4); +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" {target { ! { vect_int_mult } } } } } */ +/* { dg-final { scan-tree-dump-times "vect_recog_mult_pattern: detected" 2 "vect" {target { ! { vect_int_mult } } } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-2.c b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-2.c new file mode 100644 index 0000000..77e8cff --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-2.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_shift } */ + +unsigned long int __attribute__ ((aligned (64)))arr[100]; +int i; + +void negative_test_for_vectorshifts_via_mul_with_const () +{ + for (i=0; i<=99; i++) + arr[i] = arr[i] * 123; +} + +void negative_test_for_vectorshifts_via_mul_with_negative_const () +{ + for (i=0; i<=99; i++) + arr[i] = arr[i] * (-123); +} + +void negative_test_for_vectorshifts_via_mul_with_varable (int x) +{ + for (i=0; i<=99; i++) + arr[i] = arr[i] * x; +} + + +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 3 "vect" {target { ! { vect_int_mult } } } } } */ +/* { dg-final { scan-tree-dump-not "vect_recog_mult_pattern: detected" "vect" {target { ! { vect_int_mult } } } } } */ diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index f034635..bc3117d 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -76,6 +76,10 @@ static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *, tree *, tree *); static gimple vect_recog_divmod_pattern (vec<gimple> *, tree *, tree *); + +static gimple vect_recog_mult_pattern (vec<gimple> *, + tree *, tree *); + static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *, tree *, tree *); static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *); @@ -90,6 +94,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { vect_recog_rotate_pattern, vect_recog_vector_vector_shift_pattern, vect_recog_divmod_pattern, + vect_recog_mult_pattern, vect_recog_mixed_size_cond_pattern, vect_recog_bool_pattern}; @@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts, return pattern_stmt; } +/* Detect multiplication by constant which are postive or negatives of power 2, + and convert them to shift patterns. + + Mult with constants that are postive power of two. + type a_t; + type b_t + S1: b_t = a_t * n + + or + + Mult with constants that are negative power of two. + S2: b_t = a_t * -n + + Input/Output: + + STMTS: Contains a stmt from which the pattern search begins, + i.e. the mult stmt. Convert the mult operation to LSHIFT if + constant operand is a power of 2. + type a_t, b_t + S1': b_t = a_t << log2 (n) + + Convert the mult operation to LSHIFT and followed by a NEGATE + if constant operand is a negative power of 2. + type a_t, b_t, res_T; + S2': b_t = a_t << log2 (n) + S3': res_T = - (b_t) + + Output: + + * TYPE_IN: The type of the input arguments to the pattern. + + * TYPE_OUT: The type of the output of this pattern. + + * Return value: A new stmt that will be used to replace the multiplication + S1 or S2 stmt. */ + +static gimple +vect_recog_mult_pattern (vec<gimple> *stmts, + tree *type_in, tree *type_out) +{ + gimple last_stmt = stmts->pop (); + tree oprnd0, oprnd1, vectype, itype; + gimple pattern_stmt, def_stmt; + optab optab; + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); + int power2_val, power2_neg_val; + tree shift; + + if (!is_gimple_assign (last_stmt)) + return NULL; + + if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR) + return NULL; + + oprnd0 = gimple_assign_rhs1 (last_stmt); + oprnd1 = gimple_assign_rhs2 (last_stmt); + itype = TREE_TYPE (oprnd0); + + if (TREE_CODE (oprnd0) != SSA_NAME + || TREE_CODE (oprnd1) != INTEGER_CST + || !INTEGRAL_TYPE_P (itype) + || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))) + return NULL; + + vectype = get_vectype_for_scalar_type (itype); + if (vectype == NULL_TREE) + return NULL; + + /* If the target can handle vectorized multiplication natively, + don't attempt to optimize this. */ + optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default); + if (optab != unknown_optab) + { + machine_mode vec_mode = TYPE_MODE (vectype); + int icode = (int) optab_handler (optab, vec_mode); + if (icode != CODE_FOR_nothing) + return NULL; + } + + /* If target cannot handle vector left shift then we cannot + optimize and bail out. */ + optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector); + if (!optab + || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) + return NULL; + + power2_val = wi::exact_log2 (oprnd1); + power2_neg_val = wi::exact_log2 (wi::neg (oprnd1)); + + /* Handle constant operands that are postive or negative powers of 2. */ + if (power2_val != -1) + { + shift = build_int_cst (itype, power2_val); + pattern_stmt + = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), + LSHIFT_EXPR, oprnd0, shift); + } + else if (power2_neg_val != -1) + { + /* If the target cannot handle vector NEGATE then we cannot + do the optimization. */ + optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector); + if (!optab + || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) + return NULL; + + shift = build_int_cst (itype, power2_neg_val); + def_stmt + = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), + LSHIFT_EXPR, oprnd0, shift); + new_pattern_def_seq (stmt_vinfo, def_stmt); + pattern_stmt + = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), + NEGATE_EXPR, gimple_assign_lhs (def_stmt)); + } + else + return NULL; + + /* Pattern detected. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "vect_recog_mult_pattern: detected:\n"); + + if (dump_enabled_p ()) + dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, + pattern_stmt,0); + + stmts->safe_push (last_stmt); + *type_in = vectype; + *type_out = vectype; + + return pattern_stmt; +} + /* Detect a signed division by a constant that wouldn't be otherwise vectorized: diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index dfa8795..b490af4 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -1132,7 +1132,7 @@ extern void vect_slp_transform_bb (basic_block); Additional pattern recognition functions can (and will) be added in the future. */ typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *); -#define NUM_PATTERNS 12 +#define NUM_PATTERNS 13 void vect_pattern_recog (loop_vec_info, bb_vec_info); /* In tree-vectorizer.c. */