On Sat, Dec 7, 2013 at 12:45 AM, Cong Hou <co...@google.com> wrote:
> After further reviewing this patch, I found I don't have to change the
> code in tree-vect-stmts.c to allow further type conversion after the
> widen-mult operation. Instead, I detect the following pattern in
> vect_recog_widen_mult_pattern():
>
>   T1 a, b;
>   ai = (T2) a;
>   bi = (T2) b;
>   c = ai * bi;
>
> where T2 is more than double the size of T1 (e.g. T1 is char and T2
> is int).
>
> In this case I just create a new type T3 whose size is double the size
> of T1, then get an intermediate result of type T3 from widen-mult.
> Then I add a new statement to STMT_VINFO_PATTERN_DEF_SEQ converting
> the result into type T2.
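>
> For example (a minimal sketch; with T1 = char and T2 = int, the new
> intermediate type T3 is short), the recognized pattern effectively
> becomes:
>
>   char a, b;
>   short tmp = a w* b;  /* widen-mult giving the intermediate T3 result */
>   int c = (int) tmp;   /* new conversion statement to type T2 */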
>
> This strategy makes the patch cleaner.
>
> Bootstrapped and tested on an x86-64 machine.

Ok for trunk (please re-bootstrap/test of course).

Thanks,
Richard.

> thanks,
> Cong
>
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index f298c0b..12990b2 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,10 @@
> +2013-12-02  Cong Hou  <co...@google.com>
> +
> +  * tree-vect-patterns.c (vect_recog_widen_mult_pattern): Enhance
> +  the widen-mult pattern by handling two operands with different
> +  sizes, and operands whose size is smaller than half of the result
> +  type.
> +
>  2013-11-22  Jakub Jelinek  <ja...@redhat.com>
>
>    PR sanitizer/59061
> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
> index 12d2c90..611ae1c 100644
> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -1,3 +1,8 @@
> +2013-12-02  Cong Hou  <co...@google.com>
> +
> +  * gcc.dg/vect/vect-widen-mult-u8-s16-s32.c: New test.
> +  * gcc.dg/vect/vect-widen-mult-u8-u32.c: New test.
> +
>  2013-11-22  Jakub Jelinek  <ja...@redhat.com>
>
>    * c-c++-common/asan/no-redundant-instrumentation-7.c: Fix
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
> new file mode 100644
> index 0000000..9f9081b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
> @@ -0,0 +1,48 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +#define N 64
> +
> +unsigned char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> +short Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> +int result[N];
> +
> +/* unsigned char * short -> int widening-mult.  */
> +__attribute__ ((noinline)) int
> +foo1(int len) {
> +  int i;
> +
> +  for (i=0; i<len; i++) {
> +    result[i] = X[i] * Y[i];
> +  }
> +}
> +
> +int main (void)
> +{
> +  int i;
> +
> +  check_vect ();
> +
> +  for (i=0; i<N; i++) {
> +    X[i] = i;
> +    Y[i] = 64-i;
> +    __asm__ volatile ("");
> +  }
> +
> +  foo1 (N);
> +
> +  for (i=0; i<N; i++) {
> +    if (result[i] != X[i] * Y[i])
> +      abort ();
> +  }
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
> +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
> +/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c
> new file mode 100644
> index 0000000..12c4692
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c
> @@ -0,0 +1,48 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdarg.h>
> +#include "tree-vect.h"
> +
> +#define N 64
> +
> +unsigned char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> +unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
> +unsigned int result[N];
> +
> +/* unsigned char -> unsigned int widening-mult.  */
> +__attribute__ ((noinline)) int
> +foo1(int len) {
> +  int i;
> +
> +  for (i=0; i<len; i++) {
> +    result[i] = X[i] * Y[i];
> +  }
> +}
> +
> +int main (void)
> +{
> +  int i;
> +
> +  check_vect ();
> +
> +  for (i=0; i<N; i++) {
> +    X[i] = i;
> +    Y[i] = 64-i;
> +    __asm__ volatile ("");
> +  }
> +
> +  foo1 (N);
> +
> +  for (i=0; i<N; i++) {
> +    if (result[i] != X[i] * Y[i])
> +      abort ();
> +  }
> +
> +  return 0;
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */
> +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */
> +/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +
> diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> index 7823cc3..f412e2d 100644
> --- a/gcc/tree-vect-patterns.c
> +++ b/gcc/tree-vect-patterns.c
> @@ -529,7 +529,8 @@ vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
>
>     Try to find the following pattern:
>
> -     type a_t, b_t;
> +     type1 a_t;
> +     type2 b_t;
>       TYPE a_T, b_T, prod_T;
>
>       S1  a_t = ;
> @@ -538,11 +539,12 @@ vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
>       S4  b_T = (TYPE) b_t;
>       S5  prod_T = a_T * b_T;
>
> -   where type 'TYPE' is at least double the size of type 'type'.
> +   where type 'TYPE' is at least double the size of types 'type1'
> +   and 'type2'.
>
>     Also detect unsigned cases:
>
> -     unsigned type a_t, b_t;
> +     unsigned type1 a_t;
> +     unsigned type2 b_t;
>       unsigned TYPE u_prod_T;
>       TYPE a_T, b_T, prod_T;
>
> @@ -596,6 +598,8 @@ vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
>     * Return value: A new stmt that will be used to replace the sequence of
>     stmts that constitute the pattern.  In this case it will be:
>          WIDEN_MULT <a_t, b_t>
> +   If the result of WIDEN_MULT needs to be converted to a larger type, the
> +   returned stmt will be this type conversion stmt.
>  */
>
>  static gimple
> @@ -606,8 +610,8 @@ vect_recog_widen_mult_pattern (vec<gimple> *stmts,
>    gimple def_stmt0, def_stmt1;
>    tree oprnd0, oprnd1;
>    tree type, half_type0, half_type1;
> -  gimple pattern_stmt;
> -  tree vectype, vectype_out = NULL_TREE;
> +  gimple new_stmt = NULL, pattern_stmt = NULL;
> +  tree vectype, vecitype;
>    tree var;
>    enum tree_code dummy_code;
>    int dummy_int;
> @@ -661,6 +665,33 @@ vect_recog_widen_mult_pattern (vec<gimple> *stmts,
>        return NULL;
>      }
>
> +  /* If the two arguments have different sizes, convert the one with
> +     the smaller type into the larger type.  */
> +  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
> +    {
> +      tree* oprnd = NULL;
> +      gimple def_stmt = NULL;
> +
> +      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
> +        {
> +          def_stmt = def_stmt0;
> +          half_type0 = half_type1;
> +          oprnd = &oprnd0;
> +        }
> +      else
> +        {
> +          def_stmt = def_stmt1;
> +          half_type1 = half_type0;
> +          oprnd = &oprnd1;
> +        }
> +
> +      tree old_oprnd = gimple_assign_rhs1 (def_stmt);
> +      tree new_oprnd = make_ssa_name (half_type0, NULL);
> +      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
> +                                               old_oprnd, NULL_TREE);
> +      *oprnd = new_oprnd;
> +    }
> +
>    /* Handle unsigned case.  Look for
>       S6  u_prod_T = (unsigned TYPE) prod_T;
>       Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
> @@ -692,6 +723,15 @@ vect_recog_widen_mult_pattern (vec<gimple> *stmts,
>    if (!types_compatible_p (half_type0, half_type1))
>      return NULL;
>
> +  /* If TYPE is more than twice as large as HALF_TYPE, we use WIDEN_MULT
> +     to get an intermediate result of type ITYPE.  In this case we need
> +     to build a statement to convert this intermediate result to type TYPE.  */
> +  tree itype = type;
> +  if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
> +    itype = build_nonstandard_integer_type
> +      (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
> +       TYPE_UNSIGNED (type));
> +
>    /* Pattern detected.  */
>    if (dump_enabled_p ())
>      dump_printf_loc (MSG_NOTE, vect_location,
> @@ -699,23 +739,56 @@ vect_recog_widen_mult_pattern (vec<gimple> *stmts,
>
>    /* Check target support  */
>    vectype = get_vectype_for_scalar_type (half_type0);
> -  vectype_out = get_vectype_for_scalar_type (type);
> +  vecitype = get_vectype_for_scalar_type (itype);
>    if (!vectype
> -      || !vectype_out
> +      || !vecitype
>        || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
> -                                          vectype_out, vectype,
> +                                          vecitype, vectype,
>                                            &dummy_code, &dummy_code,
>                                            &dummy_int, &dummy_vec))
>      return NULL;
>
>    *type_in = vectype;
> -  *type_out = vectype_out;
> +  *type_out = get_vectype_for_scalar_type (type);
>
>    /* Pattern supported.  Create a stmt to be used to replace the pattern: */
> -  var = vect_recog_temp_ssa_var (type, NULL);
> +  var = vect_recog_temp_ssa_var (itype, NULL);
>    pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
>                                                 oprnd1);
>
> +  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
> +  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
> +  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
> +  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
> +
> +  /* If the original two operands have different sizes, we may need to convert
> +     the smaller one into the larger type.  If this is the case, at this point
> +     the new stmt is already built.  */
> +  if (new_stmt)
> +    {
> +      append_pattern_def_seq (stmt_vinfo, new_stmt);
> +      stmt_vec_info new_stmt_info
> +        = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
> +      set_vinfo_for_stmt (new_stmt, new_stmt_info);
> +      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
> +    }
> +
> +  /* If ITYPE is not TYPE, we need to build a type conversion stmt to convert
> +     the result of the widen-mult operation into type TYPE.  */
> +  if (itype != type)
> +    {
> +      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
> +      stmt_vec_info pattern_stmt_info
> +        = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
> +      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
> +      STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
> +      pattern_stmt
> +        = gimple_build_assign_with_ops (NOP_EXPR,
> +                                        vect_recog_temp_ssa_var (type, NULL),
> +                                        gimple_assign_lhs (pattern_stmt),
> +                                        NULL_TREE);
> +    }
> +
>    if (dump_enabled_p ())
>      dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
>                            0);
>
>
> On Tue, Dec 3, 2013 at 2:25 PM, Cong Hou <co...@google.com> wrote:
>> Hi
>>
>> The current widen-mult pattern only considers two operands with the
>> same size. However, operands with different sizes can also benefit
>> from this pattern.
>> The following loop shows such an example:
>>
>>   char a[N];
>>   short b[N];
>>   int c[N];
>>
>>   for (int i = 0; i < N; ++i)
>>     c[i] = a[i] * b[i];
>>
>> In this case, we can convert a[i] into short type and then perform
>> widen-mult on b[i] and the converted value:
>>
>>   for (int i = 0; i < N; ++i) {
>>     short t = a[i];
>>     c[i] = t w* b[i];
>>   }
>>
>> This patch adds such support. In addition, the following loop fails
>> to be recognized as a widen-mult pattern, because the widening
>> operation from char to int is not directly supported by the target:
>>
>>   char a[N], b[N];
>>   int c[N];
>>
>>   for (int i = 0; i < N; ++i)
>>     c[i] = a[i] * b[i];
>>
>> In this case, we can still perform widen-mult on a[i] and b[i] to get
>> a result of short type, and then convert it to int:
>>
>>   char a[N], b[N];
>>   int c[N];
>>
>>   for (int i = 0; i < N; ++i) {
>>     short t = a[i] w* b[i];
>>     c[i] = (int) t;
>>   }
>>
>> Currently GCC does not allow multi-step conversions for binary
>> widening operations. This patch removes that restriction and uses
>> VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR to rearrange the data after the
>> widen-mult is performed for the widen-mult pattern. This saves
>> several unpacking instructions (for this example, the number of
>> packings/unpackings is reduced from 12 to 8; for SSE2, the
>> inefficient multiplication between two V4SI vectors is also avoided).
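>>
>> As a rough sketch of what this looks like at the vector level for the
>> char * char -> int case (assuming 128-bit vectors; the names t_lo,
>> t_hi, r0..r3 are illustrative, but the tree codes are the ones
>> involved):
>>
>>   t_lo = VEC_WIDEN_MULT_LO_EXPR <a, b>;  /* 16 chars -> 8 shorts */
>>   t_hi = VEC_WIDEN_MULT_HI_EXPR <a, b>;
>>   r0 = VEC_UNPACK_LO_EXPR <t_lo>;        /* 8 shorts -> 4 ints */
>>   r1 = VEC_UNPACK_HI_EXPR <t_lo>;
>>   r2 = VEC_UNPACK_LO_EXPR <t_hi>;
>>   r3 = VEC_UNPACK_HI_EXPR <t_hi>;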
>>
>> Bootstrapped and tested on an x86_64 machine.
>>
>> thanks,
>> Cong
>>
>>
>> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
>> index f298c0b..44ed204 100644
>> --- a/gcc/ChangeLog
>> +++ b/gcc/ChangeLog
>> @@ -1,3 +1,12 @@
>> +2013-12-02  Cong Hou  <co...@google.com>
>> +
>> +  * tree-vect-patterns.c (vect_recog_widen_mult_pattern): Enhance
>> +  the widen-mult pattern by handling two operands with different
>> +  sizes.
>> +  * tree-vect-stmts.c (vectorizable_conversion): Allow multi-step
>> +  conversions after the widening mult operation.
>> +  (supportable_widening_operation): Likewise.
>> +
>>  2013-11-22  Jakub Jelinek  <ja...@redhat.com>
>>
>>    PR sanitizer/59061
>> diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
>> index 12d2c90..611ae1c 100644
>> --- a/gcc/testsuite/ChangeLog
>> +++ b/gcc/testsuite/ChangeLog
>> @@ -1,3 +1,8 @@
>> +2013-12-02  Cong Hou  <co...@google.com>
>> +
>> +  * gcc.dg/vect/vect-widen-mult-u8-s16-s32.c: New test.
>> +  * gcc.dg/vect/vect-widen-mult-u8-u32.c: New test.
>> +
>>  2013-11-22  Jakub Jelinek  <ja...@redhat.com>
>>
>>    * c-c++-common/asan/no-redundant-instrumentation-7.c: Fix
>> diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
>> new file mode 100644
>> index 0000000..9f9081b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
>> @@ -0,0 +1,48 @@
>> +/* { dg-require-effective-target vect_int } */
>> +
>> +#include <stdarg.h>
>> +#include "tree-vect.h"
>> +
>> +#define N 64
>> +
>> +unsigned char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
>> +short Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
>> +int result[N];
>> +
>> +/* unsigned char * short -> int widening-mult.  */
>> +__attribute__ ((noinline)) int
>> +foo1(int len) {
>> +  int i;
>> +
>> +  for (i=0; i<len; i++) {
>> +    result[i] = X[i] * Y[i];
>> +  }
>> +}
>> +
>> +int main (void)
>> +{
>> +  int i;
>> +
>> +  check_vect ();
>> +
>> +  for (i=0; i<N; i++) {
>> +    X[i] = i;
>> +    Y[i] = 64-i;
>> +    __asm__ volatile ("");
>> +  }
>> +
>> +  foo1 (N);
>> +
>> +  for (i=0; i<N; i++) {
>> +    if (result[i] != X[i] * Y[i])
>> +      abort ();
>> +  }
>> +
>> +  return 0;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
>> +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
>> +/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
>> +/* { dg-final { cleanup-tree-dump "vect" } } */
>> +
>> diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c
>> new file mode 100644
>> index 0000000..51e9178
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c
>> @@ -0,0 +1,48 @@
>> +/* { dg-require-effective-target vect_int } */
>> +
>> +#include <stdarg.h>
>> +#include "tree-vect.h"
>> +
>> +#define N 64
>> +
>> +unsigned char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
>> +unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
>> +unsigned int result[N];
>> +
>> +/* unsigned char -> unsigned int widening-mult.  */
>> +__attribute__ ((noinline)) int
>> +foo1(int len) {
>> +  int i;
>> +
>> +  for (i=0; i<len; i++) {
>> +    result[i] = X[i] * Y[i];
>> +  }
>> +}
>> +
>> +int main (void)
>> +{
>> +  int i;
>> +
>> +  check_vect ();
>> +
>> +  for (i=0; i<N; i++) {
>> +    X[i] = i;
>> +    Y[i] = 64-i;
>> +    __asm__ volatile ("");
>> +  }
>> +
>> +  foo1 (N);
>> +
>> +  for (i=0; i<N; i++) {
>> +    if (result[i] != X[i] * Y[i])
>> +      abort ();
>> +  }
>> +
>> +  return 0;
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */
>> +/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */
>> +/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */
>> +/* { dg-final { cleanup-tree-dump "vect" } } */
>> +
>> diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
>> index 7823cc3..06279bc 100644
>> --- a/gcc/tree-vect-patterns.c
>> +++ b/gcc/tree-vect-patterns.c
>> @@ -529,7 +529,8 @@ vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
>>
>>     Try to find the following pattern:
>>
>> -     type a_t, b_t;
>> +     type1 a_t;
>> +     type2 b_t;
>>       TYPE a_T, b_T, prod_T;
>>
>>       S1  a_t = ;
>> @@ -538,11 +539,12 @@ vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
>>       S4  b_T = (TYPE) b_t;
>>       S5  prod_T = a_T * b_T;
>>
>> -   where type 'TYPE' is at least double the size of type 'type'.
>> +   where type 'TYPE' is at least double the size of types 'type1'
>> +   and 'type2'.
>>
>>     Also detect unsigned cases:
>>
>> -     unsigned type a_t, b_t;
>> +     unsigned type1 a_t;
>> +     unsigned type2 b_t;
>>       unsigned TYPE u_prod_T;
>>       TYPE a_T, b_T, prod_T;
>>
>> @@ -661,6 +663,50 @@ vect_recog_widen_mult_pattern (vec<gimple> *stmts,
>>        return NULL;
>>      }
>>
>> +  /* If the two arguments have different sizes, convert the one with
>> +     the smaller type into the larger type.  */
>> +  if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
>> +    {
>> +      tree* oprnd = NULL;
>> +      gimple def_stmt = NULL;
>> +
>> +      if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
>> +        {
>> +          def_stmt = def_stmt0;
>> +          half_type0 = half_type1;
>> +          oprnd = &oprnd0;
>> +        }
>> +      else
>> +        {
>> +          def_stmt = def_stmt1;
>> +          half_type1 = half_type0;
>> +          oprnd = &oprnd1;
>> +        }
>> +
>> +      if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
>> +        {
>> +          gimple new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
>> +          /* Check if the already created pattern stmt is what we need.  */
>> +          if (!is_gimple_assign (new_stmt)
>> +              || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
>> +              || TREE_TYPE (gimple_assign_lhs (new_stmt)) != half_type0)
>> +            return NULL;
>> +
>> +          stmts->safe_push (def_stmt);
>> +          *oprnd = gimple_assign_lhs (new_stmt);
>> +        }
>> +      else
>> +        {
>> +          tree old_oprnd = gimple_assign_rhs1 (def_stmt);
>> +          tree new_oprnd = make_ssa_name (half_type0, NULL);
>> +          gimple new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
>> +                                                          old_oprnd, NULL_TREE);
>> +          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
>> +          stmts->safe_push (def_stmt);
>> +          *oprnd = new_oprnd;
>> +        }
>> +    }
>> +
>>    /* Handle unsigned case.  Look for
>>       S6  u_prod_T = (unsigned TYPE) prod_T;
>>       Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>> index 72dfacd..e1ca2a2 100644
>> --- a/gcc/tree-vect-stmts.c
>> +++ b/gcc/tree-vect-stmts.c
>> @@ -2504,12 +2504,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
>>        if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
>>                                            &code1, &code2, &multi_step_cvt,
>>                                            &interm_types))
>> -        {
>> -          /* Binary widening operation can only be supported directly by the
>> -             architecture.  */
>> -          gcc_assert (!(multi_step_cvt && op_type == binary_op));
>> -          break;
>> -        }
>> +        break;
>>
>>        if (code != FLOAT_EXPR
>>            || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
>> @@ -2787,6 +2782,15 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
>>                c1 = codecvt1;
>>                c2 = codecvt2;
>>              }
>> +          else if (op_type == binary_op && i < multi_step_cvt)
>> +            {
>> +              /* For binary widening operations, if multiple steps are
>> +                 needed, use VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR to
>> +                 convert the intermediate result to the final one.  */
>> +              c1 = VEC_UNPACK_LO_EXPR;
>> +              c2 = VEC_UNPACK_HI_EXPR;
>> +              op_type = unary_op;
>> +            }
>>            vect_create_vectorized_promotion_stmts (&vec_oprnds0,
>>                                                    &vec_oprnds1,
>>                                                    stmt, this_dest, gsi,
>> @@ -6510,7 +6514,7 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
>>    enum tree_code c1, c2;
>>    int i;
>>    tree prev_type, intermediate_type;
>> -  enum machine_mode intermediate_mode, prev_mode;
>> +  enum machine_mode intermediate_mode;
>>    optab optab3, optab4;
>>
>>    *multi_step_cvt = 0;
>> @@ -6634,11 +6638,17 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
>>       types.  */
>>
>>    prev_type = vectype;
>> -  prev_mode = vec_mode;
>>
>> -  if (!CONVERT_EXPR_CODE_P (code))
>> +  /* For WIDEN_MULT_EXPR, it is possible that two steps are needed.  For
>> +     example, consider WIDEN_MULT_EXPR from char to int.  In the first step,
>> +     we can get the widen-mult result as short, and then widen it again to int.
>> +     VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR are used in the second step.  */
>> +  if (!CONVERT_EXPR_CODE_P (code) && code != WIDEN_MULT_EXPR)
>>      return false;
>>
>> +  c1 = VEC_UNPACK_LO_EXPR;
>> +  c2 = VEC_UNPACK_HI_EXPR;
>> +
>>    /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
>>       intermediate steps in promotion sequence.  We try
>>       MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
>> @@ -6654,10 +6664,6 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
>>        optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
>>
>>        if (!optab3 || !optab4
>> -          || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
>> -          || insn_data[icode1].operand[0].mode != intermediate_mode
>> -          || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
>> -          || insn_data[icode2].operand[0].mode != intermediate_mode
>>            || ((icode1 = optab_handler (optab3, intermediate_mode))
>>                == CODE_FOR_nothing)
>>            || ((icode2 = optab_handler (optab4, intermediate_mode))
>> @@ -6672,7 +6678,6 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
>>        return true;
>>
>>        prev_type = intermediate_type;
>> -      prev_mode = intermediate_mode;
>>      }
>>
>>    interm_types->release ();