Jakub Jelinek <ja...@redhat.com> wrote on 14/12/2011 02:25:13 PM:
> > @@ -1573,6 +1576,211 @@ vect_recog_vector_vector_shift_pattern ( > return pattern_stmt; > } > > +/* Detect a signed division by power of two constant that wouldn't be > + otherwise vectorized: > + > + type a_t, b_t; > + > + S1 a_t = b_t / N; > + > + where type 'type' is a signed integral type and N is a constant positive > + power of two. > + > + Similarly handle signed modulo by power of two constant: > + > + S4 a_t = b_t % N; > + > + Input/Output: > + > + * STMTS: Contains a stmt from which the pattern search begins, > + i.e. the division stmt. S1 is replaced by: > + S3 y_t = b_t < 0 ? N - 1 : 0; > + S2 x_t = b_t + y_t; > + S1' a_t = x_t >> log2 (N); > + > + S4 is replaced by (where *_T temporaries have unsigned type): > + S9 y_T = b_t < 0 ? -1U : 0U; > + S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N)); > + S7 z_t = (type) z_T; > + S6 w_t = b_t + z_t; > + S5 x_t = w_t & (N - 1); > + S4' a_t = x_t - z_t; > + > + Output: > + > + * TYPE_IN: The type of the input arguments to the pattern. > + > + * TYPE_OUT: The type of the output of this pattern. > + > + * Return value: A new stmt that will be used to replace the division > + S1 or modulo S4 stmt. 
*/ > + > +static gimple > +vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts, > + tree *type_in, tree *type_out) > +{ > + gimple last_stmt = VEC_pop (gimple, *stmts); > + gimple_stmt_iterator gsi; > + tree oprnd0, oprnd1, vectype, itype, cond; > + gimple pattern_stmt, def_stmt; > + enum tree_code rhs_code; > + stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); > + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); > + optab optab; > + > + if (!is_gimple_assign (last_stmt)) > + return NULL; > + > + rhs_code = gimple_assign_rhs_code (last_stmt); > + switch (rhs_code) > + { > + case TRUNC_DIV_EXPR: > + case TRUNC_MOD_EXPR: > + break; > + default: > + return NULL; > + } > + > + if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)) > + return NULL; > + > + oprnd0 = gimple_assign_rhs1 (last_stmt); > + oprnd1 = gimple_assign_rhs2 (last_stmt); > + itype = TREE_TYPE (oprnd0); > + if (TREE_CODE (oprnd0) != SSA_NAME > + || TREE_CODE (oprnd1) != INTEGER_CST > + || TREE_CODE (itype) != INTEGER_TYPE > + || TYPE_UNSIGNED (itype) > + || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)) > + || !integer_pow2p (oprnd1) > + || tree_int_cst_sgn (oprnd1) != 1) > + return NULL; > + > + vectype = get_vectype_for_scalar_type (itype); > + if (vectype == NULL_TREE) > + return NULL; > + > + /* If the target can handle vectorized division or modulo natively, > + don't attempt to optimize this. */ > + optab = optab_for_tree_code (rhs_code, vectype, optab_default); > + if (optab != NULL) > + { > + enum machine_mode vec_mode = TYPE_MODE (vectype); > + int icode = (int) optab_handler (optab, vec_mode); > + if (icode != CODE_FOR_nothing > + || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD) > + return NULL; > + } > + > + /* Pattern detected. 
*/ > + if (vect_print_dump_info (REPORT_DETAILS)) > + fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: "); > + > + cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst > (itype, 0)); > + gsi = gsi_for_stmt (last_stmt); > + if (rhs_code == TRUNC_DIV_EXPR) > + { > + tree var = vect_recog_temp_ssa_var (itype, NULL); > + def_stmt > + = gimple_build_assign_with_ops3 (COND_EXPR, var, cond, > + fold_build2 (MINUS_EXPR, itype, > + oprnd1, > + build_int_cst (itype, > + 1)), > + build_int_cst (itype, 0)); > + gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT);

Hmm, you are inserting the pattern stmts here. This caused some mess in the past, as explained in <http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00801.html>. Maybe you could use STMT_VINFO_PATTERN_DEF_STMT to keep a chain of def_stmts instead?

Ira

> + set_vinfo_for_stmt (def_stmt, new_stmt_vec_info (def_stmt, loop_vinfo, > + NULL)); > + var = vect_recog_temp_ssa_var (itype, NULL); > + def_stmt > + = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0, > + gimple_assign_lhs (def_stmt)); > + STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt; > + > + pattern_stmt > + = gimple_build_assign_with_ops (RSHIFT_EXPR, > + vect_recog_temp_ssa_var (itype, NULL), > + var, > + build_int_cst (itype, > + tree_log2 (oprnd1))); > + } > + else > + { > + tree signmask; > + tree utype = build_nonstandard_integer_type (TYPE_PRECISION > (itype), 1); > + tree shift = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype)) > + - tree_log2 (oprnd1)); > + if (compare_tree_int (oprnd1, 2) == 0) > + { > + signmask = vect_recog_temp_ssa_var (itype, NULL); > + def_stmt > + = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond, > + build_int_cst (itype, 1), > + build_int_cst (itype, 0)); > + gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT); > + set_vinfo_for_stmt (def_stmt, > + new_stmt_vec_info (def_stmt, loop_vinfo, NULL)); > + } > + else > + { > + tree var = vect_recog_temp_ssa_var (utype, NULL); > + def_stmt > + = 
gimple_build_assign_with_ops3 (COND_EXPR, var, cond, > + build_int_cst (utype, -1), > + build_int_cst (utype, 0)); > + gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT); > + set_vinfo_for_stmt (def_stmt, > + new_stmt_vec_info (def_stmt, loop_vinfo, NULL)); > + var = vect_recog_temp_ssa_var (utype, NULL); > + def_stmt > + = gimple_build_assign_with_ops (RSHIFT_EXPR, var, > + gimple_assign_lhs (def_stmt), > + shift); > + gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT); > + set_vinfo_for_stmt (def_stmt, > + new_stmt_vec_info (def_stmt, loop_vinfo, NULL)); > + signmask = vect_recog_temp_ssa_var (itype, NULL); > + def_stmt > + = gimple_build_assign_with_ops (NOP_EXPR, signmask, var, > + NULL_TREE); > + gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT); > + set_vinfo_for_stmt (def_stmt, > + new_stmt_vec_info (def_stmt, loop_vinfo, NULL)); > + } > + def_stmt > + = gimple_build_assign_with_ops (PLUS_EXPR, > + vect_recog_temp_ssa_var (itype, NULL), > + oprnd0, signmask); > + gsi_insert_before (&gsi, def_stmt, GSI_SAME_STMT); > + set_vinfo_for_stmt (def_stmt, new_stmt_vec_info (def_stmt, loop_vinfo, > + NULL)); > + def_stmt > + = gimple_build_assign_with_ops (BIT_AND_EXPR, > + vect_recog_temp_ssa_var (itype, NULL), > + gimple_assign_lhs (def_stmt), > + fold_build2 (MINUS_EXPR, itype, > + oprnd1, > + build_int_cst (itype, > + 1))); > + STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = def_stmt; > + > + pattern_stmt > + = gimple_build_assign_with_ops (MINUS_EXPR, > + vect_recog_temp_ssa_var (itype, NULL), > + gimple_assign_lhs (def_stmt), > + signmask); > + } > + > + if (vect_print_dump_info (REPORT_DETAILS)) > + print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); > + > + VEC_safe_push (gimple, heap, *stmts, last_stmt); > + > + *type_in = vectype; > + *type_out = vectype; > + return pattern_stmt; > +} > + > /* Function vect_recog_mixed_size_cond_pattern > > Try to find the following pattern: