Richard should have the final say, but some comments... Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes: > diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c > index 1e2dfe5d22d..862206b3256 100644 > --- a/gcc/tree-vect-stmts.c > +++ b/gcc/tree-vect-stmts.c > @@ -1989,17 +1989,31 @@ check_load_store_masking (loop_vec_info loop_vinfo, > tree vectype, > > static tree > prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask, > - gimple_stmt_iterator *gsi) > + gimple_stmt_iterator *gsi, tree mask, > + cond_vmask_map_type *cond_to_vec_mask)
"scalar_mask" might be a better name. But maybe we should key off the vector mask after all, now that we're relying on the code having no redundancies. Passing the vinfo would be better than passing the cond_vmask_map_type directly. > { > gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask))); > if (!loop_mask) > return vec_mask; > > gcc_assert (TREE_TYPE (loop_mask) == mask_type); > + > + tree *slot = 0; > + if (cond_to_vec_mask) The pointer should never be null in this context. > + { > + cond_vmask_key cond (mask, loop_mask); > + slot = &cond_to_vec_mask->get_or_insert (cond); > + if (*slot) > + return *slot; > + } > + > tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and"); > gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR, > vec_mask, loop_mask); > gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT); > + > + if (slot) > + *slot = and_res; > return and_res; > } > [...] > @@ -9975,6 +9997,38 @@ vectorizable_condition (stmt_vec_info stmt_info, > gimple_stmt_iterator *gsi, > /* Handle cond expr. */ > for (j = 0; j < ncopies; j++) > { > + tree vec_mask = NULL_TREE; > + > + if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) Nit: one condition per line when the whole thing doesn't fit on a single line. > + && TREE_CODE_CLASS (TREE_CODE (cond_expr)) == tcc_comparison Why restrict this to embedded comparisons? It should work for separate comparisons too. > + && loop_vinfo->cond_to_vec_mask) This should always be nonnull given the above. > + { > + vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); > + if (masks) This is never null. > + { > + tree loop_mask = vect_get_loop_mask (gsi, masks, > + ncopies, vectype, j); > + > + cond_vmask_key cond (cond_expr, loop_mask); > + tree *slot = loop_vinfo->cond_to_vec_mask->get (cond); > + if (slot && *slot) > + vec_mask = *slot; > + else > + { > + cond.cond_ops.code > + = invert_tree_comparison (cond.cond_ops.code, true); > + slot = loop_vinfo->cond_to_vec_mask->get (cond); > + if (slot && *slot) > + { > + vec_mask = *slot; > + tree tmp = then_clause; > + then_clause = else_clause; > + else_clause = tmp; Can use std::swap. > + } > + } > + } > + } > + > stmt_vec_info new_stmt_info = NULL; > if (j == 0) > { > @@ -10054,6 +10108,8 @@ vectorizable_condition (stmt_vec_info stmt_info, > gimple_stmt_iterator *gsi, > > if (masked) > vec_compare = vec_cond_lhs; > + else if (vec_mask) > + vec_compare = vec_mask; If we do drop the comparison check above, this should win over "masked". > @@ -193,6 +194,81 @@ public: > poly_uint64 min_value; > }; > > +struct cond_vmask_key I'm no good at naming things, but since "vmask" doesn't occur elsewhere in target-independent code, how about "vec_masked_cond_key"? > +{ > + cond_vmask_key (tree t, tree loop_mask_) > + : cond_ops (t), loop_mask (loop_mask_) > + {} > + > + hashval_t hash () const > + { > + inchash::hash h; > + h.add_int (cond_ops.code); > + h.add_int (TREE_HASH (cond_ops.op0)); > + h.add_int (TREE_HASH (cond_ops.op1)); These two need to use inchash::add_expr, since you're hashing for operand_equal_p. > + h.add_int (TREE_HASH (loop_mask)); > + return h.end (); > + } > + > + void mark_empty () > + { > + loop_mask = NULL_TREE; > + } > + > + bool is_empty () > + { > + return loop_mask == NULL_TREE; > + } > + > + tree_cond_ops cond_ops; > + tree loop_mask; > +}; > + > +inline bool operator== (const cond_vmask_key& c1, const cond_vmask_key& c2) > +{ > + return c1.loop_mask == c2.loop_mask > + && c1.cond_ops == c2.cond_ops; Multi-line expressions should be in brackets (or just put this one on a single line). > +} > + > +struct cond_vmask_key_traits Might as well make this: template<> struct default_hash_traits<cond_vmask_key> and then you can drop the third template parameter from hash_map. > +{ > + typedef cond_vmask_key value_type; > + typedef cond_vmask_key compare_type; > + > + static inline hashval_t hash (value_type v) > + { > + return v.hash (); > + } > + > + static inline bool equal (value_type existing, value_type candidate) > + { > + return existing == candidate; > + } > + > + static inline void mark_empty (value_type& v) > + { > + v.mark_empty (); > + } > + > + static inline bool is_empty (value_type v) > + { > + return v.is_empty (); > + } Making hash (), mask_empty () and is_empty () forward to cond_vmask_key functions of the same name seems unnecessary. I think we should just put the implementation here and not define the functions in cond_vmask_key itself. > + > + static void mark_deleted (value_type&) {} > + > + static inline bool is_deleted (value_type) > + { > + return false; > + } > + > + static inline void remove (value_type &) {} > +}; > + > +typedef hash_map<cond_vmask_key, tree, > + simple_hashmap_traits <cond_vmask_key_traits, tree> > > + cond_vmask_map_type; > + > /* Vectorizer state shared between different analyses like vector sizes > of the same CFG region. */ > class vec_info_shared { > @@ -255,6 +331,8 @@ public: > /* Cost data used by the target cost model. */ > void *target_cost_data; > > + cond_vmask_map_type *cond_to_vec_mask; > + > private: > stmt_vec_info new_stmt_vec_info (gimple *stmt); > void set_vinfo_for_stmt (gimple *, stmt_vec_info); > diff --git a/gcc/tree.c b/gcc/tree.c > index 8f80012c6e8..32a8fcf1eb8 100644 > --- a/gcc/tree.c > +++ b/gcc/tree.c > @@ -15204,6 +15204,44 @@ max_object_size (void) > return TYPE_MAX_VALUE (ptrdiff_type_node); > } > > +/* If code(T) is comparison op or def of comparison stmt, > + extract it's operands. > + Else return <NE_EXPR, T, 0>. */ > + > +tree_cond_ops::tree_cond_ops (tree t) > +{ > + if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison) > + { > + this->code = TREE_CODE (t); > + this->op0 = TREE_OPERAND (t, 0); > + this->op1 = TREE_OPERAND (t, 1); > + return; > + } > + > + else if (TREE_CODE (t) == SSA_NAME) Better as just an "if", given the early return above. > + { > + gassign *stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (t)); > + if (stmt) > + { > + tree_code code = gimple_assign_rhs_code (stmt); > + if (TREE_CODE_CLASS (code) == tcc_comparison) > + { > + this->code = code; > + this->op0 = gimple_assign_rhs1 (stmt); > + this->op1 = gimple_assign_rhs2 (stmt); > + return; > + } > + } > + > + this->code = NE_EXPR; > + this->op0 = t; > + this->op1 = build_zero_cst (TREE_TYPE (t)); Think we should use this as the default for non-SSA_NAMEs too, rather than assert below. > + } > + > + else > + gcc_unreachable (); > +} > + > #if CHECKING_P > > namespace selftest { > diff --git a/gcc/tree.h b/gcc/tree.h > index 94dbb95a78a..e6d6e9541c3 100644 > --- a/gcc/tree.h > +++ b/gcc/tree.h > @@ -6141,4 +6141,25 @@ public: > operator location_t () const { return m_combined_loc; } > }; > > +struct tree_cond_ops > +{ > + tree_code code; > + tree op0; > + tree op1; > + > + tree_cond_ops (tree); > +}; > + > +/* ??? Not sure if it's good idea to include fold-const.h > + only for operand_equal_p ? */ Maybe put the new stuff in fold-const.h? > +extern bool operand_equal_p (const_tree, const_tree, unsigned int); > + > +inline bool > +operator== (const tree_cond_ops& o1, const tree_cond_ops &o2) > +{ > + return o1.code == o2.code > + && operand_equal_p (o1.op0, o2.op0, 0) > + && operand_equal_p (o1.op1, o2.op1, 0); Multi-line expression should be enclosed in brackets. Thanks, Richard > +} > + > #endif /* GCC_TREE_H */