This fixes a mismatched cost computation in the peeling cost estimation by using the same routine for the no-peeling cost as well (the original implementation failed to handle the same special cases, such as groups, and thus always over-estimated the no-peeling cost).
It doesn't disable peeling for bwaves for me because we still improve the cost by 1, as expected (but the outside cost increases significantly). Factoring in the outside cost looks difficult, so maybe instead of a hard comparison (13 vs. 14 in this case) we should require a percentage improvement. Anyway, this fixes a bug. Bootstrap and regtest running on x86_64-unknown-linux-gnu. Richard. 2017-07-21 Richard Biener <rguent...@suse.de> PR tree-optimization/81303 * tree-vect-data-refs.c (vect_get_peeling_costs_all_drs): Pass in datarefs vector. Allow NULL dr0 for no peeling cost estimate. (vect_peeling_hash_get_lowest_cost): Adjust. (vect_enhance_data_refs_alignment): Likewise. Use vect_get_peeling_costs_all_drs to compute the penalty for no peeling to match up costs. Index: gcc/tree-vect-data-refs.c =================================================================== --- gcc/tree-vect-data-refs.c (revision 250386) +++ gcc/tree-vect-data-refs.c (working copy) @@ -1159,25 +1159,21 @@ vect_peeling_hash_get_most_frequent (_ve misalignment will be zero after peeling. */ static void -vect_get_peeling_costs_all_drs (struct data_reference *dr0, +vect_get_peeling_costs_all_drs (vec<data_reference_p> datarefs, + struct data_reference *dr0, unsigned int *inside_cost, unsigned int *outside_cost, stmt_vector_for_cost *body_cost_vec, unsigned int npeel, bool unknown_misalignment) { - gimple *stmt = DR_STMT (dr0); - stmt_vec_info stmt_info = vinfo_for_stmt (stmt); - loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); - vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); - unsigned i; data_reference *dr; FOR_EACH_VEC_ELT (datarefs, i, dr) { - stmt = DR_STMT (dr); - stmt_info = vinfo_for_stmt (stmt); + gimple *stmt = DR_STMT (dr); + stmt_vec_info stmt_info = vinfo_for_stmt (stmt); /* For interleaving, only the alignment of the first access matters.
*/ if (STMT_VINFO_GROUPED_ACCESS (stmt_info) @@ -1192,7 +1188,9 @@ vect_get_peeling_costs_all_drs (struct d int save_misalignment; save_misalignment = DR_MISALIGNMENT (dr); - if (unknown_misalignment && dr == dr0) + if (npeel == 0) + ; + else if (unknown_misalignment && dr == dr0) SET_DR_MISALIGNMENT (dr, 0); else vect_update_misalignment_for_peel (dr, dr0, npeel); @@ -1222,7 +1220,8 @@ vect_peeling_hash_get_lowest_cost (_vect body_cost_vec.create (2); epilogue_cost_vec.create (2); - vect_get_peeling_costs_all_drs (elem->dr, &inside_cost, &outside_cost, + vect_get_peeling_costs_all_drs (LOOP_VINFO_DATAREFS (loop_vinfo), + elem->dr, &inside_cost, &outside_cost, &body_cost_vec, elem->npeel, false); body_cost_vec.release (); @@ -1651,7 +1650,7 @@ vect_enhance_data_refs_alignment (loop_v stmt_vector_for_cost dummy; dummy.create (2); - vect_get_peeling_costs_all_drs (dr0, + vect_get_peeling_costs_all_drs (datarefs, dr0, &load_inside_cost, &load_outside_cost, &dummy, vf / 2, true); @@ -1660,7 +1659,7 @@ vect_enhance_data_refs_alignment (loop_v if (first_store) { dummy.create (2); - vect_get_peeling_costs_all_drs (first_store, + vect_get_peeling_costs_all_drs (datarefs, first_store, &store_inside_cost, &store_outside_cost, &dummy, vf / 2, true); @@ -1744,18 +1743,15 @@ vect_enhance_data_refs_alignment (loop_v dr0 = unsupportable_dr; else if (do_peeling) { - /* Calculate the penalty for no peeling, i.e. leaving everything - unaligned. - TODO: Adapt vect_get_peeling_costs_all_drs and use here. + /* Calculate the penalty for no peeling, i.e. leaving everything as-is. TODO: Use nopeel_outside_cost or get rid of it? 
*/ unsigned nopeel_inside_cost = 0; unsigned nopeel_outside_cost = 0; stmt_vector_for_cost dummy; dummy.create (2); - FOR_EACH_VEC_ELT (datarefs, i, dr) - vect_get_data_access_cost (dr, &nopeel_inside_cost, - &nopeel_outside_cost, &dummy); + vect_get_peeling_costs_all_drs (datarefs, NULL, &nopeel_inside_cost, + &nopeel_outside_cost, &dummy, 0, false); dummy.release (); /* Add epilogue costs. As we do not peel for alignment here, no prologue