On Thu, 11 Jan 2024, Tamar Christina wrote:

> Hi All,
> 
> Instead of searching for where to move stores to, they should always be in
> exit belonging to the latch.  We can only ever delay stores and even if we
> pick a different exit than the latch one as the main one, effects still
> happen in program order when vectorized.  If we don't move the stores to the
> latch exit but instead to whever we pick as the "main" exit then we can
> perform incorrect memory accesses (luckily these are trapped by verify_ssa).
> 
> We used to iterate over the conds and check the loads and stores inside them.
> However this relies on the conds being ordered in program order.  Additionally
> if there is a basic block between two conds we would not have analyzed it.
> 
> Instead this now walks from the preds of the destination basic block up to the
> loop header and analyzes every block along the way.  As a later optimization 
> we
> could stop as soon as we've seen all the BBs we have conds for.  For now the
> header will always contain the first cond, but this can change when we support
> arbitrary control flow.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu
> and no issues normally and with --enable-checking=release --enable-lto
> --with-build-config=bootstrap-O3 --enable-checking=yes,rtl,extra.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       PR tree-optimization/113135
>       * tree-vect-data-refs.cc (vect_analyze_early_break_dependences): Rework
>       dependency analysis.
> 
> gcc/testsuite/ChangeLog:
> 
>       PR tree-optimization/113135
>       * gcc.dg/vect/vect-early-break_103-pr113135.c: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..bbad7ee2cb18086e470f4a2a2dc0a2b345bbdd71
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-additional-options "-w" } */
> +
> +char UnpackReadTables_BitLength[20];
> +int UnpackReadTables_ZeroCount;
> +void UnpackReadTables() {
> +  for (unsigned I = 0; I < 20;)
> +    while (UnpackReadTables_ZeroCount-- &&
> +           I < sizeof(UnpackReadTables_BitLength))
> +      UnpackReadTables_BitLength[I++] = 0;
> +}
> diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> index 
> 3d9673fb0b580ff21ff151dc5c199840df41a1cd..6b76eee72cb7d09de5f443589b4fc3a0e8c2584f
>  100644
> --- a/gcc/tree-vect-data-refs.cc
> +++ b/gcc/tree-vect-data-refs.cc
> @@ -671,13 +671,18 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>                    "loop contains multiple exits, analyzing"
>                    " statement dependencies.\n");
>  
> -  for (gimple *c : LOOP_VINFO_LOOP_CONDS (loop_vinfo))
> -    {
> -      stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (c);
> -      if (STMT_VINFO_TYPE (loop_cond_info) != loop_exit_ctrl_vec_info_type)
> -     continue;
> +  /* Since we don't support general control flow, the location we'll move the
> +     side-effects to is always the latch connected exit.  When we support
> +     general control flow we can do better but for now this is fine.  */
> +  dest_bb = single_pred (loop->latch);
> +  auto_vec <edge> workset;
> +  for (auto e: dest_bb->preds)
> +    workset.safe_push (e);

If you handle an arbitrary number of preds here ...

> -      gimple_stmt_iterator gsi = gsi_for_stmt (c);
> +  while (!workset.is_empty ())
> +    {
> +      basic_block bb = workset.pop ()->src;
> +      gimple_stmt_iterator gsi = gsi_last_bb (bb);
>  
>        /* Now analyze all the remaining statements and try to determine which
>        instructions are allowed/needed to be moved.  */
> @@ -705,10 +710,10 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>                                "early breaks only supported on statically"
>                                " allocated objects.\n");
> -           return opt_result::failure_at (c,
> +           return opt_result::failure_at (stmt,
>                                "can't safely apply code motion to "
>                                "dependencies of %G to vectorize "
> -                              "the early exit.\n", c);
> +                              "the early exit.\n", stmt);
>           }
>  
>         tree refop = TREE_OPERAND (obj, 0);
> @@ -720,10 +725,10 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>                                "early breaks only supported on"
>                                " statically allocated objects.\n");
> -           return opt_result::failure_at (c,
> +           return opt_result::failure_at (stmt,
>                                "can't safely apply code motion to "
>                                "dependencies of %G to vectorize "
> -                              "the early exit.\n", c);
> +                              "the early exit.\n", stmt);
>           }
>  
>         /* Check if vector accesses to the object will be within bounds.
> @@ -736,10 +741,10 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>                                "early breaks not supported: vectorization "
>                                "would %s beyond size of obj.",
>                                DR_IS_READ (dr_ref) ? "read" : "write");
> -           return opt_result::failure_at (c,
> +           return opt_result::failure_at (stmt,
>                                "can't safely apply code motion to "
>                                "dependencies of %G to vectorize "
> -                              "the early exit.\n", c);
> +                              "the early exit.\n", stmt);
>           }
>  
>         if (DR_IS_READ (dr_ref))
> @@ -802,26 +807,15 @@ vect_analyze_early_break_dependences (loop_vec_info 
> loop_vinfo)
>           }
>       }
>  
> -      /* Save destination as we go, BB are visited in order and the last one
> -     is where statements should be moved to.  */
> -      if (!dest_bb)
> -     dest_bb = gimple_bb (c);
> -      else
> -     {
> -       basic_block curr_bb = gimple_bb (c);
> -       if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb))
> -         dest_bb = curr_bb;
> -     }
> +      if (bb != loop->header)
> +     for (auto e: bb->preds)
> +       workset.safe_push (e);

... or here that wouldn't be correct if we actually had such a case
(we don't), since that no longer guarantees we visit stores before
loads.  Please get rid of the workset and instead do

   bb = single_pred (bb);

to iterate to the next block, stopping at the loop header.

OK with that change.

Thanks,
Richard.

Reply via email to