The compile time issue was discovered in SPEC 2017 wrf:

Use time and -ftime-report to analyze the profile data of the SPEC 2017 wrf
compilation.

Before this patch (Lazy vsetvl):

scheduling                         : 121.89 ( 15%)   0.53 ( 11%) 122.72 ( 15%)    13M (  1%)
machine dep reorg                  : 424.61 ( 53%)   1.84 ( 37%) 427.44 ( 53%)  5290k (  0%)
real    13m27.074s
user    13m19.539s
sys     0m5.180s

Simple vsetvl:

machine dep reorg                  :   0.10 (  0%)   0.00 (  0%)   0.11 (  0%)  4138k (  0%)
real    6m5.780s
user    6m2.396s
sys     0m2.373s

The "machine dep reorg" entry is the compile time of the VSETVL pass (424
seconds), which accounts for 53% of the total compilation time, far more than
scheduling.

After investigation, the critical path of the VSETVL pass is
compute_lcm_local_properties, which is called in every iteration of
phase 2 (earliest fusion) and phase 3 (global lcm).

This patch optimizes the code of compute_lcm_local_properties to reduce the
compilation time.

After this patch:

scheduling                         : 117.51 ( 27%)   0.21 (  6%) 118.04 ( 27%)    13M (  1%)
machine dep reorg                  :  80.13 ( 18%)   0.91 ( 26%)  81.26 ( 18%)  5290k (  0%)
real    7m25.374s
user    7m20.116s
sys     0m3.795s

The improvement from this patch is significant: the lazy VSETVL pass drops from
424s (53%) to 80s (18%), and now takes less time than scheduling.

Tested on both RV32 and RV64 with no regressions.  OK for trunk?
 
        PR target/113495

gcc/ChangeLog:

        * config/riscv/riscv-vsetvl.cc (extract_single_source): Remove.
        (pre_vsetvl::compute_vsetvl_def_data): Fix compile time issue.
        (pre_vsetvl::compute_transparent): New function.
        (pre_vsetvl::compute_lcm_local_properties): Fix compile time issue.

---
 gcc/config/riscv/riscv-vsetvl.cc | 184 ++++++++++---------------------
 1 file changed, 60 insertions(+), 124 deletions(-)

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index d7b40a5c813..cec862329c5 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -599,14 +599,6 @@ extract_single_source (set_info *set)
   return first_insn;
 }
 
-static insn_info *
-extract_single_source (def_info *def)
-{
-  if (!def)
-    return nullptr;
-  return extract_single_source (dyn_cast<set_info *> (def));
-}
-
 static bool
 same_equiv_note_p (set_info *set1, set_info *set2)
 {
@@ -2374,6 +2366,7 @@ public:
   }
 
   void compute_vsetvl_def_data ();
+  void compute_transparent (const bb_info *);
   void compute_lcm_local_properties ();
 
   void fuse_local_vsetvl_info ();
@@ -2452,20 +2445,16 @@ pre_vsetvl::compute_vsetvl_def_data ()
        {
          for (unsigned i = 0; i < m_vsetvl_def_exprs.length (); i += 1)
            {
-             const vsetvl_info &info = *m_vsetvl_def_exprs[i];
-             if (!info.has_nonvlmax_reg_avl ())
-               continue;
-             unsigned int regno;
-             sbitmap_iterator sbi;
-             EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0, regno,
-                                       sbi)
-               if (regno == REGNO (info.get_avl ()))
-                 {
-                   bitmap_set_bit (m_kill[bb->index ()], i);
-                   bitmap_set_bit (def_loc[bb->index ()],
-                                   get_expr_index (m_vsetvl_def_exprs,
-                                                   m_unknow_info));
-                 }
+             auto *info = m_vsetvl_def_exprs[i];
+             if (info->has_nonvlmax_reg_avl ()
+                 && bitmap_bit_p (m_reg_def_loc[bb->index ()],
+                                  REGNO (info->get_avl ())))
+               {
+                 bitmap_set_bit (m_kill[bb->index ()], i);
+                 bitmap_set_bit (def_loc[bb->index ()],
+                                 get_expr_index (m_vsetvl_def_exprs,
+                                                 m_unknow_info));
+               }
            }
          continue;
        }
@@ -2516,6 +2505,36 @@ pre_vsetvl::compute_vsetvl_def_data ()
   sbitmap_vector_free (m_kill);
 }
 
+/* Subroutine of compute_lcm_local_properties which Compute local transparent
+   BB. Note that the compile time is very sensitive to compute_transparent and
+   compute_lcm_local_properties, any change of these 2 functions should be
+   aware of the compile time changing of the program which has a large number of
+   blocks, e.g SPEC 2017 wrf.
+
+   Current compile time profile of SPEC 2017 wrf:
+
+     1. scheduling - 27%
+     2. machine dep reorg (VSETVL PASS) - 18%
+
+   VSETVL pass should not spend more time than scheduling in compilation.  */
+void
+pre_vsetvl::compute_transparent (const bb_info *bb)
+{
+  int num_exprs = m_exprs.length ();
+  unsigned bb_index = bb->index ();
+  for (int i = 0; i < num_exprs; i++)
+    {
+      auto *info = m_exprs[i];
+      if (info->has_nonvlmax_reg_avl ()
+         && bitmap_bit_p (m_reg_def_loc[bb_index], REGNO (info->get_avl ())))
+       bitmap_clear_bit (m_transp[bb_index], i);
+      else if (info->has_vl ()
+              && bitmap_bit_p (m_reg_def_loc[bb_index],
+                               REGNO (info->get_vl ())))
+       bitmap_clear_bit (m_transp[bb_index], i);
+    }
+}
+
 /* Compute the local properties of each recorded expression.
 
    Local properties are those that are defined by the block, irrespective of
@@ -2572,7 +2591,7 @@ pre_vsetvl::compute_lcm_local_properties ()
 
   bitmap_vector_clear (m_avloc, last_basic_block_for_fn (cfun));
   bitmap_vector_clear (m_antloc, last_basic_block_for_fn (cfun));
-  bitmap_vector_clear (m_transp, last_basic_block_for_fn (cfun));
+  bitmap_vector_ones (m_transp, last_basic_block_for_fn (cfun));
 
   /* -  If T is locally available at the end of a block, then T' must be
        available at the end of the same block. Since some optimization has
@@ -2598,117 +2617,34 @@ pre_vsetvl::compute_lcm_local_properties ()
 
       /* Compute m_transp */
       if (block_info.empty_p ())
+       compute_transparent (bb);
+      else
        {
-         bitmap_ones (m_transp[bb_index]);
-         for (int i = 0; i < num_exprs; i += 1)
-           {
-             const vsetvl_info &info = *m_exprs[i];
-             if (!info.has_nonvlmax_reg_avl () && !info.has_vl ())
-               continue;
-
-             if (info.has_nonvlmax_reg_avl ())
-               {
-                 unsigned int regno;
-                 sbitmap_iterator sbi;
-                 EXECUTE_IF_SET_IN_BITMAP (m_reg_def_loc[bb->index ()], 0,
-                                           regno, sbi)
-                   {
-                     if (regno == REGNO (info.get_avl ()))
-                       bitmap_clear_bit (m_transp[bb->index ()], i);
-                   }
-               }
-
-             for (insn_info *insn : bb->real_nondebug_insns ())
-               {
-                 if (info.has_nonvlmax_reg_avl ()
-                     && find_access (insn->defs (), REGNO (info.get_avl ())))
-                   {
-                     bitmap_clear_bit (m_transp[bb_index], i);
-                     break;
-                   }
-
-                 if (info.has_vl ()
-                     && reg_mentioned_p (info.get_vl (), insn->rtl ()))
-                   {
-                     if (find_access (insn->defs (), REGNO (info.get_vl ())))
-                       /* We can't fuse vsetvl into the blocks that modify the
-                          VL operand since successors of such blocks will need
-                          the value of those blocks are defining.
-
-                                         bb 4: def a5
-                                         /   \
-                                 bb 5:use a5  bb 6:vsetvl a5, 5
-
-                          The example above shows that we can't fuse vsetvl
-                          from bb 6 into bb 4 since the successor bb 5 is using
-                          the value defined in bb 4.  */
-                       ;
-                     else
-                       {
-                         /* We can't fuse vsetvl into the blocks that use the
-                            VL operand which has different value from the
-                            vsetvl info.
-
-                                           bb 4: def a5
-                                             |
-                                           bb 5: use a5
-                                             |
-                                           bb 6: def a5
-                                             |
-                                           bb 7: use a5
-
-                            The example above shows that we can't fuse vsetvl
-                            from bb 6 into bb 5 since their value is different.
-                          */
-                         resource_info resource
-                           = full_register (REGNO (info.get_vl ()));
-                         def_lookup dl = crtl->ssa->find_def (resource, insn);
-                         def_info *def
-                           = dl.matching_set_or_last_def_of_prev_group ();
-                         insn_info *def_insn = extract_single_source (def);
-                         if (def_insn && vsetvl_insn_p (def_insn->rtl ()))
-                           {
-                             vsetvl_info def_info = vsetvl_info (def_insn);
-                             if (m_dem.compatible_p (def_info, info))
-                               continue;
-                           }
-                       }
+         bitmap_clear (m_transp[bb_index]);
+         vsetvl_info &header_info = block_info.get_entry_info ();
+         vsetvl_info &footer_info = block_info.get_exit_info ();
 
-                     bitmap_clear_bit (m_transp[bb_index], i);
-                     break;
-                   }
-               }
-           }
+         if (header_info.valid_p () && anticipated_exp_p (header_info))
+           bitmap_set_bit (m_antloc[bb_index],
+                           get_expr_index (m_exprs, header_info));
 
-         continue;
+         if (footer_info.valid_p ())
+           for (int i = 0; i < num_exprs; i += 1)
+             {
+               const vsetvl_info &info = *m_exprs[i];
+               if (!info.valid_p ())
+                 continue;
+               if (available_exp_p (footer_info, info))
+                 bitmap_set_bit (m_avloc[bb_index], i);
+             }
        }
 
-      vsetvl_info &header_info = block_info.get_entry_info ();
-      vsetvl_info &footer_info = block_info.get_exit_info ();
-
-      if (header_info.valid_p () && anticipated_exp_p (header_info))
-       bitmap_set_bit (m_antloc[bb_index],
-                       get_expr_index (m_exprs, header_info));
-
-      if (footer_info.valid_p ())
-       for (int i = 0; i < num_exprs; i += 1)
-         {
-           const vsetvl_info &info = *m_exprs[i];
-           if (!info.valid_p ())
-             continue;
-           if (available_exp_p (footer_info, info))
-             bitmap_set_bit (m_avloc[bb_index], i);
-         }
-    }
-
-  for (const bb_info *bb : crtl->ssa->bbs ())
-    {
-      unsigned bb_index = bb->index ();
       if (invalid_opt_bb_p (bb->cfg_bb ()))
        {
          bitmap_clear (m_antloc[bb_index]);
          bitmap_clear (m_transp[bb_index]);
        }
+
       /* Compute ae_kill for each basic block using:
 
         ~(TRANSP | COMP)
-- 
2.36.1

Reply via email to