The LRA rematerialization patch I submitted about a day ago broke H.J.'s 32-bit bootstrap, so I switched rematerialization off right away. The set of options H.J. used for bootstrapping was very useful. I've fixed several existing and potential bugs.

Here is the patch fixing the bugs and switching LRA remat back on. The patch was bootstrapped on x86-64 and i686 (using H.J.'s options).

Committed as rev. 217588.

2014-11-14  Vladimir Makarov  <vmaka...@redhat.com>

        * lra-int.h (lra_create_live_ranges): Add parameter.
        * lra-lives.c (temp_bitmap): Move higher.
        (initiate_live_solver): Move temp_bitmap initialization into
        lra_live_ranges_init.
        (finish_live_solver): Move temp_bitmap clearing into
        lra_live_ranges_finish.
        (process_bb_lives): Add parameter.  Use it to control live info
        update and dead insn elimination.  Pass it to mark_regno_live and
        mark_regno_dead.
        (lra_create_live_ranges): Add parameter.  Pass it to
        process_bb_lives.
        (lra_live_ranges_init, lra_live_ranges_finish): See changes in
        initiate_live_solver and finish_live_solver.
        * lra-remat.c (do_remat): Process insn non-operand hard regs too.
        Use temp_bitmap to update avail_cands.
        * lra.c (lra): Pass new parameter to lra_create_live_ranges.  Move
        check with lra_need_for_spills_p after live range pass.  Switch on
        rematerialization pass.
Index: lra-int.h
===================================================================
--- lra-int.h   (revision 217458)
+++ lra-int.h   (working copy)
@@ -353,7 +353,7 @@ extern int *lra_point_freq;
 extern int lra_hard_reg_usage[FIRST_PSEUDO_REGISTER];
 
 extern int lra_live_range_iter;
-extern void lra_create_live_ranges (bool);
+extern void lra_create_live_ranges (bool, bool);
 extern lra_live_range_t lra_copy_live_range_list (lra_live_range_t);
 extern lra_live_range_t lra_merge_live_ranges (lra_live_range_t,
                                               lra_live_range_t);
Index: lra-lives.c
===================================================================
--- lra-lives.c (revision 217458)
+++ lra-lives.c (working copy)
@@ -100,6 +100,9 @@ static sparseset start_living, start_dyi
    insn.  */
 static sparseset unused_set, dead_set;
 
+/* Bitmap used for holding intermediate bitmap operation results.  */
+static bitmap_head temp_bitmap;
+
 /* Pool for pseudo live ranges.         */
 static alloc_pool live_range_pool;
 
@@ -420,9 +423,6 @@ get_bb_data_by_index (int index)
 /* Bitmap with all hard regs.  */
 static bitmap_head all_hard_regs_bitmap;
 
-/* Bitmap used for holding intermediate bitmap operation results.  */
-static bitmap_head temp_bitmap;
-
 /* The transfer function used by the DF equation solver to propagate
    live info through block with BB_INDEX according to the following
    equation:
@@ -476,7 +476,6 @@ static bitmap_head all_blocks;
 static void
 initiate_live_solver (void)
 {
-  bitmap_initialize (&temp_bitmap, &reg_obstack);
   bitmap_initialize (&all_hard_regs_bitmap, &reg_obstack);
   bitmap_set_range (&all_hard_regs_bitmap, 0, FIRST_PSEUDO_REGISTER);
   bb_data = XNEWVEC (struct bb_data, last_basic_block_for_fn (cfun));
@@ -508,7 +507,6 @@ finish_live_solver (void)
     }
   free (bb_data);
   bitmap_clear (&all_hard_regs_bitmap);
-  bitmap_clear (&temp_bitmap);
 }
 
 
@@ -640,10 +638,11 @@ check_pseudos_live_through_calls (int re
    backward scan of BB insns.  CURR_POINT is the program point where
    BB ends.  The function updates this counter and returns in
    CURR_POINT the program point where BB starts.  The function also
-   can delete the dead insns.  It returns true if pseudo live info was
+   does local live info updates and can delete the dead insns if
+   GLOBAL_LIVE_INFO_P.  It returns true if pseudo live info was
    changed at the BB start.  */
 static bool
-process_bb_lives (basic_block bb, int &curr_point)
+process_bb_lives (basic_block bb, int &curr_point, bool global_live_info_p)
 {
   int i, regno, freq;
   unsigned int j;
@@ -663,11 +662,13 @@ process_bb_lives (basic_block bb, int &c
   EXECUTE_IF_SET_IN_BITMAP (reg_live_out, FIRST_PSEUDO_REGISTER, j, bi)
     mark_pseudo_live (j, curr_point);
 
-  bb_gen_pseudos = &get_bb_data (bb)->gen_pseudos;
-  bb_killed_pseudos = &get_bb_data (bb)->killed_pseudos;
-  bitmap_clear (bb_gen_pseudos);
-  bitmap_clear (bb_killed_pseudos);
-
+  if (global_live_info_p)
+    {
+      bb_gen_pseudos = &get_bb_data (bb)->gen_pseudos;
+      bb_killed_pseudos = &get_bb_data (bb)->killed_pseudos;
+      bitmap_clear (bb_gen_pseudos);
+      bitmap_clear (bb_killed_pseudos);
+    }
   freq = REG_FREQ_FROM_BB (bb);
 
   if (lra_dump_file != NULL)
@@ -700,7 +701,7 @@ process_bb_lives (basic_block bb, int &c
 
       set = single_set (curr_insn);
 
-      if (set != NULL_RTX
+      if (global_live_info_p && set != NULL_RTX
          && REG_P (SET_DEST (set)) && REGNO (SET_DEST (set)) >= FIRST_PSEUDO_REGISTER
          && find_reg_note (curr_insn, REG_EH_REGION, NULL_RTX) == NULL_RTX
          && ! may_trap_p (PATTERN (curr_insn))
@@ -736,8 +737,8 @@ process_bb_lives (basic_block bb, int &c
                  unsigned int uid;
                  rtx_insn *insn;
 
-                 EXECUTE_IF_SET_IN_BITMAP
-                   (&lra_reg_info[dst_regno].insn_bitmap, 0, uid, bi)
+                 bitmap_copy (&temp_bitmap, &lra_reg_info[dst_regno].insn_bitmap);
+                 EXECUTE_IF_SET_IN_BITMAP (&temp_bitmap, 0, uid, bi)
                    {
                      insn = lra_insn_recog_data[uid]->insn;
                      lra_substitute_pseudo_within_insn (insn, dst_regno,
@@ -815,9 +816,9 @@ process_bb_lives (basic_block bb, int &c
       for (reg = curr_id->regs; reg != NULL; reg = reg->next)
        if (reg->type != OP_IN)
          {
-           need_curr_point_incr |= mark_regno_live (reg->regno,
-                                                    reg->biggest_mode,
-                                                    curr_point, true);
+           need_curr_point_incr
+             |= mark_regno_live (reg->regno, reg->biggest_mode,
+                                 curr_point, global_live_info_p);
            check_pseudos_live_through_calls (reg->regno);
          }
 
@@ -832,9 +833,9 @@ process_bb_lives (basic_block bb, int &c
       /* See which defined values die here.  */
       for (reg = curr_id->regs; reg != NULL; reg = reg->next)
        if (reg->type == OP_OUT && ! reg->early_clobber && ! reg->subreg_p)
-         need_curr_point_incr |= mark_regno_dead (reg->regno,
-                                                  reg->biggest_mode,
-                                                  curr_point, true);
+         need_curr_point_incr
+           |= mark_regno_dead (reg->regno, reg->biggest_mode,
+                               curr_point, global_live_info_p);
 
       for (reg = curr_static_id->hard_regs; reg != NULL; reg = reg->next)
        if (reg->type == OP_OUT && ! reg->early_clobber && ! reg->subreg_p)
@@ -874,9 +875,9 @@ process_bb_lives (basic_block bb, int &c
       for (reg = curr_id->regs; reg != NULL; reg = reg->next)
        if (reg->type == OP_IN)
          {
-           need_curr_point_incr |= mark_regno_live (reg->regno,
-                                                    reg->biggest_mode,
-                                                    curr_point, true);
+           need_curr_point_incr
+             |= mark_regno_live (reg->regno, reg->biggest_mode,
+                                 curr_point, global_live_info_p);
            check_pseudos_live_through_calls (reg->regno);
          }
 
@@ -894,9 +895,9 @@ process_bb_lives (basic_block bb, int &c
       /* Mark early clobber outputs dead.  */
       for (reg = curr_id->regs; reg != NULL; reg = reg->next)
        if (reg->type == OP_OUT && reg->early_clobber && ! reg->subreg_p)
-         need_curr_point_incr |= mark_regno_dead (reg->regno,
-                                                  reg->biggest_mode,
-                                                  curr_point, true);
+         need_curr_point_incr
+           |= mark_regno_dead (reg->regno, reg->biggest_mode,
+                               curr_point, global_live_info_p);
 
       for (reg = curr_static_id->hard_regs; reg != NULL; reg = reg->next)
        if (reg->type == OP_OUT && reg->early_clobber && ! reg->subreg_p)
@@ -969,19 +970,25 @@ process_bb_lives (basic_block bb, int &c
            make_hard_regno_born (px);
     }
 
-  /* Check if bb border live info was changed.  */
-  unsigned int live_pseudos_num = 0;
   bool live_change_p = false;
-  EXECUTE_IF_SET_IN_BITMAP (df_get_live_in (bb), FIRST_PSEUDO_REGISTER, j, bi)
+  if (global_live_info_p)
     {
-      live_pseudos_num++;
-      if (! sparseset_bit_p (pseudos_live, j))
+      /* Check if bb border live info was changed.  */
+      unsigned int live_pseudos_num = 0;
+      EXECUTE_IF_SET_IN_BITMAP (df_get_live_in (bb),
+                               FIRST_PSEUDO_REGISTER, j, bi)
        {
-         live_change_p = TRUE;
-         break;
+         live_pseudos_num++;
+         if (! sparseset_bit_p (pseudos_live, j))
+           {
+             live_change_p = TRUE;
+             break;
+           }
        }
+      live_change_p
+       = (live_change_p
+          || sparseset_cardinality (pseudos_live) != live_pseudos_num);
     }
-  live_change_p = live_change_p || sparseset_cardinality (pseudos_live) != live_pseudos_num;
 
   /* See if we'll need an increment at the end of this basic block.
      An increment is needed if the PSEUDOS_LIVE set is not empty,
@@ -1175,10 +1182,11 @@ int lra_live_range_iter;
 
 /* The main entry function creates live ranges only for memory pseudos
    (or for all ones if ALL_P), set up CONFLICT_HARD_REGS for the
-   pseudos.  It also does global live analysis only for pseudos and
-   only if the pseudo live info was changed on a BB border.  */
+   pseudos.  It also does dead insn elimination and global live
+   analysis only for pseudos and only if GLOBAL_LIVE_INFO_P and the
+   pseudo live info was changed on a BB border.  */
 void
-lra_create_live_ranges (bool all_p)
+lra_create_live_ranges (bool all_p, bool global_live_info_p)
 {
   basic_block bb;
   int i, hard_regno, max_regno = max_reg_num ();
@@ -1254,7 +1262,7 @@ lra_create_live_ranges (bool all_p)
       if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun) || bb
          == ENTRY_BLOCK_PTR_FOR_FN (cfun))
        continue;
-      if (process_bb_lives (bb, curr_point))
+      if (process_bb_lives (bb, curr_point, global_live_info_p))
        bb_live_change_p = true;
     }
   if (bb_live_change_p)
@@ -1328,6 +1336,7 @@ lra_live_ranges_init (void)
 {
   live_range_pool = create_alloc_pool ("live ranges",
                                       sizeof (struct lra_live_range), 100);
+  bitmap_initialize (&temp_bitmap, &reg_obstack);
   initiate_live_solver ();
 }
 
@@ -1336,5 +1345,6 @@ void
 lra_live_ranges_finish (void)
 {
   finish_live_solver ();
+  bitmap_clear (&temp_bitmap);
   free_alloc_pool (live_range_pool);
 }
Index: lra-remat.c
===================================================================
--- lra-remat.c (revision 217458)
+++ lra-remat.c (working copy)
@@ -1026,6 +1026,7 @@ do_remat (void)
            continue;
 
          lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
+         struct lra_static_insn_data *static_id = id->insn_static_data;
          struct lra_insn_reg *reg;
          cand_t cand;
          unsigned int cid;
@@ -1059,7 +1060,10 @@ do_remat (void)
          HOST_WIDE_INT cand_sp_offset = 0;
          if (cand != NULL)
            {
-             lra_insn_recog_data_t cand_id = lra_get_insn_recog_data (cand->insn);
+             lra_insn_recog_data_t cand_id
+               = lra_get_insn_recog_data (cand->insn);
+             struct lra_static_insn_data *static_cand_id
+               = cand_id->insn_static_data;
              rtx saved_op = *cand_id->operand_loc[cand->nop];
 
              /* Check clobbers do not kill something living.  */
@@ -1080,6 +1084,16 @@ do_remat (void)
 
              if (reg == NULL)
                {
+                 for (reg = static_cand_id->hard_regs;
+                      reg != NULL;
+                      reg = reg->next)
+                   if (reg->type != OP_IN
+                       && TEST_HARD_REG_BIT (live_hard_regs, reg->regno))
+                     break;
+               }
+
+             if (reg == NULL)
+               {
                  *cand_id->operand_loc[cand->nop] = SET_DEST (set);
                  lra_update_insn_regno_info (cand->insn);
                  bool ok_p = lra_constrain_insn (cand->insn);
@@ -1100,6 +1114,7 @@ do_remat (void)
                }
            }
 
+         bitmap_clear (&temp_bitmap);
          /* Update avail_cands (see analogous code for
             calculate_gen_cands).  */
          for (reg = id->regs; reg != NULL; reg = reg->next)
@@ -1115,7 +1130,7 @@ do_remat (void)
                    continue;
                  if (cand->regno == reg->regno
                      || input_regno_present_p (cand->insn, reg->regno))
-                   bitmap_clear_bit (&avail_cands, cand->index);
+                   bitmap_set_bit (&temp_bitmap, cand->index);
                }
 
          if (CALL_P (insn))
@@ -1124,9 +1139,10 @@ do_remat (void)
                cand = all_cands[cid];
                
                if (call_used_input_regno_present_p (cand->insn))
-                 bitmap_clear_bit (&avail_cands, cand->index);
+                 bitmap_set_bit (&temp_bitmap, cand->index);
              }
 
+         bitmap_and_compl_into (&avail_cands, &temp_bitmap);
          if ((cand = insn_to_cand[INSN_UID (insn)]) != NULL)
            bitmap_set_bit (&avail_cands, cand->index);
            
@@ -1160,6 +1176,15 @@ do_remat (void)
                for (i = 0; i < nregs; i++)
                  SET_HARD_REG_BIT (live_hard_regs, hard_regno + i);
              }
+         /* Process also hard regs (e.g. CC register) which are part
+            of insn definition.  */
+         for (reg = static_id->hard_regs; reg != NULL; reg = reg->next)
+           if (reg->type == OP_IN
+               && find_regno_note (insn, REG_DEAD, reg->regno) != NULL)
+             CLEAR_HARD_REG_BIT (live_hard_regs, reg->regno);
+           else if (reg->type != OP_IN
+                    && find_regno_note (insn, REG_UNUSED, reg->regno) == NULL)
+             SET_HARD_REG_BIT (live_hard_regs, reg->regno);
        }
     }
   bitmap_clear (&avail_cands);
Index: lra.c
===================================================================
--- lra.c       (revision 217459)
+++ lra.c       (working copy)
@@ -2296,14 +2296,17 @@ lra (FILE *f)
                  /* As a side-effect of lra_create_live_ranges, we calculate
                     actual_call_used_reg_set,  which is needed during
                     lra_inheritance.  */
-                 lra_create_live_ranges (true);
+                 lra_create_live_ranges (true, true);
                }
              lra_inheritance ();
            }
          if (live_p)
            lra_clear_live_ranges ();
-         /* We need live ranges for lra_assign -- so build them.  */
-         lra_create_live_ranges (true);
+         /* We need live ranges for lra_assign -- so build them.  But
+            don't remove dead insns or change global live info as we
+            can undo inheritance transformations after inheritance
+            pseudo assigning.  */
+         lra_create_live_ranges (true, false);
          live_p = true;
          /* If we don't spill non-reload and non-inheritance pseudos,
             there is no sense to run memory-memory move coalescing.
@@ -2322,7 +2325,7 @@ lra (FILE *f)
                {
                  if (! live_p)
                    {
-                     lra_create_live_ranges (true);
+                     lra_create_live_ranges (true, true);
                      live_p = true;
                    }
                  if (lra_coalesce ())
@@ -2338,21 +2341,23 @@ lra (FILE *f)
       bitmap_clear (&lra_subreg_reload_pseudos);
       bitmap_clear (&lra_inheritance_pseudos);
       bitmap_clear (&lra_split_regs);
-      if (! lra_need_for_spills_p ())
-       break;
       if (! live_p)
        {
          /* We need full live info for spilling pseudos into
             registers instead of memory.  */
-         lra_create_live_ranges (lra_reg_spill_p);
+         lra_create_live_ranges (lra_reg_spill_p, true);
          live_p = true;
        }
+      /* We should check necessity for spilling here as the above live
+        range pass can remove spilled pseudos.  */
+      if (! lra_need_for_spills_p ())
+       break;
       /* Now we know what pseudos should be spilled.  Try to
         rematerialize them first.  */
-      if (0 && lra_remat ())
+      if (lra_remat ())
        {
          /* We need full live info -- see the comment above.  */
-         lra_create_live_ranges (lra_reg_spill_p);
+         lra_create_live_ranges (lra_reg_spill_p, true);
          live_p = true;
          if (! lra_need_for_spills_p ())
            break;

Reply via email to