This patch changes copies so that they are created between objects instead
of between allocnos.

gcc/ChangeLog:

        * ira-build.cc (find_allocno_copy): Remove forward declaration and
        adjust to take objects.
        (find_object): New.
        (ira_create_copy): Adjust.
        (add_allocno_copy_to_list): Adjust.
        (swap_allocno_copy_ends_if_necessary): Adjust.
        (ira_add_allocno_copy): Adjust.
        (print_copy): Adjust.
        (print_allocno_copies): Adjust.
        (ira_flattening): Adjust.
        * ira-color.cc (INCLUDE_VECTOR): Include vector.
        (struct allocno_color_data): Adjust.
        (struct allocno_hard_regs_subnode): Adjust.
        (form_allocno_hard_regs_nodes_forest): Adjust.
        (update_left_conflict_sizes_p): Adjust.
        (struct update_cost_queue_elem): Adjust.
        (queue_update_cost): Adjust.
        (get_next_update_cost): Adjust.
        (update_costs_from_allocno): Adjust.
        (update_conflict_hard_regno_costs): Adjust.
        (assign_hard_reg): Adjust.
        (objects_conflict_by_live_ranges_p): New.
        (allocno_thread_conflict_p): Adjust.
        (object_thread_conflict_p): Ditto.
        (merge_threads): Ditto.
        (form_threads_from_copies): Ditto.
        (form_threads_from_bucket): Ditto.
        (form_threads_from_colorable_allocno): Ditto.
        (init_allocno_threads): Ditto.
        (add_allocno_to_bucket): Ditto.
        (delete_allocno_from_bucket): Ditto.
        (allocno_copy_cost_saving): Ditto.
        (color_allocnos): Ditto.
        (color_pass): Ditto.
        (update_curr_costs): Ditto.
        (coalesce_allocnos): Ditto.
        (ira_reuse_stack_slot): Ditto.
        (ira_initiate_assign): Ditto.
        (ira_finish_assign): Ditto.
        * ira-conflicts.cc (allocnos_conflict_for_copy_p): Ditto.
        (REG_SUBREG_P): Ditto.
        (subreg_move_p): New.
        (regs_non_conflict_for_copy_p): New.
        (subreg_reg_align_and_times_p): New.
        (process_regs_for_copy): Ditto.
        (add_insn_allocno_copies): Ditto.
        (propagate_copies): Ditto.
        * ira-emit.cc (add_range_and_copies_from_move_list): Ditto.
        * ira-int.h (struct ira_allocno_copy): Ditto.
        (ira_add_allocno_copy): Ditto.
        (find_object): Exported.
        (subreg_move_p): Exported.
        * ira.cc (print_redundant_copies): Exported.

---
 gcc/ira-build.cc     | 154 +++++++-----
 gcc/ira-color.cc     | 541 +++++++++++++++++++++++++++++++------------
 gcc/ira-conflicts.cc | 173 +++++++++++---
 gcc/ira-emit.cc      |  10 +-
 gcc/ira-int.h        |  10 +-
 gcc/ira.cc           |   5 +-
 6 files changed, 646 insertions(+), 247 deletions(-)

diff --git a/gcc/ira-build.cc b/gcc/ira-build.cc
index a32693e69e4..13f0f7336ed 100644
--- a/gcc/ira-build.cc
+++ b/gcc/ira-build.cc
@@ -36,9 +36,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfgloop.h"
 #include "subreg-live-range.h"
 
-static ira_copy_t find_allocno_copy (ira_allocno_t, ira_allocno_t, rtx_insn *,
-                                    ira_loop_tree_node_t);
-
 /* The root of the loop tree corresponding to the all function.  */
 ira_loop_tree_node_t ira_loop_tree_root;
 
@@ -520,6 +517,16 @@ find_object (ira_allocno_t a, poly_int64 offset, poly_int64 size)
   return find_object (a, subreg_start, subreg_nregs);
 }
 
+/* Return object in allocno A for REG.  */
+ira_object_t
+find_object (ira_allocno_t a, rtx reg)
+{
+  if (has_subreg_object_p (a) && read_modify_subreg_p (reg))
+    return find_object (a, SUBREG_BYTE (reg), GET_MODE_SIZE (GET_MODE (reg)));
+  else
+    return find_object (a, 0, ALLOCNO_NREGS (a));
+}
+
 /* Return the object in allocno A which match START & NREGS.  Create when not
    found.  */
 ira_object_t
@@ -1503,27 +1510,36 @@ initiate_copies (void)
 /* Return copy connecting A1 and A2 and originated from INSN of
    LOOP_TREE_NODE if any.  */
 static ira_copy_t
-find_allocno_copy (ira_allocno_t a1, ira_allocno_t a2, rtx_insn *insn,
+find_allocno_copy (ira_object_t obj1, ira_object_t obj2, rtx_insn *insn,
                   ira_loop_tree_node_t loop_tree_node)
 {
   ira_copy_t cp, next_cp;
-  ira_allocno_t another_a;
+  ira_object_t another_obj;
 
+  ira_allocno_t a1 = OBJECT_ALLOCNO (obj1);
   for (cp = ALLOCNO_COPIES (a1); cp != NULL; cp = next_cp)
     {
-      if (cp->first == a1)
+      ira_allocno_t first_a = OBJECT_ALLOCNO (cp->first);
+      ira_allocno_t second_a = OBJECT_ALLOCNO (cp->second);
+      if (first_a == a1)
        {
          next_cp = cp->next_first_allocno_copy;
-         another_a = cp->second;
+         if (cp->first == obj1)
+           another_obj = cp->second;
+         else
+           continue;
        }
-      else if (cp->second == a1)
+      else if (second_a == a1)
        {
          next_cp = cp->next_second_allocno_copy;
-         another_a = cp->first;
+         if (cp->second == obj1)
+           another_obj = cp->first;
+         else
+           continue;
        }
       else
        gcc_unreachable ();
-      if (another_a == a2 && cp->insn == insn
+      if (another_obj == obj2 && cp->insn == insn
          && cp->loop_tree_node == loop_tree_node)
        return cp;
     }
@@ -1533,7 +1549,7 @@ find_allocno_copy (ira_allocno_t a1, ira_allocno_t a2, rtx_insn *insn,
 /* Create and return copy with given attributes LOOP_TREE_NODE, FIRST,
    SECOND, FREQ, CONSTRAINT_P, and INSN.  */
 ira_copy_t
-ira_create_copy (ira_allocno_t first, ira_allocno_t second, int freq,
+ira_create_copy (ira_object_t first, ira_object_t second, int freq,
                 bool constraint_p, rtx_insn *insn,
                 ira_loop_tree_node_t loop_tree_node)
 {
@@ -1557,28 +1573,29 @@ ira_create_copy (ira_allocno_t first, ira_allocno_t second, int freq,
 static void
 add_allocno_copy_to_list (ira_copy_t cp)
 {
-  ira_allocno_t first = cp->first, second = cp->second;
+  ira_object_t first = cp->first, second = cp->second;
+  ira_allocno_t a1 = OBJECT_ALLOCNO (first), a2 = OBJECT_ALLOCNO (second);
 
   cp->prev_first_allocno_copy = NULL;
   cp->prev_second_allocno_copy = NULL;
-  cp->next_first_allocno_copy = ALLOCNO_COPIES (first);
+  cp->next_first_allocno_copy = ALLOCNO_COPIES (a1);
   if (cp->next_first_allocno_copy != NULL)
     {
-      if (cp->next_first_allocno_copy->first == first)
+      if (OBJECT_ALLOCNO (cp->next_first_allocno_copy->first) == a1)
        cp->next_first_allocno_copy->prev_first_allocno_copy = cp;
       else
        cp->next_first_allocno_copy->prev_second_allocno_copy = cp;
     }
-  cp->next_second_allocno_copy = ALLOCNO_COPIES (second);
+  cp->next_second_allocno_copy = ALLOCNO_COPIES (a2);
   if (cp->next_second_allocno_copy != NULL)
     {
-      if (cp->next_second_allocno_copy->second == second)
+      if (OBJECT_ALLOCNO (cp->next_second_allocno_copy->second) == a2)
        cp->next_second_allocno_copy->prev_second_allocno_copy = cp;
       else
        cp->next_second_allocno_copy->prev_first_allocno_copy = cp;
     }
-  ALLOCNO_COPIES (first) = cp;
-  ALLOCNO_COPIES (second) = cp;
+  ALLOCNO_COPIES (a1) = cp;
+  ALLOCNO_COPIES (a2) = cp;
 }
 
 /* Make a copy CP a canonical copy where number of the
@@ -1586,7 +1603,8 @@ add_allocno_copy_to_list (ira_copy_t cp)
 static void
 swap_allocno_copy_ends_if_necessary (ira_copy_t cp)
 {
-  if (ALLOCNO_NUM (cp->first) <= ALLOCNO_NUM (cp->second))
+  if (ALLOCNO_NUM (OBJECT_ALLOCNO (cp->first))
+      <= ALLOCNO_NUM (OBJECT_ALLOCNO (cp->second)))
     return;
 
   std::swap (cp->first, cp->second);
@@ -1595,11 +1613,10 @@ swap_allocno_copy_ends_if_necessary (ira_copy_t cp)
 }
 
 /* Create (or update frequency if the copy already exists) and return
-   the copy of allocnos FIRST and SECOND with frequency FREQ
-   corresponding to move insn INSN (if any) and originated from
-   LOOP_TREE_NODE.  */
+   the copy of objects FIRST and SECOND with frequency FREQ corresponding to
+   move insn INSN (if any) and originated from LOOP_TREE_NODE.  */
 ira_copy_t
-ira_add_allocno_copy (ira_allocno_t first, ira_allocno_t second, int freq,
+ira_add_allocno_copy (ira_object_t first, ira_object_t second, int freq,
                      bool constraint_p, rtx_insn *insn,
                      ira_loop_tree_node_t loop_tree_node)
 {
@@ -1618,15 +1635,38 @@ ira_add_allocno_copy (ira_allocno_t first, ira_allocno_t second, int freq,
   return cp;
 }
 
+/* Create (or update frequency if the copy already exists) and return
+   the copy of allocnos FIRST and SECOND with frequency FREQ
+   corresponding to move insn INSN (if any) and originated from
+   LOOP_TREE_NODE.  */
+ira_copy_t
+ira_add_allocno_copy (ira_allocno_t first, ira_allocno_t second, int freq,
+                     bool constraint_p, rtx_insn *insn,
+                     ira_loop_tree_node_t loop_tree_node)
+{
+  ira_object_t obj1 = get_full_object (first);
+  ira_object_t obj2 = get_full_object (second);
+  gcc_assert (obj1 != NULL && obj2 != NULL);
+  return ira_add_allocno_copy (obj1, obj2, freq, constraint_p, insn,
+                              loop_tree_node);
+}
+
 /* Print info about copy CP into file F.  */
 static void
 print_copy (FILE *f, ira_copy_t cp)
 {
-  fprintf (f, "  cp%d:a%d(r%d)<->a%d(r%d)@%d:%s\n", cp->num,
-          ALLOCNO_NUM (cp->first), ALLOCNO_REGNO (cp->first),
-          ALLOCNO_NUM (cp->second), ALLOCNO_REGNO (cp->second), cp->freq,
-          cp->insn != NULL
-          ? "move" : cp->constraint_p ? "constraint" : "shuffle");
+  ira_allocno_t a1 = OBJECT_ALLOCNO (cp->first);
+  ira_allocno_t a2 = OBJECT_ALLOCNO (cp->second);
+  fprintf (f, "  cp%d:a%d(r%d", cp->num, ALLOCNO_NUM (a1), ALLOCNO_REGNO (a1));
+  if (ALLOCNO_NREGS (a1) != OBJECT_NREGS (cp->first))
+    fprintf (f, "_obj%d", OBJECT_INDEX (cp->first));
+  fprintf (f, ")<->a%d(r%d", ALLOCNO_NUM (a2), ALLOCNO_REGNO (a2));
+  if (ALLOCNO_NREGS (a2) != OBJECT_NREGS (cp->second))
+    fprintf (f, "_obj%d", OBJECT_INDEX (cp->second));
+  fprintf (f, ")@%d:%s\n", cp->freq,
+          cp->insn != NULL   ? "move"
+          : cp->constraint_p ? "constraint"
+                             : "shuffle");
 }
 
 DEBUG_FUNCTION void
@@ -1673,24 +1713,25 @@ ira_debug_copies (void)
 static void
 print_allocno_copies (FILE *f, ira_allocno_t a)
 {
-  ira_allocno_t another_a;
+  ira_object_t another_obj;
   ira_copy_t cp, next_cp;
 
   fprintf (f, " a%d(r%d):", ALLOCNO_NUM (a), ALLOCNO_REGNO (a));
   for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
     {
-      if (cp->first == a)
+      if (OBJECT_ALLOCNO (cp->first) == a)
        {
          next_cp = cp->next_first_allocno_copy;
-         another_a = cp->second;
+         another_obj = cp->second;
        }
-      else if (cp->second == a)
+      else if (OBJECT_ALLOCNO (cp->second) == a)
        {
          next_cp = cp->next_second_allocno_copy;
-         another_a = cp->first;
+         another_obj = cp->first;
        }
       else
        gcc_unreachable ();
+      ira_allocno_t another_a = OBJECT_ALLOCNO (another_obj);
       fprintf (f, " cp%d:a%d(r%d)@%d", cp->num,
               ALLOCNO_NUM (another_a), ALLOCNO_REGNO (another_a), cp->freq);
     }
@@ -3480,25 +3521,21 @@ ira_flattening (int max_regno_before_emit, int ira_max_point_before_emit)
      copies.  */
   FOR_EACH_COPY (cp, ci)
     {
-      if (ALLOCNO_CAP_MEMBER (cp->first) != NULL
-         || ALLOCNO_CAP_MEMBER (cp->second) != NULL)
+      ira_allocno_t a1 = OBJECT_ALLOCNO (cp->first);
+      ira_allocno_t a2 = OBJECT_ALLOCNO (cp->second);
+      if (ALLOCNO_CAP_MEMBER (a1) != NULL || ALLOCNO_CAP_MEMBER (a2) != NULL)
        {
          if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL)
-           fprintf
-             (ira_dump_file, "      Remove cp%d:%c%dr%d-%c%dr%d\n",
-              cp->num, ALLOCNO_CAP_MEMBER (cp->first) != NULL ? 'c' : 'a',
-              ALLOCNO_NUM (cp->first),
-              REGNO (allocno_emit_reg (cp->first)),
-              ALLOCNO_CAP_MEMBER (cp->second) != NULL ? 'c' : 'a',
-              ALLOCNO_NUM (cp->second),
-              REGNO (allocno_emit_reg (cp->second)));
+           fprintf (ira_dump_file, "      Remove cp%d:%c%dr%d-%c%dr%d\n",
+                    cp->num, ALLOCNO_CAP_MEMBER (a1) != NULL ? 'c' : 'a',
+                    ALLOCNO_NUM (a1), REGNO (allocno_emit_reg (a1)),
+                    ALLOCNO_CAP_MEMBER (a2) != NULL ? 'c' : 'a',
+                    ALLOCNO_NUM (a2), REGNO (allocno_emit_reg (a2)));
          cp->loop_tree_node = NULL;
          continue;
        }
-      first
-       = regno_top_level_allocno_map[REGNO (allocno_emit_reg (cp->first))];
-      second
-       = regno_top_level_allocno_map[REGNO (allocno_emit_reg (cp->second))];
+      first = regno_top_level_allocno_map[REGNO (allocno_emit_reg (a1))];
+      second = regno_top_level_allocno_map[REGNO (allocno_emit_reg (a2))];
       node = cp->loop_tree_node;
       if (node == NULL)
        keep_p = true; /* It copy generated in ira-emit.cc.  */
@@ -3506,8 +3543,8 @@ ira_flattening (int max_regno_before_emit, int ira_max_point_before_emit)
        {
          /* Check that the copy was not propagated from level on
             which we will have different pseudos.  */
-         node_first = node->regno_allocno_map[ALLOCNO_REGNO (cp->first)];
-         node_second = node->regno_allocno_map[ALLOCNO_REGNO (cp->second)];
+         node_first = node->regno_allocno_map[ALLOCNO_REGNO (a1)];
+         node_second = node->regno_allocno_map[ALLOCNO_REGNO (a2)];
          keep_p = ((REGNO (allocno_emit_reg (first))
                     == REGNO (allocno_emit_reg (node_first)))
                     && (REGNO (allocno_emit_reg (second))
@@ -3516,18 +3553,18 @@ ira_flattening (int max_regno_before_emit, int ira_max_point_before_emit)
       if (keep_p)
        {
          cp->loop_tree_node = ira_loop_tree_root;
-         cp->first = first;
-         cp->second = second;
+         cp->first = find_object_anyway (first, OBJECT_START (cp->first),
+                                         OBJECT_NREGS (cp->first));
+         cp->second = find_object_anyway (second, OBJECT_START (cp->second),
+                                          OBJECT_NREGS (cp->second));
        }
       else
        {
          cp->loop_tree_node = NULL;
          if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL)
            fprintf (ira_dump_file, "      Remove cp%d:a%dr%d-a%dr%d\n",
-                    cp->num, ALLOCNO_NUM (cp->first),
-                    REGNO (allocno_emit_reg (cp->first)),
-                    ALLOCNO_NUM (cp->second),
-                    REGNO (allocno_emit_reg (cp->second)));
+                    cp->num, ALLOCNO_NUM (a1), REGNO (allocno_emit_reg (a1)),
+                    ALLOCNO_NUM (a2), REGNO (allocno_emit_reg (a2)));
        }
     }
   /* Remove unnecessary allocnos on lower levels of the loop tree.  */
@@ -3563,9 +3600,10 @@ ira_flattening (int max_regno_before_emit, int ira_max_point_before_emit)
          finish_copy (cp);
          continue;
        }
-      ira_assert
-       (ALLOCNO_LOOP_TREE_NODE (cp->first) == ira_loop_tree_root
-        && ALLOCNO_LOOP_TREE_NODE (cp->second) == ira_loop_tree_root);
+      ira_assert (ALLOCNO_LOOP_TREE_NODE (OBJECT_ALLOCNO (cp->first))
+                   == ira_loop_tree_root
+                 && ALLOCNO_LOOP_TREE_NODE (OBJECT_ALLOCNO (cp->second))
+                      == ira_loop_tree_root);
       add_allocno_copy_to_list (cp);
       swap_allocno_copy_ends_if_necessary (cp);
     }
diff --git a/gcc/ira-color.cc b/gcc/ira-color.cc
index 8aed25144b9..099312bcdb3 100644
--- a/gcc/ira-color.cc
+++ b/gcc/ira-color.cc
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #include "config.h"
 #define INCLUDE_MAP
+#define INCLUDE_VECTOR
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
@@ -150,11 +151,18 @@ struct allocno_color_data
   struct update_cost_record *update_cost_records;
   /* Threads.  We collect allocnos connected by copies into threads
      and try to assign hard regs to allocnos by threads.  */
-  /* Allocno representing all thread.  */
-  ira_allocno_t first_thread_allocno;
+  /* The head objects of the thread, indexed by object index.  */
+  ira_object_t *first_thread_objects;
   /* Allocnos in thread forms a cycle list through the following
      member.  */
-  ira_allocno_t next_thread_allocno;
+  ira_object_t *next_thread_objects;
+  /* The allocno shared by the whole thread.  */
+  ira_allocno_t first_thread_allocno;
+  /* The offset start relative to the first_thread_allocno.  */
+  int first_thread_offset;
+  /* All allocnos belonging to the thread.  */
+  bitmap thread_allocnos;
+  /* The freq sum of all thread allocno.  */
   /* All thread frequency.  Defined only for first thread allocno.  */
   int thread_freq;
   /* Sum of frequencies of hard register preferences of the allocno.  */
@@ -188,6 +196,9 @@ static bitmap coloring_allocno_bitmap;
    allocnos.  */
 static bitmap consideration_allocno_bitmap;
 
+/* Bitmap of allocnos which are not trivially colorable.  */
+static bitmap uncolorable_allocno_set;
+
 /* All allocnos sorted according their priorities.  */
 static ira_allocno_t *sorted_allocnos;
 
@@ -647,9 +658,13 @@ struct allocno_hard_regs_subnode
      Overall conflict size is
      left_conflict_subnodes_size
        + MIN (max_node_impact - left_conflict_subnodes_size,
-              left_conflict_size)
+             left_conflict_size)
+     Use MIN here to ensure that the total conflict does not exceed
+     max_node_impact.
   */
+  /* The total conflict size of subnodes.  */
   short left_conflict_subnodes_size;
+  /* The maximum number of registers that the current node can use.  */
   short max_node_impact;
 };
 
@@ -758,6 +773,8 @@ form_allocno_hard_regs_nodes_forest (void)
       collect_allocno_hard_regs_cover (hard_regs_roots,
                                       allocno_data->profitable_hard_regs);
       allocno_hard_regs_node = NULL;
+      /* Find the ancestor node in the forest which covers all nodes.  The
+        ancestor is the smallest superset of profitable_hard_regs.  */
       for (j = 0; hard_regs_node_vec.iterate (j, &node); j++)
        allocno_hard_regs_node
          = (j == 0
@@ -990,6 +1007,8 @@ update_left_conflict_sizes_p (ira_allocno_t a,
                                        removed_node->hard_regs->set));
   start = node_preorder_num * allocno_hard_regs_nodes_num;
   i = allocno_hard_regs_subnode_index[start + removed_node->preorder_num];
+  /* i < 0 means removed_node is the parent of node rather than node being the
+     parent of removed_node.  */
   if (i < 0)
     i = 0;
   subnodes = allocno_hard_regs_subnodes + data->hard_regs_subnodes_start;
@@ -999,6 +1018,7 @@ update_left_conflict_sizes_p (ira_allocno_t a,
              - subnodes[i].left_conflict_subnodes_size,
              subnodes[i].left_conflict_size));
   subnodes[i].left_conflict_size -= size;
+  /* Update all ancestors for subnode i.  */
   for (;;)
     {
       conflict_size
@@ -1242,6 +1262,9 @@ struct update_cost_queue_elem
      connecting this allocno to the one being allocated.  */
   int divisor;
 
+  /* Hard register regno assigned to current ALLOCNO.  */
+  int hard_regno;
+
   /* Allocno from which we started chaining costs of connected
      allocnos. */
   ira_allocno_t start;
@@ -1308,7 +1331,7 @@ start_update_cost (void)
 /* Add (ALLOCNO, START, FROM, DIVISOR) to the end of update_cost_queue, unless
    ALLOCNO is already in the queue, or has NO_REGS class.  */
 static inline void
-queue_update_cost (ira_allocno_t allocno, ira_allocno_t start,
+queue_update_cost (ira_allocno_t allocno, int hard_regno, ira_allocno_t start,
                   ira_allocno_t from, int divisor)
 {
   struct update_cost_queue_elem *elem;
@@ -1317,6 +1340,7 @@ queue_update_cost (ira_allocno_t allocno, ira_allocno_t start,
   if (elem->check != update_cost_check
       && ALLOCNO_CLASS (allocno) != NO_REGS)
     {
+      elem->hard_regno = hard_regno;
       elem->check = update_cost_check;
       elem->start = start;
       elem->from = from;
@@ -1334,8 +1358,8 @@ queue_update_cost (ira_allocno_t allocno, ira_allocno_t start,
    false if the queue was empty, otherwise make (*ALLOCNO, *START,
    *FROM, *DIVISOR) describe the removed element.  */
 static inline bool
-get_next_update_cost (ira_allocno_t *allocno, ira_allocno_t *start,
-                     ira_allocno_t *from, int *divisor)
+get_next_update_cost (ira_allocno_t *allocno, int *hard_regno,
+                     ira_allocno_t *start, ira_allocno_t *from, int *divisor)
 {
   struct update_cost_queue_elem *elem;
 
@@ -1348,6 +1372,8 @@ get_next_update_cost (ira_allocno_t *allocno, ira_allocno_t *start,
   *from = elem->from;
   *divisor = elem->divisor;
   update_cost_queue = elem->next;
+  if (hard_regno != NULL)
+    *hard_regno = elem->hard_regno;
   return true;
 }
 
@@ -1449,31 +1475,41 @@ update_costs_from_allocno (ira_allocno_t allocno, int hard_regno,
   enum reg_class rclass, aclass;
   ira_allocno_t another_allocno, start = allocno, from = NULL;
   ira_copy_t cp, next_cp;
+  ira_object_t another_obj;
+  unsigned int obj_index1, obj_index2;
 
   rclass = REGNO_REG_CLASS (hard_regno);
   do
     {
+      gcc_assert (hard_regno >= 0);
       mode = ALLOCNO_MODE (allocno);
       ira_init_register_move_cost_if_necessary (mode);
       for (cp = ALLOCNO_COPIES (allocno); cp != NULL; cp = next_cp)
        {
-         if (cp->first == allocno)
+         if (OBJECT_ALLOCNO (cp->first) == allocno)
            {
+             obj_index1 = OBJECT_INDEX (cp->first);
+             obj_index2 = OBJECT_INDEX (cp->second);
              next_cp = cp->next_first_allocno_copy;
-             another_allocno = cp->second;
+             another_obj = cp->second;
            }
-         else if (cp->second == allocno)
+         else if (OBJECT_ALLOCNO (cp->second) == allocno)
            {
+             obj_index1 = OBJECT_INDEX (cp->second);
+             obj_index2 = OBJECT_INDEX (cp->first);
              next_cp = cp->next_second_allocno_copy;
-             another_allocno = cp->first;
+             another_obj = cp->first;
            }
          else
            gcc_unreachable ();
 
+         another_allocno = OBJECT_ALLOCNO (another_obj);
          if (another_allocno == from
              || (ALLOCNO_COLOR_DATA (another_allocno) != NULL
-                 && (ALLOCNO_COLOR_DATA (allocno)->first_thread_allocno
-                     != ALLOCNO_COLOR_DATA (another_allocno)->first_thread_allocno)))
+                 && (ALLOCNO_COLOR_DATA (allocno)
+                       ->first_thread_objects[obj_index1]
+                     != ALLOCNO_COLOR_DATA (another_allocno)
+                          ->first_thread_objects[obj_index2])))
            continue;
 
          aclass = ALLOCNO_CLASS (another_allocno);
@@ -1482,6 +1518,8 @@ update_costs_from_allocno (ira_allocno_t allocno, int hard_regno,
              || ALLOCNO_ASSIGNED_P (another_allocno))
            continue;
 
+         ira_allocno_t first_allocno = OBJECT_ALLOCNO (cp->first);
+         ira_allocno_t second_allocno = OBJECT_ALLOCNO (cp->second);
          /* If we have different modes use the smallest one.  It is
             a sub-register move.  It is hard to predict what LRA
             will reload (the pseudo or its sub-register) but LRA
@@ -1489,14 +1527,21 @@ update_costs_from_allocno (ira_allocno_t allocno, int hard_regno,
             register classes bigger modes might be invalid,
             e.g. DImode for AREG on x86.  For such cases the
             register move cost will be maximal.  */
-         mode = narrower_subreg_mode (ALLOCNO_MODE (cp->first),
-                                      ALLOCNO_MODE (cp->second));
+         mode = narrower_subreg_mode (ALLOCNO_MODE (first_allocno),
+                                      ALLOCNO_MODE (second_allocno));
 
          ira_init_register_move_cost_if_necessary (mode);
 
-         cost = (cp->second == allocno
-                 ? ira_register_move_cost[mode][rclass][aclass]
-                 : ira_register_move_cost[mode][aclass][rclass]);
+         cost = (second_allocno == allocno
+                   ? ira_register_move_cost[mode][rclass][aclass]
+                   : ira_register_move_cost[mode][aclass][rclass]);
+         /* Adjust the hard regno for another_allocno for subreg copy.  */
+         int start_regno = hard_regno;
+         if (cp->insn && subreg_move_p (cp->first, cp->second))
+           {
+             int diff = OBJECT_START (cp->first) - OBJECT_START (cp->second);
+             start_regno += (first_allocno == allocno ? diff : -diff);
+           }
          if (decr_p)
            cost = -cost;
 
@@ -1505,25 +1550,30 @@ update_costs_from_allocno (ira_allocno_t allocno, int hard_regno,
 
          if (internal_flag_ira_verbose > 5 && ira_dump_file != NULL)
            fprintf (ira_dump_file,
-                    "          a%dr%d (hr%d): update cost by %d, conflict cost by %d\n",
-                    ALLOCNO_NUM (another_allocno), ALLOCNO_REGNO (another_allocno),
-                    hard_regno, update_cost, update_conflict_cost);
+                    "          a%dr%d (hr%d): update cost by %d, conflict "
+                    "cost by %d\n",
+                    ALLOCNO_NUM (another_allocno),
+                    ALLOCNO_REGNO (another_allocno), start_regno, update_cost,
+                    update_conflict_cost);
          if (update_cost == 0)
            continue;
 
-         if (! update_allocno_cost (another_allocno, hard_regno,
-                                    update_cost, update_conflict_cost))
+         if (start_regno < 0
+             || (start_regno + ALLOCNO_NREGS (another_allocno))
+                  > FIRST_PSEUDO_REGISTER
+             || !update_allocno_cost (another_allocno, start_regno,
+                                      update_cost, update_conflict_cost))
            continue;
-         queue_update_cost (another_allocno, start, allocno,
+         queue_update_cost (another_allocno, start_regno, start, allocno,
                             divisor * COST_HOP_DIVISOR);
          if (record_p && ALLOCNO_COLOR_DATA (another_allocno) != NULL)
            ALLOCNO_COLOR_DATA (another_allocno)->update_cost_records
-             = get_update_cost_record (hard_regno, divisor,
-                                       ALLOCNO_COLOR_DATA (another_allocno)
-                                       ->update_cost_records);
+             = get_update_cost_record (
+               start_regno, divisor,
+               ALLOCNO_COLOR_DATA (another_allocno)->update_cost_records);
        }
-    }
-  while (get_next_update_cost (&allocno, &start, &from, &divisor));
+  } while (
+    get_next_update_cost (&allocno, &hard_regno, &start, &from, &divisor));
 }
 
 /* Decrease preferred ALLOCNO hard register costs and costs of
@@ -1632,23 +1682,25 @@ update_conflict_hard_regno_costs (int *costs, enum reg_class aclass,
   enum reg_class another_aclass;
   ira_allocno_t allocno, another_allocno, start, from;
   ira_copy_t cp, next_cp;
+  ira_object_t another_obj;
 
-  while (get_next_update_cost (&allocno, &start, &from, &divisor))
+  while (get_next_update_cost (&allocno, NULL, &start, &from, &divisor))
     for (cp = ALLOCNO_COPIES (allocno); cp != NULL; cp = next_cp)
       {
-       if (cp->first == allocno)
+       if (OBJECT_ALLOCNO (cp->first) == allocno)
          {
            next_cp = cp->next_first_allocno_copy;
-           another_allocno = cp->second;
+           another_obj = cp->second;
          }
-       else if (cp->second == allocno)
+       else if (OBJECT_ALLOCNO (cp->second) == allocno)
          {
            next_cp = cp->next_second_allocno_copy;
-           another_allocno = cp->first;
+           another_obj = cp->first;
          }
        else
          gcc_unreachable ();
 
+       another_allocno = OBJECT_ALLOCNO (another_obj);
        another_aclass = ALLOCNO_CLASS (another_allocno);
        if (another_allocno == from
            || ALLOCNO_ASSIGNED_P (another_allocno)
@@ -1696,7 +1748,8 @@ update_conflict_hard_regno_costs (int *costs, enum reg_class aclass,
                           * COST_HOP_DIVISOR
                           * COST_HOP_DIVISOR
                           * COST_HOP_DIVISOR))
-         queue_update_cost (another_allocno, start, from, divisor * COST_HOP_DIVISOR);
+         queue_update_cost (another_allocno, -1, start, from,
+                            divisor * COST_HOP_DIVISOR);
       }
 }
 
@@ -2034,6 +2087,11 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
       FOR_EACH_OBJECT_CONFLICT (obj, conflict_obj, oci)
         {
          ira_allocno_t conflict_a = OBJECT_ALLOCNO (conflict_obj);
+
+         if (ALLOCNO_COLOR_DATA (a)->first_thread_allocno
+             == ALLOCNO_COLOR_DATA (conflict_a)->first_thread_allocno)
+           continue;
+
          enum reg_class conflict_aclass;
          allocno_color_data_t data = ALLOCNO_COLOR_DATA (conflict_a);
 
@@ -2225,7 +2283,8 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
                      continue;
                    full_costs[j] -= conflict_costs[k];
                  }
-             queue_update_cost (conflict_a, conflict_a, NULL, COST_HOP_DIVISOR);
+             queue_update_cost (conflict_a, -1, conflict_a, NULL,
+                                COST_HOP_DIVISOR);
            }
        }
     }
@@ -2239,7 +2298,7 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
   if (! retry_p)
     {
       start_update_cost ();
-      queue_update_cost (a, a, NULL, COST_HOP_DIVISOR);
+      queue_update_cost (a, -1, a, NULL, COST_HOP_DIVISOR);
       update_conflict_hard_regno_costs (full_costs, aclass, false);
     }
   min_cost = min_full_cost = INT_MAX;
@@ -2264,17 +2323,17 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
       if (!HONOR_REG_ALLOC_ORDER)
        {
          if ((saved_nregs = calculate_saved_nregs (hard_regno, mode)) != 0)
-         /* We need to save/restore the hard register in
-            epilogue/prologue.  Therefore we increase the cost.  */
-         {
-           rclass = REGNO_REG_CLASS (hard_regno);
-           add_cost = ((ira_memory_move_cost[mode][rclass][0]
-                        + ira_memory_move_cost[mode][rclass][1])
+           /* We need to save/restore the hard register in
+              epilogue/prologue.  Therefore we increase the cost.  */
+           {
+             rclass = REGNO_REG_CLASS (hard_regno);
+             add_cost = ((ira_memory_move_cost[mode][rclass][0]
+                          + ira_memory_move_cost[mode][rclass][1])
                        * saved_nregs / hard_regno_nregs (hard_regno,
                                                          mode) - 1);
-           cost += add_cost;
-           full_cost += add_cost;
-         }
+             cost += add_cost;
+             full_cost += add_cost;
+           }
        }
       if (min_cost > cost)
        min_cost = cost;
@@ -2393,54 +2452,173 @@ copy_freq_compare_func (const void *v1p, const void 
*v2p)
   return cp1->num - cp2->num;
 }
 
-
+/* Return true if object OBJ1 conflicts with object OBJ2 by live ranges.  */
+static bool
+objects_conflict_by_live_ranges_p (ira_object_t obj1, ira_object_t obj2)
+{
+  rtx reg1, reg2;
+  ira_allocno_t a1 = OBJECT_ALLOCNO (obj1);
+  ira_allocno_t a2 = OBJECT_ALLOCNO (obj2);
+  if (a1 == a2)
+    return false;
+  reg1 = regno_reg_rtx[ALLOCNO_REGNO (a1)];
+  reg2 = regno_reg_rtx[ALLOCNO_REGNO (a2)];
+  if (reg1 != NULL && reg2 != NULL
+      && ORIGINAL_REGNO (reg1) == ORIGINAL_REGNO (reg2))
+    return false;
+
+  /* We don't keep live ranges for caps because they can be quite big.
+     Use the ranges of the non-cap allocnos the caps were created from.  */
+  a1 = get_cap_member (a1);
+  a2 = get_cap_member (a2);
+
+  obj1 = find_object (a1, OBJECT_START (obj1), OBJECT_NREGS (obj1));
+  obj2 = find_object (a2, OBJECT_START (obj2), OBJECT_NREGS (obj2));
+  return ira_live_ranges_intersect_p (OBJECT_LIVE_RANGES (obj1),
+                                     OBJECT_LIVE_RANGES (obj2));
+}
 
-/* Return true if any allocno from thread of A1 conflicts with any
-   allocno from thread A2.  */
+/* Return true if any object from the thread of OBJ1 conflicts with any
+   object from the thread of OBJ2 once OBJ2 is placed on top of OBJ1.  */
 static bool
-allocno_thread_conflict_p (ira_allocno_t a1, ira_allocno_t a2)
+object_thread_conflict_p (ira_object_t obj1, ira_object_t obj2)
 {
-  ira_allocno_t a, conflict_a;
+  ira_allocno_t a1 = OBJECT_ALLOCNO (obj1);
+  ira_allocno_t a2 = OBJECT_ALLOCNO (obj2);
+
+  gcc_assert (
+    obj1 != obj2
+    && ALLOCNO_COLOR_DATA (a1)->first_thread_objects[OBJECT_INDEX (obj1)]
+        == obj1
+    && ALLOCNO_COLOR_DATA (a2)->first_thread_objects[OBJECT_INDEX (obj2)]
+        == obj2);
+
+  ira_allocno_t first_thread_allocno1
+    = ALLOCNO_COLOR_DATA (a1)->first_thread_allocno;
+  ira_allocno_t first_thread_allocno2
+    = ALLOCNO_COLOR_DATA (a2)->first_thread_allocno;
+
+  int offset
+    = (ALLOCNO_COLOR_DATA (a1)->first_thread_offset + OBJECT_START (obj1))
+      - (ALLOCNO_COLOR_DATA (a2)->first_thread_offset + OBJECT_START (obj2));
+
+  /* Fetch the allocno bitmaps of both threads; neither may be empty.  */
+  bitmap thread_allocnos1
+    = ALLOCNO_COLOR_DATA (first_thread_allocno1)->thread_allocnos;
+  bitmap thread_allocnos2
+    = ALLOCNO_COLOR_DATA (first_thread_allocno2)->thread_allocnos;
+  gcc_assert (!bitmap_empty_p (thread_allocnos1)
+             && !bitmap_empty_p (thread_allocnos2));
+  std::vector<ira_object_t> thread_objects_2;
 
-  for (a = ALLOCNO_COLOR_DATA (a2)->next_thread_allocno;;
-       a = ALLOCNO_COLOR_DATA (a)->next_thread_allocno)
+  unsigned int i;
+  bitmap_iterator bi;
+  EXECUTE_IF_SET_IN_BITMAP (thread_allocnos2, 0, i, bi)
     {
-      for (conflict_a = ALLOCNO_COLOR_DATA (a1)->next_thread_allocno;;
-          conflict_a = ALLOCNO_COLOR_DATA (conflict_a)->next_thread_allocno)
-       {
-         if (allocnos_conflict_by_live_ranges_p (a, conflict_a))
-           return true;
-         if (conflict_a == a1)
-           break;
-       }
-      if (a == a2)
-       break;
+      ira_allocno_object_iterator oi;
+      ira_object_t obj;
+      FOR_EACH_ALLOCNO_OBJECT (ira_allocnos[i], obj, oi)
+       thread_objects_2.push_back (obj);
+    }
+
+  EXECUTE_IF_SET_IN_BITMAP (thread_allocnos1, 0, i, bi)
+    {
+      ira_allocno_object_iterator oi;
+      ira_object_t obj;
+      ira_allocno_t a = ira_allocnos[i];
+      FOR_EACH_ALLOCNO_OBJECT (ira_allocnos[i], obj, oi)
+       for (ira_object_t other_obj : thread_objects_2)
+         {
+           int thread_start1 = ALLOCNO_COLOR_DATA (a)->first_thread_offset
+                               + OBJECT_START (obj);
+           int thread_start2 = ALLOCNO_COLOR_DATA (OBJECT_ALLOCNO (other_obj))
+                                 ->first_thread_offset
+                               + offset + OBJECT_START (other_obj);
+           if (!(thread_start1 + OBJECT_NREGS (obj) <= thread_start2
+                 || thread_start2 + OBJECT_NREGS (other_obj) <= thread_start1)
+               && objects_conflict_by_live_ranges_p (obj, other_obj))
+             return true;
+         }
     }
+
   return false;
 }
 
-/* Merge two threads given correspondingly by their first allocnos T1
-   and T2 (more accurately merging T2 into T1).  */
+/* Merge the two threads given by their first objects OBJ1 and OBJ2
+   (more accurately, merge the thread of OBJ2 into that of OBJ1).  */
 static void
-merge_threads (ira_allocno_t t1, ira_allocno_t t2)
+merge_threads (ira_object_t obj1, ira_object_t obj2)
 {
-  ira_allocno_t a, next, last;
+  ira_allocno_t a1 = OBJECT_ALLOCNO (obj1);
+  ira_allocno_t a2 = OBJECT_ALLOCNO (obj2);
+
+  gcc_assert (
+    obj1 != obj2
+    && ALLOCNO_COLOR_DATA (a1)->first_thread_objects[OBJECT_INDEX (obj1)]
+        == obj1
+    && ALLOCNO_COLOR_DATA (a2)->first_thread_objects[OBJECT_INDEX (obj2)]
+        == obj2);
+
+  ira_allocno_t first_thread_allocno1
+    = ALLOCNO_COLOR_DATA (a1)->first_thread_allocno;
+  ira_allocno_t first_thread_allocno2
+    = ALLOCNO_COLOR_DATA (a2)->first_thread_allocno;
+
+  gcc_assert (first_thread_allocno1 != first_thread_allocno2);
 
-  gcc_assert (t1 != t2
-             && ALLOCNO_COLOR_DATA (t1)->first_thread_allocno == t1
-             && ALLOCNO_COLOR_DATA (t2)->first_thread_allocno == t2);
-  for (last = t2, a = ALLOCNO_COLOR_DATA (t2)->next_thread_allocno;;
-       a = ALLOCNO_COLOR_DATA (a)->next_thread_allocno)
+  int offset
+    = (ALLOCNO_COLOR_DATA (a1)->first_thread_offset + OBJECT_START (obj1))
+      - (ALLOCNO_COLOR_DATA (a2)->first_thread_offset + OBJECT_START (obj2));
+
+  /* Move every allocno of thread 2 into the allocno set of thread 1.  */
+  unsigned int i;
+  bitmap_iterator bi;
+  bitmap thread_allocnos2
+    = ALLOCNO_COLOR_DATA (first_thread_allocno2)->thread_allocnos;
+  bitmap thread_allocnos1
+    = ALLOCNO_COLOR_DATA (first_thread_allocno1)->thread_allocnos;
+  gcc_assert (!bitmap_empty_p (thread_allocnos1)
+             && !bitmap_empty_p (thread_allocnos2));
+  EXECUTE_IF_SET_IN_BITMAP (thread_allocnos2, 0, i, bi)
+    {
+      ira_allocno_t a = ira_allocnos[i];
+      gcc_assert (ALLOCNO_COLOR_DATA (a)->first_thread_allocno
+                 == first_thread_allocno2);
+      /* Update the first_thread_allocno and first_thread_offset fields.  */
+      ALLOCNO_COLOR_DATA (a)->first_thread_allocno = first_thread_allocno1;
+      ALLOCNO_COLOR_DATA (a)->first_thread_offset += offset;
+      bitmap_set_bit (thread_allocnos1, i);
+    }
+  bitmap_clear (thread_allocnos2);
+  ira_free_bitmap (thread_allocnos2);
+  ALLOCNO_COLOR_DATA (first_thread_allocno2)->thread_allocnos = NULL;
+
+  ira_object_t last_obj = obj2;
+  for (ira_object_t next_obj
+       = ALLOCNO_COLOR_DATA (a2)->next_thread_objects[OBJECT_INDEX (obj2)];
+       ; next_obj = ALLOCNO_COLOR_DATA (OBJECT_ALLOCNO (next_obj))
+                     ->next_thread_objects[OBJECT_INDEX (next_obj)])
     {
-      ALLOCNO_COLOR_DATA (a)->first_thread_allocno = t1;
-      if (a == t2)
+      ira_allocno_t next_a = OBJECT_ALLOCNO (next_obj);
+      ALLOCNO_COLOR_DATA (next_a)->first_thread_objects[OBJECT_INDEX 
(next_obj)]
+       = obj1;
+      gcc_assert (ALLOCNO_COLOR_DATA (next_a)->first_thread_allocno
+                 == first_thread_allocno1);
+      gcc_assert (bitmap_bit_p (thread_allocnos1, ALLOCNO_NUM (next_a)));
+      if (next_obj == obj2)
         break;
-      last = a;
+      last_obj = next_obj;
     }
-  next = ALLOCNO_COLOR_DATA (t1)->next_thread_allocno;
-  ALLOCNO_COLOR_DATA (t1)->next_thread_allocno = t2;
-  ALLOCNO_COLOR_DATA (last)->next_thread_allocno = next;
-  ALLOCNO_COLOR_DATA (t1)->thread_freq += ALLOCNO_COLOR_DATA (t2)->thread_freq;
+  /* Splice OBJ2's thread chain in right after OBJ1.  */
+  ira_object_t temp_obj
+    = ALLOCNO_COLOR_DATA (a1)->next_thread_objects[OBJECT_INDEX (obj1)];
+  ALLOCNO_COLOR_DATA (a1)->next_thread_objects[OBJECT_INDEX (obj1)] = obj2;
+  ALLOCNO_COLOR_DATA (OBJECT_ALLOCNO (last_obj))
+    ->next_thread_objects[OBJECT_INDEX (last_obj)]
+    = temp_obj;
+
+  ALLOCNO_COLOR_DATA (first_thread_allocno1)->thread_freq
+    += ALLOCNO_COLOR_DATA (first_thread_allocno2)->thread_freq;
 }
 
 /* Create threads by processing CP_NUM copies from sorted copies.  We
@@ -2448,7 +2626,6 @@ merge_threads (ira_allocno_t t1, ira_allocno_t t2)
 static void
 form_threads_from_copies (int cp_num)
 {
-  ira_allocno_t a, thread1, thread2;
   ira_copy_t cp;
 
   qsort (sorted_copies, cp_num, sizeof (ira_copy_t), copy_freq_compare_func);
@@ -2457,33 +2634,43 @@ form_threads_from_copies (int cp_num)
   for (int i = 0; i < cp_num; i++)
     {
       cp = sorted_copies[i];
-      thread1 = ALLOCNO_COLOR_DATA (cp->first)->first_thread_allocno;
-      thread2 = ALLOCNO_COLOR_DATA (cp->second)->first_thread_allocno;
-      if (thread1 == thread2)
+      ira_allocno_t first_a = OBJECT_ALLOCNO (cp->first);
+      ira_allocno_t second_a = OBJECT_ALLOCNO (cp->second);
+      ira_object_t thread1 = ALLOCNO_COLOR_DATA (first_a)
+                              ->first_thread_objects[OBJECT_INDEX (cp->first)];
+      ira_object_t thread2
+       = ALLOCNO_COLOR_DATA (second_a)
+           ->first_thread_objects[OBJECT_INDEX (cp->second)];
+      if (thread1 == thread2
+         || ALLOCNO_COLOR_DATA (first_a)->first_thread_allocno
+              == ALLOCNO_COLOR_DATA (second_a)->first_thread_allocno)
        continue;
-      if (! allocno_thread_conflict_p (thread1, thread2))
+      if (!object_thread_conflict_p (thread1, thread2))
        {
          if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
-           fprintf
-               (ira_dump_file,
-                "        Forming thread by copy %d:a%dr%d-a%dr%d (freq=%d):\n",
-                cp->num, ALLOCNO_NUM (cp->first), ALLOCNO_REGNO (cp->first),
-                ALLOCNO_NUM (cp->second), ALLOCNO_REGNO (cp->second),
-                cp->freq);
+           fprintf (
+             ira_dump_file,
+             "        Forming thread by copy %d:a%dr%d-a%dr%d (freq=%d):\n",
+             cp->num, ALLOCNO_NUM (first_a), ALLOCNO_REGNO (first_a),
+             ALLOCNO_NUM (second_a), ALLOCNO_REGNO (second_a), cp->freq);
          merge_threads (thread1, thread2);
          if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
            {
-             thread1 = ALLOCNO_COLOR_DATA (thread1)->first_thread_allocno;
-             fprintf (ira_dump_file, "          Result (freq=%d): a%dr%d(%d)",
-                      ALLOCNO_COLOR_DATA (thread1)->thread_freq,
-                      ALLOCNO_NUM (thread1), ALLOCNO_REGNO (thread1),
-                      ALLOCNO_FREQ (thread1));
-             for (a = ALLOCNO_COLOR_DATA (thread1)->next_thread_allocno;
-                  a != thread1;
-                  a = ALLOCNO_COLOR_DATA (a)->next_thread_allocno)
-               fprintf (ira_dump_file, " a%dr%d(%d)",
-                        ALLOCNO_NUM (a), ALLOCNO_REGNO (a),
-                        ALLOCNO_FREQ (a));
+             ira_allocno_t a1 = OBJECT_ALLOCNO (thread1);
+             ira_allocno_t first_thread_allocno
+               = ALLOCNO_COLOR_DATA (a1)->first_thread_allocno;
+             fprintf (ira_dump_file, "          Result (freq=%d):",
+                      ALLOCNO_COLOR_DATA (first_thread_allocno)->thread_freq);
+             unsigned int i;
+             bitmap_iterator bi;
+             EXECUTE_IF_SET_IN_BITMAP (
+               ALLOCNO_COLOR_DATA (first_thread_allocno)->thread_allocnos, 0,
+               i, bi)
+               {
+                 ira_allocno_t a = ira_allocnos[i];
+                 fprintf (ira_dump_file, " a%dr%d(%d)", ALLOCNO_NUM (a),
+                          ALLOCNO_REGNO (a), ALLOCNO_FREQ (a));
+               }
              fprintf (ira_dump_file, "\n");
            }
        }
@@ -2503,13 +2690,27 @@ form_threads_from_bucket (ira_allocno_t bucket)
     {
       for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
        {
-         if (cp->first == a)
+         bool intersect_p = hard_reg_set_intersect_p (
+           ALLOCNO_COLOR_DATA (OBJECT_ALLOCNO (cp->first))
+             ->profitable_hard_regs,
+           ALLOCNO_COLOR_DATA (OBJECT_ALLOCNO (cp->second))
+             ->profitable_hard_regs);
+         if (OBJECT_ALLOCNO (cp->first) == a)
            {
              next_cp = cp->next_first_allocno_copy;
+             if (!intersect_p)
+               continue;
+             sorted_copies[cp_num++] = cp;
+           }
+         else if (OBJECT_ALLOCNO (cp->second) == a)
+           {
+             next_cp = cp->next_second_allocno_copy;
+             if (!intersect_p
+                 || !bitmap_bit_p (uncolorable_allocno_set,
+                                   ALLOCNO_NUM (OBJECT_ALLOCNO (cp->first))))
+               continue;
              sorted_copies[cp_num++] = cp;
            }
-         else if (cp->second == a)
-           next_cp = cp->next_second_allocno_copy;
          else
            gcc_unreachable ();
        }
@@ -2531,15 +2732,15 @@ form_threads_from_colorable_allocno (ira_allocno_t a)
             ALLOCNO_NUM (a), ALLOCNO_REGNO (a));
   for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
     {
-      if (cp->first == a)
+      if (OBJECT_ALLOCNO (cp->first) == a)
        {
          next_cp = cp->next_first_allocno_copy;
-         another_a = cp->second;
+         another_a = OBJECT_ALLOCNO (cp->second);
        }
-      else if (cp->second == a)
+      else if (OBJECT_ALLOCNO (cp->second) == a)
        {
          next_cp = cp->next_second_allocno_copy;
-         another_a = cp->first;
+         another_a = OBJECT_ALLOCNO (cp->first);
        }
       else
        gcc_unreachable ();
@@ -2564,8 +2765,16 @@ init_allocno_threads (void)
     {
       a = ira_allocnos[j];
       /* Set up initial thread data: */
-      ALLOCNO_COLOR_DATA (a)->first_thread_allocno
-       = ALLOCNO_COLOR_DATA (a)->next_thread_allocno = a;
+      for (int i = 0; i < ALLOCNO_NUM_OBJECTS (a); i += 1)
+       {
+         ira_object_t obj = ALLOCNO_OBJECT (a, i);
+         ALLOCNO_COLOR_DATA (a)->first_thread_objects[i]
+           = ALLOCNO_COLOR_DATA (a)->next_thread_objects[i] = obj;
+       }
+      ALLOCNO_COLOR_DATA (a)->first_thread_allocno = a;
+      ALLOCNO_COLOR_DATA (a)->first_thread_offset = 0;
+      ALLOCNO_COLOR_DATA (a)->thread_allocnos = ira_allocate_bitmap ();
+      bitmap_set_bit (ALLOCNO_COLOR_DATA (a)->thread_allocnos, ALLOCNO_NUM 
(a));
       ALLOCNO_COLOR_DATA (a)->thread_freq = ALLOCNO_FREQ (a);
       ALLOCNO_COLOR_DATA (a)->hard_reg_prefs = 0;
       for (pref = ALLOCNO_PREFS (a); pref != NULL; pref = pref->next_pref)
@@ -2608,6 +2817,9 @@ add_allocno_to_bucket (ira_allocno_t a, ira_allocno_t 
*bucket_ptr)
   ira_allocno_t first_a;
   allocno_color_data_t data;
 
+  if (bucket_ptr == &uncolorable_allocno_bucket)
+    bitmap_set_bit (uncolorable_allocno_set, ALLOCNO_NUM (a));
+
   if (bucket_ptr == &uncolorable_allocno_bucket
       && ALLOCNO_CLASS (a) != NO_REGS)
     {
@@ -2734,6 +2946,9 @@ delete_allocno_from_bucket (ira_allocno_t allocno, 
ira_allocno_t *bucket_ptr)
 {
   ira_allocno_t prev_allocno, next_allocno;
 
+  if (bucket_ptr == &uncolorable_allocno_bucket)
+    bitmap_clear_bit (uncolorable_allocno_set, ALLOCNO_NUM (allocno));
+
   if (bucket_ptr == &uncolorable_allocno_bucket
       && ALLOCNO_CLASS (allocno) != NO_REGS)
     {
@@ -3227,16 +3442,23 @@ allocno_copy_cost_saving (ira_allocno_t allocno, int 
hard_regno)
     rclass = ALLOCNO_CLASS (allocno);
   for (cp = ALLOCNO_COPIES (allocno); cp != NULL; cp = next_cp)
     {
-      if (cp->first == allocno)
+      if (OBJECT_ALLOCNO (cp->first) == allocno)
        {
          next_cp = cp->next_first_allocno_copy;
-         if (ALLOCNO_HARD_REGNO (cp->second) != hard_regno)
+         ira_allocno_t another_a = OBJECT_ALLOCNO (cp->second);
+         if (ALLOCNO_HARD_REGNO (another_a) > -1
+             && hard_regno + OBJECT_START (cp->first)
+                  != ALLOCNO_HARD_REGNO (another_a)
+                       + OBJECT_START (cp->second))
            continue;
        }
-      else if (cp->second == allocno)
+      else if (OBJECT_ALLOCNO (cp->second) == allocno)
        {
          next_cp = cp->next_second_allocno_copy;
-         if (ALLOCNO_HARD_REGNO (cp->first) != hard_regno)
+         ira_allocno_t another_a = OBJECT_ALLOCNO (cp->first);
+         if (ALLOCNO_HARD_REGNO (another_a) > -1
+             && hard_regno + OBJECT_START (cp->second)
+                  != ALLOCNO_HARD_REGNO (another_a) + OBJECT_START (cp->first))
            continue;
        }
       else
@@ -3643,6 +3865,7 @@ color_allocnos (void)
       /* Put the allocnos into the corresponding buckets.  */
       colorable_allocno_bucket = NULL;
       uncolorable_allocno_bucket = NULL;
+      bitmap_clear (uncolorable_allocno_set);
       EXECUTE_IF_SET_IN_BITMAP (coloring_allocno_bitmap, 0, i, bi)
        {
          a = ira_allocnos[i];
@@ -3740,10 +3963,12 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
   bitmap_copy (coloring_allocno_bitmap, loop_tree_node->all_allocnos);
   bitmap_copy (consideration_allocno_bitmap, coloring_allocno_bitmap);
   n = 0;
+  size_t obj_n = 0;
   EXECUTE_IF_SET_IN_BITMAP (consideration_allocno_bitmap, 0, j, bi)
     {
       a = ira_allocnos[j];
       n++;
+      obj_n += ALLOCNO_NUM_OBJECTS (a);
       if (! ALLOCNO_ASSIGNED_P (a))
        continue;
       bitmap_clear_bit (coloring_allocno_bitmap, ALLOCNO_NUM (a));
@@ -3752,20 +3977,29 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
     = (allocno_color_data_t) ira_allocate (sizeof (struct allocno_color_data)
                                           * n);
   memset (allocno_color_data, 0, sizeof (struct allocno_color_data) * n);
+  ira_object_t *thread_objects
+    = (ira_object_t *) ira_allocate (sizeof (ira_object_t *) * obj_n * 2);
+  memset (thread_objects, 0, sizeof (ira_object_t *) * obj_n * 2);
   curr_allocno_process = 0;
   n = 0;
+  size_t obj_offset = 0;
   EXECUTE_IF_SET_IN_BITMAP (consideration_allocno_bitmap, 0, j, bi)
     {
       a = ira_allocnos[j];
       ALLOCNO_ADD_DATA (a) = allocno_color_data + n;
+      ALLOCNO_COLOR_DATA (a)->first_thread_objects
+       = thread_objects + obj_offset;
+      obj_offset += ALLOCNO_NUM_OBJECTS (a);
+      ALLOCNO_COLOR_DATA (a)->next_thread_objects = thread_objects + 
obj_offset;
+      obj_offset += ALLOCNO_NUM_OBJECTS (a);
       n++;
     }
+  gcc_assert (obj_n * 2 == obj_offset);
   init_allocno_threads ();
   /* Color all mentioned allocnos including transparent ones.  */
   color_allocnos ();
   /* Process caps.  They are processed just once.  */
-  if (flag_ira_region == IRA_REGION_MIXED
-      || flag_ira_region == IRA_REGION_ALL)
+  if (flag_ira_region == IRA_REGION_MIXED || flag_ira_region == IRA_REGION_ALL)
     EXECUTE_IF_SET_IN_BITMAP (loop_tree_node->all_allocnos, 0, j, bi)
       {
        a = ira_allocnos[j];
@@ -3881,12 +4115,22 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
            }
        }
     }
-  ira_free (allocno_color_data);
   EXECUTE_IF_SET_IN_BITMAP (consideration_allocno_bitmap, 0, j, bi)
     {
       a = ira_allocnos[j];
+      gcc_assert (a != NULL);
+      ALLOCNO_COLOR_DATA (a)->first_thread_objects = NULL;
+      ALLOCNO_COLOR_DATA (a)->next_thread_objects = NULL;
+      if (ALLOCNO_COLOR_DATA (a)->thread_allocnos != NULL)
+       {
+         bitmap_clear (ALLOCNO_COLOR_DATA (a)->thread_allocnos);
+         ira_free_bitmap (ALLOCNO_COLOR_DATA (a)->thread_allocnos);
+         ALLOCNO_COLOR_DATA (a)->thread_allocnos = NULL;
+       }
       ALLOCNO_ADD_DATA (a) = NULL;
     }
+  ira_free (allocno_color_data);
+  ira_free (thread_objects);
 }
 
 /* Initialize the common data for coloring and calls functions to do
@@ -4080,15 +4324,17 @@ update_curr_costs (ira_allocno_t a)
   ira_init_register_move_cost_if_necessary (mode);
   for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
     {
-      if (cp->first == a)
+      ira_allocno_t first_a = OBJECT_ALLOCNO (cp->first);
+      ira_allocno_t second_a = OBJECT_ALLOCNO (cp->second);
+      if (first_a == a)
        {
          next_cp = cp->next_first_allocno_copy;
-         another_a = cp->second;
+         another_a = second_a;
        }
-      else if (cp->second == a)
+      else if (second_a == a)
        {
          next_cp = cp->next_second_allocno_copy;
-         another_a = cp->first;
+         another_a = first_a;
        }
       else
        gcc_unreachable ();
@@ -4100,9 +4346,8 @@ update_curr_costs (ira_allocno_t a)
       i = ira_class_hard_reg_index[aclass][hard_regno];
       if (i < 0)
        continue;
-      cost = (cp->first == a
-             ? ira_register_move_cost[mode][rclass][aclass]
-             : ira_register_move_cost[mode][aclass][rclass]);
+      cost = (first_a == a ? ira_register_move_cost[mode][rclass][aclass]
+                          : ira_register_move_cost[mode][aclass][rclass]);
       ira_allocate_and_set_or_copy_costs
        (&ALLOCNO_UPDATED_HARD_REG_COSTS (a), aclass, ALLOCNO_CLASS_COST (a),
         ALLOCNO_HARD_REG_COSTS (a));
@@ -4349,21 +4594,23 @@ coalesce_allocnos (void)
        continue;
       for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
        {
-         if (cp->first == a)
+         ira_allocno_t first_a = OBJECT_ALLOCNO (cp->first);
+         ira_allocno_t second_a = OBJECT_ALLOCNO (cp->second);
+         if (first_a == a)
            {
              next_cp = cp->next_first_allocno_copy;
-             regno = ALLOCNO_REGNO (cp->second);
+             regno = ALLOCNO_REGNO (second_a);
              /* For priority coloring we coalesce allocnos only with
                 the same allocno class not with intersected allocno
                 classes as it were possible.  It is done for
                 simplicity.  */
              if ((cp->insn != NULL || cp->constraint_p)
-                 && ALLOCNO_ASSIGNED_P (cp->second)
-                 && ALLOCNO_HARD_REGNO (cp->second) < 0
-                 && ! ira_equiv_no_lvalue_p (regno))
+                 && ALLOCNO_ASSIGNED_P (second_a)
+                 && ALLOCNO_HARD_REGNO (second_a) < 0
+                 && !ira_equiv_no_lvalue_p (regno))
                sorted_copies[cp_num++] = cp;
            }
-         else if (cp->second == a)
+         else if (second_a == a)
            next_cp = cp->next_second_allocno_copy;
          else
            gcc_unreachable ();
@@ -4376,17 +4623,18 @@ coalesce_allocnos (void)
       for (i = 0; i < cp_num; i++)
        {
          cp = sorted_copies[i];
-         if (! coalesced_allocno_conflict_p (cp->first, cp->second))
+         ira_allocno_t first_a = OBJECT_ALLOCNO (cp->first);
+         ira_allocno_t second_a = OBJECT_ALLOCNO (cp->second);
+         if (!coalesced_allocno_conflict_p (first_a, second_a))
            {
              allocno_coalesced_p = true;
              if (internal_flag_ira_verbose > 3 && ira_dump_file != NULL)
-               fprintf
-                 (ira_dump_file,
-                  "      Coalescing copy %d:a%dr%d-a%dr%d (freq=%d)\n",
-                  cp->num, ALLOCNO_NUM (cp->first), ALLOCNO_REGNO (cp->first),
-                  ALLOCNO_NUM (cp->second), ALLOCNO_REGNO (cp->second),
-                  cp->freq);
-             merge_allocnos (cp->first, cp->second);
+               fprintf (ira_dump_file,
+                        "      Coalescing copy %d:a%dr%d-a%dr%d (freq=%d)\n",
+                        cp->num, ALLOCNO_NUM (first_a),
+                        ALLOCNO_REGNO (first_a), ALLOCNO_NUM (second_a),
+                        ALLOCNO_REGNO (second_a), cp->freq);
+             merge_allocnos (first_a, second_a);
              i++;
              break;
            }
@@ -4395,8 +4643,11 @@ coalesce_allocnos (void)
       for (n = 0; i < cp_num; i++)
        {
          cp = sorted_copies[i];
-         if (allocno_coalesce_data[ALLOCNO_NUM (cp->first)].first
-             != allocno_coalesce_data[ALLOCNO_NUM (cp->second)].first)
+         if (allocno_coalesce_data[ALLOCNO_NUM (OBJECT_ALLOCNO (cp->first))]
+               .first
+             != allocno_coalesce_data[ALLOCNO_NUM (
+                                        OBJECT_ALLOCNO (cp->second))]
+                  .first)
            sorted_copies[n++] = cp;
        }
       cp_num = n;
@@ -5070,15 +5321,15 @@ ira_reuse_stack_slot (int regno, poly_uint64 
inherent_size,
               cp != NULL;
               cp = next_cp)
            {
-             if (cp->first == allocno)
+             if (OBJECT_ALLOCNO (cp->first) == allocno)
                {
                  next_cp = cp->next_first_allocno_copy;
-                 another_allocno = cp->second;
+                 another_allocno = OBJECT_ALLOCNO (cp->second);
                }
-             else if (cp->second == allocno)
+             else if (OBJECT_ALLOCNO (cp->second) == allocno)
                {
                  next_cp = cp->next_second_allocno_copy;
-                 another_allocno = cp->first;
+                 another_allocno = OBJECT_ALLOCNO (cp->first);
                }
              else
                gcc_unreachable ();
@@ -5274,6 +5525,7 @@ ira_initiate_assign (void)
     = (ira_allocno_t *) ira_allocate (sizeof (ira_allocno_t)
                                      * ira_allocnos_num);
   consideration_allocno_bitmap = ira_allocate_bitmap ();
+  uncolorable_allocno_set = ira_allocate_bitmap ();
   initiate_cost_update ();
   allocno_priorities = (int *) ira_allocate (sizeof (int) * ira_allocnos_num);
   sorted_copies = (ira_copy_t *) ira_allocate (ira_copies_num
@@ -5286,6 +5538,7 @@ ira_finish_assign (void)
 {
   ira_free (sorted_allocnos);
   ira_free_bitmap (consideration_allocno_bitmap);
+  ira_free_bitmap (uncolorable_allocno_set);
   finish_cost_update ();
   ira_free (allocno_priorities);
   ira_free (sorted_copies);
diff --git a/gcc/ira-conflicts.cc b/gcc/ira-conflicts.cc
index 0585ad10043..7aeed7202ce 100644
--- a/gcc/ira-conflicts.cc
+++ b/gcc/ira-conflicts.cc
@@ -173,25 +173,115 @@ build_conflict_bit_table (void)
   sparseset_free (objects_live);
   return true;
 }
-
-/* Return true iff allocnos A1 and A2 cannot be allocated to the same
-   register due to conflicts.  */
 
-static bool
-allocnos_conflict_for_copy_p (ira_allocno_t a1, ira_allocno_t a2)
+/* Check that X is REG or SUBREG of REG.  */
+#define REG_SUBREG_P(x)                                                        
\
+  (REG_P (x) || (GET_CODE (x) == SUBREG && REG_P (SUBREG_REG (x))))
+
+/* Return true if OBJ1 and OBJ2 can be the operands of a subreg move.  */
+bool
+subreg_move_p (ira_object_t obj1, ira_object_t obj2)
 {
-  /* Due to the fact that we canonicalize conflicts (see
-     record_object_conflict), we only need to test for conflicts of
-     the lowest order words.  */
-  ira_object_t obj1 = ALLOCNO_OBJECT (a1, 0);
-  ira_object_t obj2 = ALLOCNO_OBJECT (a2, 0);
+  ira_allocno_t a1 = OBJECT_ALLOCNO (obj1);
+  ira_allocno_t a2 = OBJECT_ALLOCNO (obj2);
+  return ALLOCNO_CLASS (a1) != NO_REGS && ALLOCNO_CLASS (a2) != NO_REGS
+        && (ALLOCNO_TRACK_SUBREG_P (a1) || ALLOCNO_TRACK_SUBREG_P (a2))
+        && OBJECT_NREGS (obj1) == OBJECT_NREGS (obj2)
+        && (OBJECT_NREGS (obj1) != ALLOCNO_NREGS (a1)
+            || OBJECT_NREGS (obj2) != ALLOCNO_NREGS (a2));
+}
 
-  return OBJECTS_CONFLICT_P (obj1, obj2);
+/* Return true if ORIG_DEST_REG and ORIG_SRC_REG can form a subreg move.  */
+bool
+subreg_move_p (rtx orig_dest_reg, rtx orig_src_reg)
+{
+  gcc_assert (REG_SUBREG_P (orig_dest_reg) && REG_SUBREG_P (orig_src_reg));
+  rtx reg1
+    = SUBREG_P (orig_dest_reg) ? SUBREG_REG (orig_dest_reg) : orig_dest_reg;
+  rtx reg2 = SUBREG_P (orig_src_reg) ? SUBREG_REG (orig_src_reg) : 
orig_src_reg;
+  if (HARD_REGISTER_P (reg1) || HARD_REGISTER_P (reg2))
+    return false;
+  ira_allocno_t a1 = ira_curr_regno_allocno_map[REGNO (reg1)];
+  ira_allocno_t a2 = ira_curr_regno_allocno_map[REGNO (reg2)];
+  ira_object_t obj1 = find_object (a1, orig_dest_reg);
+  ira_object_t obj2 = find_object (a2, orig_src_reg);
+  return subreg_move_p (obj1, obj2);
 }
 
-/* Check that X is REG or SUBREG of REG.  */
-#define REG_SUBREG_P(x)                                                        
\
-   (REG_P (x) || (GET_CODE (x) == SUBREG && REG_P (SUBREG_REG (x))))
+/* Return true if OBJ1 and OBJ2 can be allocated to the same register.  */
+static bool
+regs_non_conflict_for_copy_p (ira_object_t obj1, ira_object_t obj2,
+                             bool is_move, bool offset_equal)
+{
+  ira_allocno_t a1 = OBJECT_ALLOCNO (obj1);
+  ira_allocno_t a2 = OBJECT_ALLOCNO (obj2);
+  if (is_move && subreg_move_p (obj1, obj2))
+    {
+      if (OBJECTS_CONFLICT_P (obj1, obj2))
+       return false;
+      /* Assume a1 is allocated starting at hard register `OBJECT_START (obj2)`
+        and a2 at `OBJECT_START (obj1)`, so that obj1 and obj2 both start at
+        hard register `OBJECT_START (obj1) + OBJECT_START (obj2)`.  */
+      int start_regno1 = OBJECT_START (obj2);
+      int start_regno2 = OBJECT_START (obj1);
+
+      ira_object_t obj_a, obj_b;
+      ira_allocno_object_iterator oi_a, oi_b;
+      FOR_EACH_ALLOCNO_OBJECT (a1, obj_a, oi_a)
+       FOR_EACH_ALLOCNO_OBJECT (a2, obj_b, oi_b)
+         /* If a conflicting pair of objects of a1 and a2 would overlap under
+            that allocation, then obj1 and obj2 cannot form a copy.  */
+         if (OBJECTS_CONFLICT_P (obj_a, obj_b)
+             && !(start_regno1 + OBJECT_START (obj_a) + OBJECT_NREGS (obj_a)
+                    <= (start_regno2 + OBJECT_START (obj_b))
+                  || start_regno2 + OBJECT_START (obj_b) + OBJECT_NREGS (obj_b)
+                       <= (start_regno1 + OBJECT_START (obj_a))))
+             return false;
+
+      return true;
+    }
+  else
+    {
+      /* In the normal case, require that the full objects of a1 and a2 do not
+        conflict and that the offsets are equal.  */
+      ira_object_t full_obj1 = find_object (a1, 0, ALLOCNO_NREGS (a1));
+      ira_object_t full_obj2 = find_object (a2, 0, ALLOCNO_NREGS (a2));
+      return !OBJECTS_CONFLICT_P (full_obj1, full_obj2) && offset_equal;
+    }
+}
+
+/* Return true if ORIG_REG is not a SUBREG, if A has no subreg objects, or
+   if the SUBREG's offset and size are both multiples of ALLOCNO_UNIT_SIZE
+   (A).  This forbids creating a copy from a subreg move like the RTL below
+   (from testsuite/gcc.dg/vect/vect-simd-20.c on AArch64).  Suppose both
+   pseudos are allocated to the fourth register, i.e. pseudo 127 to w4 and
+   pseudo 149 to x4 and x5.  Then the third instruction could seemingly be
+   deleted without affecting the result of pseudo 149.  But the second
+   instruction sets the upper 32 bits of x4 to 0 (the behavior of the add
+   instruction), so bits 32~63 of pseudo 149 are cleared, which is not the
+   desired result.
+
+     (set (reg:SI 127)
+         (subreg:SI (reg:TI 149) 0))
+     ...
+     (set (reg:SI 127)
+         (plus:SI (reg:SI 127)
+                  (reg:SI 180)))
+     ...
+     (set (zero_extract:DI (subreg:DI (reg:TI 149) 0)
+                          (const_int 32 [0x20])
+                          (const_int 0 [0]))
+         (subreg:DI (reg:SI 127) 0))  */
+static bool
+subreg_reg_align_and_times_p (ira_allocno_t a, rtx orig_reg)
+{
+  if (!has_subreg_object_p (a) || !SUBREG_P (orig_reg))
+    return true;
+
+  return multiple_p (SUBREG_BYTE (orig_reg), ALLOCNO_UNIT_SIZE (a))
+        && multiple_p (GET_MODE_SIZE (GET_MODE (orig_reg)),
+                       ALLOCNO_UNIT_SIZE (a));
+}
 
 /* Return X if X is a REG, otherwise it should be SUBREG of REG and
    the function returns the reg in this case.  *OFFSET will be set to
@@ -237,8 +327,9 @@ get_freq_for_shuffle_copy (int freq)
    SINGLE_INPUT_OP_HAS_CSTR_P is only meaningful when constraint_p
    is true, see function ira_get_dup_out_num for its meaning.  */
 static bool
-process_regs_for_copy (rtx reg1, rtx reg2, bool constraint_p, rtx_insn *insn,
-                      int freq, bool single_input_op_has_cstr_p = true)
+process_regs_for_copy (rtx orig_reg1, rtx orig_reg2, bool constraint_p,
+                      rtx_insn *insn, int freq,
+                      bool single_input_op_has_cstr_p = true)
 {
   int allocno_preferenced_hard_regno, index, offset1, offset2;
   int cost, conflict_cost, move_cost;
@@ -248,10 +339,10 @@ process_regs_for_copy (rtx reg1, rtx reg2, bool constraint_p, rtx_insn *insn,
   machine_mode mode;
   ira_copy_t cp;
 
-  gcc_assert (REG_SUBREG_P (reg1) && REG_SUBREG_P (reg2));
-  only_regs_p = REG_P (reg1) && REG_P (reg2);
-  reg1 = go_through_subreg (reg1, &offset1);
-  reg2 = go_through_subreg (reg2, &offset2);
+  gcc_assert (REG_SUBREG_P (orig_reg1) && REG_SUBREG_P (orig_reg2));
+  only_regs_p = REG_P (orig_reg1) && REG_P (orig_reg2);
+  rtx reg1 = go_through_subreg (orig_reg1, &offset1);
+  rtx reg2 = go_through_subreg (orig_reg2, &offset2);
   /* Set up hard regno preferenced by allocno.  If allocno gets the
      hard regno the copy (or potential move) insn will be removed.  */
   if (HARD_REGISTER_P (reg1))
@@ -270,13 +361,17 @@ process_regs_for_copy (rtx reg1, rtx reg2, bool constraint_p, rtx_insn *insn,
     {
       ira_allocno_t a1 = ira_curr_regno_allocno_map[REGNO (reg1)];
       ira_allocno_t a2 = ira_curr_regno_allocno_map[REGNO (reg2)];
+      ira_object_t obj1 = find_object (a1, orig_reg1);
+      ira_object_t obj2 = find_object (a2, orig_reg2);
 
-      if (!allocnos_conflict_for_copy_p (a1, a2)
-         && offset1 == offset2
+      if (subreg_reg_align_and_times_p (a1, orig_reg1)
+         && subreg_reg_align_and_times_p (a2, orig_reg2)
+         && regs_non_conflict_for_copy_p (obj1, obj2, insn != NULL,
+                                          offset1 == offset2)
          && ordered_p (GET_MODE_PRECISION (ALLOCNO_MODE (a1)),
                        GET_MODE_PRECISION (ALLOCNO_MODE (a2))))
        {
-         cp = ira_add_allocno_copy (a1, a2, freq, constraint_p, insn,
+         cp = ira_add_allocno_copy (obj1, obj2, freq, constraint_p, insn,
                                     ira_curr_loop_tree_node);
          bitmap_set_bit (ira_curr_loop_tree_node->local_copies, cp->num);
          return true;
@@ -438,16 +533,15 @@ add_insn_allocno_copies (rtx_insn *insn)
   freq = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn));
   if (freq == 0)
     freq = 1;
-  if ((set = single_set (insn)) != NULL_RTX
-      && REG_SUBREG_P (SET_DEST (set)) && REG_SUBREG_P (SET_SRC (set))
-      && ! side_effects_p (set)
-      && find_reg_note (insn, REG_DEAD,
-                       REG_P (SET_SRC (set))
-                       ? SET_SRC (set)
-                       : SUBREG_REG (SET_SRC (set))) != NULL_RTX)
+  if ((set = single_set (insn)) != NULL_RTX && REG_SUBREG_P (SET_DEST (set))
+      && REG_SUBREG_P (SET_SRC (set)) && !side_effects_p (set)
+      && (find_reg_note (insn, REG_DEAD,
+                        REG_P (SET_SRC (set)) ? SET_SRC (set)
+                                              : SUBREG_REG (SET_SRC (set)))
+           != NULL_RTX
+         || subreg_move_p (SET_DEST (set), SET_SRC (set))))
     {
-      process_regs_for_copy (SET_SRC (set), SET_DEST (set),
-                            false, insn, freq);
+      process_regs_for_copy (SET_SRC (set), SET_DEST (set), false, insn, freq);
       return;
     }
   /* Fast check of possibility of constraint or shuffle copies.  If
@@ -521,16 +615,23 @@ propagate_copies (void)
 
   FOR_EACH_COPY (cp, ci)
     {
-      a1 = cp->first;
-      a2 = cp->second;
+      a1 = OBJECT_ALLOCNO (cp->first);
+      a2 = OBJECT_ALLOCNO (cp->second);
       if (ALLOCNO_LOOP_TREE_NODE (a1) == ira_loop_tree_root)
        continue;
       ira_assert ((ALLOCNO_LOOP_TREE_NODE (a2) != ira_loop_tree_root));
       parent_a1 = ira_parent_or_cap_allocno (a1);
       parent_a2 = ira_parent_or_cap_allocno (a2);
+      ira_object_t parent_obj1
+       = find_object_anyway (parent_a1, OBJECT_START (cp->first),
+                             OBJECT_NREGS (cp->first));
+      ira_object_t parent_obj2
+       = find_object_anyway (parent_a2, OBJECT_START (cp->second),
+                             OBJECT_NREGS (cp->second));
       ira_assert (parent_a1 != NULL && parent_a2 != NULL);
-      if (! allocnos_conflict_for_copy_p (parent_a1, parent_a2))
-       ira_add_allocno_copy (parent_a1, parent_a2, cp->freq,
+      if (regs_non_conflict_for_copy_p (parent_obj1, parent_obj2,
+                                       cp->insn != NULL, true))
+       ira_add_allocno_copy (parent_obj1, parent_obj2, cp->freq,
                              cp->constraint_p, cp->insn, cp->loop_tree_node);
     }
 }
diff --git a/gcc/ira-emit.cc b/gcc/ira-emit.cc
index 9dc7f3c655e..30ff46980f5 100644
--- a/gcc/ira-emit.cc
+++ b/gcc/ira-emit.cc
@@ -1129,11 +1129,11 @@ add_range_and_copies_from_move_list (move_t list, ira_loop_tree_node_t node,
       update_costs (to, false, freq);
       cp = ira_add_allocno_copy (from, to, freq, false, move->insn, NULL);
       if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
-       fprintf (ira_dump_file, "    Adding cp%d:a%dr%d-a%dr%d\n",
-                cp->num, ALLOCNO_NUM (cp->first),
-                REGNO (allocno_emit_reg (cp->first)),
-                ALLOCNO_NUM (cp->second),
-                REGNO (allocno_emit_reg (cp->second)));
+       fprintf (ira_dump_file, "    Adding cp%d:a%dr%d-a%dr%d\n", cp->num,
+                ALLOCNO_NUM (OBJECT_ALLOCNO (cp->first)),
+                REGNO (allocno_emit_reg (OBJECT_ALLOCNO (cp->first))),
+                ALLOCNO_NUM (OBJECT_ALLOCNO (cp->second)),
+                REGNO (allocno_emit_reg (OBJECT_ALLOCNO (cp->second))));
 
       nr = ALLOCNO_NUM_OBJECTS (from);
       for (i = 0; i < nr; i++)
diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index 9095a8227f7..963e533e448 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -594,9 +594,9 @@ struct ira_allocno_copy
 {
   /* The unique order number of the copy node starting with 0.  */
   int num;
-  /* Allocnos connected by the copy.  The first allocno should have
+  /* Objects connected by the copy.  The first allocno should have
      smaller order number than the second one.  */
-  ira_allocno_t first, second;
+  ira_object_t first, second;
   /* Execution frequency of the copy.  */
   int freq;
   bool constraint_p;
@@ -1046,6 +1046,9 @@ extern void ira_remove_allocno_prefs (ira_allocno_t);
 extern ira_copy_t ira_create_copy (ira_allocno_t, ira_allocno_t,
                                   int, bool, rtx_insn *,
                                   ira_loop_tree_node_t);
+extern ira_copy_t
+ira_add_allocno_copy (ira_object_t, ira_object_t, int, bool, rtx_insn *,
+                     ira_loop_tree_node_t);
 extern ira_copy_t ira_add_allocno_copy (ira_allocno_t, ira_allocno_t, int,
                                        bool, rtx_insn *,
                                        ira_loop_tree_node_t);
@@ -1059,6 +1062,7 @@ extern void ira_destroy (void);
 extern ira_object_t
 find_object (ira_allocno_t, int, int);
 extern ira_object_t find_object (ira_allocno_t, poly_int64, poly_int64);
+extern ira_object_t find_object (ira_allocno_t, rtx);
 ira_object_t
 find_object_anyway (ira_allocno_t a, int start, int nregs);
 extern void ira_copy_allocno_objects (ira_allocno_t, ira_allocno_t);
@@ -1087,6 +1091,8 @@ extern void ira_implicitly_set_insn_hard_regs (HARD_REG_SET *,
 /* ira-conflicts.cc */
 extern void ira_debug_conflicts (bool);
 extern void ira_build_conflicts (void);
+extern bool subreg_move_p (ira_object_t, ira_object_t);
+extern bool subreg_move_p (rtx, rtx);
 
 /* ira-color.cc */
 extern ira_allocno_t ira_soft_conflict (ira_allocno_t, ira_allocno_t);
diff --git a/gcc/ira.cc b/gcc/ira.cc
index 9ea57d3b1ea..280ca47a999 100644
--- a/gcc/ira.cc
+++ b/gcc/ira.cc
@@ -2853,14 +2853,15 @@ print_redundant_copies (void)
       if (hard_regno >= 0)
        continue;
       for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
-       if (cp->first == a)
+       if (OBJECT_ALLOCNO (cp->first) == a)
          next_cp = cp->next_first_allocno_copy;
        else
          {
            next_cp = cp->next_second_allocno_copy;
            if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL
                && cp->insn != NULL_RTX
-               && ALLOCNO_HARD_REGNO (cp->first) == hard_regno)
+               && ALLOCNO_HARD_REGNO (OBJECT_ALLOCNO (cp->first))
+                    == hard_regno)
              fprintf (ira_dump_file,
                       "        Redundant move from %d(freq %d):%d\n",
                       INSN_UID (cp->insn), cp->freq, hard_regno);
-- 
2.36.3

Reply via email to