This fixes PR46590 - I've worked on this at the beginning of last
year already, but apparently, when deciding not to push the last
bits of the LIM reorg, I failed to check the full testcase again.

So this fixes memory usage of that large testcase (many loops)
from requiring a peak memory usage of >3.5GB (kills my machine)
to a peak memory usage of ~1GB (peak happens during IRA,
the testcase completes compile in 122s at -O1 and in 160s at -O3).

The main issue with LIM was the accesses_in_loop array which
we keep for each distinct memory reference.  This was an
array [number-of-loops][number-of-accesses-in-that-loop]
to be able to easily walk over all accesses of a memory reference
in a loop and its children (see for_all_locs_in_loop).
Obviously allocating a vec of size number-of-loops of vecs
(even if those remain un-allocated) makes memory requirements
O(number-of-accesses * number-of-loops) - very bad for this
testcase.

The fix is to flatten this to a [number-of-accesses] array
and support easy walking over accesses in a loop and its children
by sorting that array by the location's loop postorder number.
That way all accesses of a loop nest are clustered and you can
bsearch for a member of that cluster.

Note that restoring this efficient walking isn't necessary
for the testcase in PR46590 but I've created an artificial
testcase where that improves compile-time from 13s to 0s (LIM time).

The patch below includes some more TLC and optimizations I applied
to LIM until I noticed this issue.

The patch also adds bsearch support to vec<>, alongside its support
for qsort, copying the glibc inline function implementation
(Jakub said we shouldn't rely on bsearch availability nor on
it being implemented with an inline function to make it fast).

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

I'll commit this later today (if testing works out ok).

Thanks,
Richard.

2014-01-17  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/46590
        * vec.h (vec<>::bsearch): New member function implementing
        binary search according to C89 bsearch.
        (vec<>::qsort): Avoid calling ::qsort for vectors with sizes 0 or 1.
        * tree-ssa-loop-im.c (struct mem_ref): Make stored member a
        bitmap pointer again.  Make accesses_in_loop a flat array.
        (mem_ref_obstack): New global.
        (outermost_indep_loop): Adjust for mem_ref->stored changes.
        (mark_ref_stored): Likewise.
        (ref_indep_loop_p_2): Likewise.
        (set_ref_stored_in_loop): New helper function.
        (mem_ref_alloc): Allocate mem_refs on the mem_ref_obstack obstack.
        (memref_free): Adjust.
        (record_mem_ref_loc): Simplify.
        (gather_mem_refs_stmt): Adjust.
        (sort_locs_in_loop_postorder_cmp): New function.
        (analyze_memory_references): Sort accesses_in_loop by
        loop postorder number.
        (find_ref_loc_in_loop_cmp): New function.
        (for_all_locs_in_loop): Find relevant cluster of locs in
        accesses_in_loop and iterate without recursion.
        (execute_sm): Avoid uninit warning.
        (struct ref_always_accessed): Simplify.
        (ref_always_accessed::operator ()): Likewise.
        (ref_always_accessed_p): Likewise.
        (tree_ssa_lim_initialize): Initialize mem_ref_obstack, compute
        loop postorder numbers here.
        (tree_ssa_lim_finalize): Free mem_ref_obstack and loop postorder
        numbers.

Index: gcc/vec.h
===================================================================
*** gcc/vec.h.orig      2014-01-07 10:19:52.979454286 +0100
--- gcc/vec.h   2014-01-17 12:18:11.352085931 +0100
*************** public:
*** 476,481 ****
--- 476,482 ----
    void unordered_remove (unsigned);
    void block_remove (unsigned, unsigned);
    void qsort (int (*) (const void *, const void *));
+   T *bsearch (const void *key, int (*compar)(const void *, const void *));
    unsigned lower_bound (T, bool (*)(const T &, const T &)) const;
    static size_t embedded_size (unsigned);
    void embedded_init (unsigned, unsigned = 0);
*************** template<typename T, typename A>
*** 938,944 ****
  inline void
  vec<T, A, vl_embed>::qsort (int (*cmp) (const void *, const void *))
  {
!   ::qsort (address (), length (), sizeof (T), cmp);
  }
  
  
--- 939,981 ----
  inline void
  vec<T, A, vl_embed>::qsort (int (*cmp) (const void *, const void *))
  {
!   if (length () > 1)
!     ::qsort (address (), length (), sizeof (T), cmp);
! }
! 
! 
! /* Search the contents of the sorted vector with a binary search.
!    CMP is the comparison function to pass to bsearch.  */
! 
! template<typename T, typename A>
! inline T *
! vec<T, A, vl_embed>::bsearch (const void *__key,
!                             int (*__compar) (const void *, const void *))
! {
!   const void *__base = this->address ();
!   size_t __nmemb = this->length ();
!   size_t __size = sizeof (T);
!   /* The following is a copy of glibc stdlib-bsearch.h.  */
!   size_t __l, __u, __idx;
!   const void *__p;
!   int __comparison;
! 
!   __l = 0;
!   __u = __nmemb;
!   while (__l < __u)
!     {
!       __idx = (__l + __u) / 2;
!       __p = (const void *) (((const char *) __base) + (__idx * __size));
!       __comparison = (*__compar) (__key, __p);
!       if (__comparison < 0)
!       __u = __idx;
!       else if (__comparison > 0)
!       __l = __idx + 1;
!       else
!       return (T *)const_cast<void *>(__p);
!     }
! 
!   return NULL;
  }
  
  
*************** public:
*** 1174,1179 ****
--- 1211,1217 ----
    void unordered_remove (unsigned);
    void block_remove (unsigned, unsigned);
    void qsort (int (*) (const void *, const void *));
+   T *bsearch (const void *key, int (*compar)(const void *, const void *));
    unsigned lower_bound (T, bool (*)(const T &, const T &)) const;
  
    bool using_auto_storage () const;
*************** vec<T, va_heap, vl_ptr>::qsort (int (*cm
*** 1635,1640 ****
--- 1673,1692 ----
  }
  
  
+ /* Search the contents of the sorted vector with a binary search.
+    CMP is the comparison function to pass to bsearch.  */
+ 
+ template<typename T>
+ inline T *
+ vec<T, va_heap, vl_ptr>::bsearch (const void *key,
+                                 int (*cmp) (const void *, const void *))
+ {
+   if (m_vec)
+     return m_vec->bsearch (key, cmp);
+   return NULL;
+ }
+ 
+ 
  /* Find and return the first position in which OBJ could be inserted
     without changing the ordering of this vector.  LESSTHAN is a
     function that returns true if the first argument is strictly less
Index: gcc/tree-ssa-loop-im.c
===================================================================
*** gcc/tree-ssa-loop-im.c.orig 2014-01-17 12:17:55.057087053 +0100
--- gcc/tree-ssa-loop-im.c      2014-01-17 12:18:27.264084835 +0100
*************** typedef struct mem_ref
*** 126,134 ****
       query meta-data.  */
    ao_ref mem;
  
!   bitmap_head stored;         /* The set of loops in that this memory location
                                   is stored to.  */
!   vec<vec<mem_ref_loc> > accesses_in_loop;
                                /* The locations of the accesses.  Vector
                                   indexed by the loop number.  */
  
--- 126,134 ----
       query meta-data.  */
    ao_ref mem;
  
!   bitmap stored;              /* The set of loops in that this memory location
                                   is stored to.  */
!   vec<mem_ref_loc>            accesses_in_loop;
                                /* The locations of the accesses.  Vector
                                   indexed by the loop number.  */
  
*************** static struct
*** 204,209 ****
--- 204,210 ----
  
  /* Obstack for the bitmaps in the above data structures.  */
  static bitmap_obstack lim_bitmap_obstack;
+ static obstack mem_ref_obstack;
  
  static bool ref_indep_loop_p (struct loop *, mem_ref_p);
  
*************** outermost_indep_loop (struct loop *outer
*** 546,558 ****
  {
    struct loop *aloop;
  
!   if (bitmap_bit_p (&ref->stored, loop->num))
      return NULL;
  
    for (aloop = outer;
         aloop != loop;
         aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
!     if (!bitmap_bit_p (&ref->stored, aloop->num)
        && ref_indep_loop_p (aloop, ref))
        return aloop;
  
--- 547,559 ----
  {
    struct loop *aloop;
  
!   if (ref->stored && bitmap_bit_p (ref->stored, loop->num))
      return NULL;
  
    for (aloop = outer;
         aloop != loop;
         aloop = superloop_at_depth (loop, loop_depth (aloop) + 1))
!     if ((!ref->stored || !bitmap_bit_p (ref->stored, aloop->num))
        && ref_indep_loop_p (aloop, ref))
        return aloop;
  
*************** force_move_till (tree ref, tree *index,
*** 1383,1396 ****
  static void
  memref_free (struct mem_ref *mem)
  {
-   unsigned i;
-   vec<mem_ref_loc> *accs;
- 
-   FOR_EACH_VEC_ELT (mem->accesses_in_loop, i, accs)
-     accs->release ();
    mem->accesses_in_loop.release ();
- 
-   free (mem);
  }
  
  /* Allocates and returns a memory reference description for MEM whose hash
--- 1384,1390 ----
*************** memref_free (struct mem_ref *mem)
*** 1399,1412 ****
  static mem_ref_p
  mem_ref_alloc (tree mem, unsigned hash, unsigned id)
  {
!   mem_ref_p ref = XNEW (struct mem_ref);
    ao_ref_init (&ref->mem, mem);
    ref->id = id;
    ref->hash = hash;
!   bitmap_initialize (&ref->stored, &lim_bitmap_obstack);
    bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
    bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
!   ref->accesses_in_loop.create (0);
  
    return ref;
  }
--- 1393,1406 ----
  static mem_ref_p
  mem_ref_alloc (tree mem, unsigned hash, unsigned id)
  {
!   mem_ref_p ref = XOBNEW (&mem_ref_obstack, struct mem_ref);
    ao_ref_init (&ref->mem, mem);
    ref->id = id;
    ref->hash = hash;
!   ref->stored = NULL;
    bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
    bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack);
!   ref->accesses_in_loop.create (1);
  
    return ref;
  }
*************** mem_ref_alloc (tree mem, unsigned hash,
*** 1415,1431 ****
     description REF.  The reference occurs in statement STMT.  */
  
  static void
! record_mem_ref_loc (mem_ref_p ref, struct loop *loop, gimple stmt, tree *loc)
  {
    mem_ref_loc aref;
- 
-   if (ref->accesses_in_loop.length ()
-       <= (unsigned) loop->num)
-     ref->accesses_in_loop.safe_grow_cleared (loop->num + 1);
- 
    aref.stmt = stmt;
    aref.ref = loc;
!   ref->accesses_in_loop[loop->num].safe_push (aref);
  }
  
  /* Marks reference REF as stored in LOOP.  */
--- 1409,1431 ----
     description REF.  The reference occurs in statement STMT.  */
  
  static void
! record_mem_ref_loc (mem_ref_p ref, gimple stmt, tree *loc)
  {
    mem_ref_loc aref;
    aref.stmt = stmt;
    aref.ref = loc;
!   ref->accesses_in_loop.safe_push (aref);
! }
! 
! /* Set the LOOP bit in REF stored bitmap and allocate that if
!    necessary.  Return whether a bit was changed.  */
! 
! static bool
! set_ref_stored_in_loop (mem_ref_p ref, struct loop *loop)
! {
!   if (!ref->stored)
!     ref->stored = BITMAP_ALLOC (&lim_bitmap_obstack);
!   return bitmap_set_bit (ref->stored, loop->num);
  }
  
  /* Marks reference REF as stored in LOOP.  */
*************** static void
*** 1434,1440 ****
  mark_ref_stored (mem_ref_p ref, struct loop *loop)
  {
    while (loop != current_loops->tree_root
!        && bitmap_set_bit (&ref->stored, loop->num))
      loop = loop_outer (loop);
  }
  
--- 1434,1440 ----
  mark_ref_stored (mem_ref_p ref, struct loop *loop)
  {
    while (loop != current_loops->tree_root
!        && set_ref_stored_in_loop (ref, loop))
      loop = loop_outer (loop);
  }
  
*************** gather_mem_refs_stmt (struct loop *loop,
*** 1493,1499 ****
            }
        }
  
!       record_mem_ref_loc (ref, loop, stmt, mem);
      }
    bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
    if (is_stored)
--- 1493,1499 ----
            }
        }
  
!       record_mem_ref_loc (ref, stmt, mem);
      }
    bitmap_set_bit (&memory_accesses.refs_in_loop[loop->num], ref->id);
    if (is_stored)
*************** sort_bbs_in_loop_postorder_cmp (const vo
*** 1520,1525 ****
--- 1520,1539 ----
    return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 
1;
  }
  
+ /* qsort sort function to sort ref locs after their loop fathers postorder.  
*/
+ 
+ static int
+ sort_locs_in_loop_postorder_cmp (const void *loc1_, const void *loc2_)
+ {
+   mem_ref_loc *loc1 = (mem_ref_loc *)const_cast<void *>(loc1_);
+   mem_ref_loc *loc2 = (mem_ref_loc *)const_cast<void *>(loc2_);
+   struct loop *loop1 = gimple_bb (loc1->stmt)->loop_father;
+   struct loop *loop2 = gimple_bb (loc2->stmt)->loop_father;
+   if (loop1->num == loop2->num)
+     return 0;
+   return bb_loop_postorder[loop1->num] < bb_loop_postorder[loop2->num] ? -1 : 
1;
+ }
+ 
  /* Gathers memory references in loops.  */
  
  static void
*************** analyze_memory_references (void)
*** 1530,1541 ****
    struct loop *loop, *outer;
    unsigned i, n;
  
-   /* Initialize bb_loop_postorder with a mapping from loop->num to
-      its postorder index.  */
-   i = 0;
-   bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
-   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
-     bb_loop_postorder[loop->num] = i++;
    /* Collect all basic-blocks in loops and sort them after their
       loops postorder.  */
    i = 0;
--- 1544,1549 ----
*************** analyze_memory_references (void)
*** 1545,1551 ****
        bbs[i++] = bb;
    n = i;
    qsort (bbs, n, sizeof (basic_block), sort_bbs_in_loop_postorder_cmp);
-   free (bb_loop_postorder);
  
    /* Visit blocks in loop postorder and assign mem-ref IDs in that order.
       That results in better locality for all the bitmaps.  */
--- 1553,1558 ----
*************** analyze_memory_references (void)
*** 1556,1561 ****
--- 1563,1574 ----
          gather_mem_refs_stmt (bb->loop_father, gsi_stmt (bsi));
      }
  
+   /* Sort the location list of gathered memory references after their
+      loop postorder number.  */
+   mem_ref *ref;
+   FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
+     ref->accesses_in_loop.qsort (sort_locs_in_loop_postorder_cmp);
+ 
    free (bbs);
  
    /* Propagate the information about accessed memory references up
*************** mem_refs_may_alias_p (mem_ref_p mem1, me
*** 1612,1617 ****
--- 1625,1646 ----
    return true;
  }
  
+ /* Compare function for bsearch searching for reference locations
+    in a loop.  */
+ 
+ static int
+ find_ref_loc_in_loop_cmp (const void *loop_, const void *loc_)
+ {
+   struct loop *loop = (struct loop *)const_cast<void *>(loop_);
+   mem_ref_loc *loc = (mem_ref_loc *)const_cast<void *>(loc_);
+   struct loop *loc_loop = gimple_bb (loc->stmt)->loop_father;
+   if (loop->num  == loc_loop->num
+       || flow_loop_nested_p (loop, loc_loop))
+     return 0;
+   return (bb_loop_postorder[loop->num] < bb_loop_postorder[loc_loop->num]
+         ? -1 : 1);
+ }
+ 
  /* Iterates over all locations of REF in LOOP and its subloops calling
     fn.operator() with the location as argument.  When that operator
     returns true the iteration is stopped and true is returned.
*************** for_all_locs_in_loop (struct loop *loop,
*** 1623,1638 ****
  {
    unsigned i;
    mem_ref_loc_p loc;
-   struct loop *subloop;
  
!   if (ref->accesses_in_loop.length () > (unsigned) loop->num)
!     FOR_EACH_VEC_ELT (ref->accesses_in_loop[loop->num], i, loc)
!       if (fn (loc))
!       return true;
  
!   for (subloop = loop->inner; subloop != NULL; subloop = subloop->next)
!     if (for_all_locs_in_loop (subloop, ref, fn))
!       return true;
  
    return false;
  }
--- 1652,1685 ----
  {
    unsigned i;
    mem_ref_loc_p loc;
  
!   /* Search for the cluster of locs in the accesses_in_loop vector
!      which is sorted after postorder index of the loop father.  */
!   loc = ref->accesses_in_loop.bsearch (loop, find_ref_loc_in_loop_cmp);
!   if (!loc)
!     return false;
  
!   /* We have found one location inside loop or its sub-loops.  Iterate
!      both forward and backward to cover the whole cluster.  */
!   i = loc - ref->accesses_in_loop.address ();
!   while (i > 0)
!     {
!       --i;
!       mem_ref_loc_p l = &ref->accesses_in_loop[i];
!       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
!       break;
!       if (fn (l))
!       return true;
!     }
!   for (i = loc - ref->accesses_in_loop.address ();
!        i < ref->accesses_in_loop.length (); ++i)
!     {
!       mem_ref_loc_p l = &ref->accesses_in_loop[i];
!       if (!flow_bb_inside_loop_p (loop, gimple_bb (l->stmt)))
!       break;
!       if (fn (l))
!       return true;
!     }
  
    return false;
  }
*************** execute_sm_if_changed_flag_set (struct l
*** 1871,1877 ****
  static void
  execute_sm (struct loop *loop, vec<edge> exits, mem_ref_p ref)
  {
!   tree tmp_var, store_flag;
    unsigned i;
    gimple load;
    struct fmt_data fmt_data;
--- 1918,1924 ----
  static void
  execute_sm (struct loop *loop, vec<edge> exits, mem_ref_p ref)
  {
!   tree tmp_var, store_flag = NULL_TREE;
    unsigned i;
    gimple load;
    struct fmt_data fmt_data;
*************** hoist_memory_references (struct loop *lo
*** 1958,1968 ****
  
  struct ref_always_accessed
  {
!   ref_always_accessed (struct loop *loop_, tree base_, bool stored_p_)
!       : loop (loop_), base (base_), stored_p (stored_p_) {}
    bool operator () (mem_ref_loc_p loc);
    struct loop *loop;
-   tree base;
    bool stored_p;
  };
  
--- 2005,2014 ----
  
  struct ref_always_accessed
  {
!   ref_always_accessed (struct loop *loop_, bool stored_p_)
!       : loop (loop_), stored_p (stored_p_) {}
    bool operator () (mem_ref_loc_p loc);
    struct loop *loop;
    bool stored_p;
  };
  
*************** ref_always_accessed::operator () (mem_re
*** 1978,1993 ****
       stores to the reference.  */
    if (stored_p)
      {
!       tree lhs;
!       if (!gimple_get_lhs (loc->stmt))
!       return false;
!       lhs = get_base_address (gimple_get_lhs (loc->stmt));
!       if (!lhs)
!       return false;
!       if (INDIRECT_REF_P (lhs)
!         || TREE_CODE (lhs) == MEM_REF)
!       lhs = TREE_OPERAND (lhs, 0);
!       if (lhs != base)
        return false;
      }
  
--- 2024,2032 ----
       stores to the reference.  */
    if (stored_p)
      {
!       tree lhs = gimple_get_lhs (loc->stmt);
!       if (!lhs
!         || lhs != *loc->ref)
        return false;
      }
  
*************** ref_always_accessed::operator () (mem_re
*** 2008,2019 ****
  static bool
  ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
  {
-   tree base = ao_ref_base (&ref->mem);
-   if (TREE_CODE (base) == MEM_REF)
-     base = TREE_OPERAND (base, 0);
- 
    return for_all_locs_in_loop (loop, ref,
!                              ref_always_accessed (loop, base, stored_p));
  }
  
  /* Returns true if REF1 and REF2 are independent.  */
--- 2047,2054 ----
  static bool
  ref_always_accessed_p (struct loop *loop, mem_ref_p ref, bool stored_p)
  {
    return for_all_locs_in_loop (loop, ref,
!                              ref_always_accessed (loop, stored_p));
  }
  
  /* Returns true if REF1 and REF2 are independent.  */
*************** ref_indep_loop_p_1 (struct loop *loop, m
*** 2090,2096 ****
  static bool
  ref_indep_loop_p_2 (struct loop *loop, mem_ref_p ref, bool stored_p)
  {
!   stored_p |= bitmap_bit_p (&ref->stored, loop->num);
  
    if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
      return true;
--- 2125,2131 ----
  static bool
  ref_indep_loop_p_2 (struct loop *loop, mem_ref_p ref, bool stored_p)
  {
!   stored_p |= (ref->stored && bitmap_bit_p (ref->stored, loop->num));
  
    if (bitmap_bit_p (&ref->indep_loop, LOOP_DEP_BIT (loop->num, stored_p)))
      return true;
*************** fill_always_executed_in (void)
*** 2370,2378 ****
--- 2405,2415 ----
  static void
  tree_ssa_lim_initialize (void)
  {
+   struct loop *loop;
    unsigned i;
  
    bitmap_obstack_initialize (&lim_bitmap_obstack);
+   gcc_obstack_init (&mem_ref_obstack);
    lim_aux_data_map = pointer_map_create ();
  
    if (flag_tm)
*************** tree_ssa_lim_initialize (void)
*** 2404,2409 ****
--- 2441,2453 ----
      }
  
    memory_accesses.ttae_cache = NULL;
+ 
+   /* Initialize bb_loop_postorder with a mapping from loop->num to
+      its postorder index.  */
+   i = 0;
+   bb_loop_postorder = XNEWVEC (unsigned, number_of_loops (cfun));
+   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+     bb_loop_postorder[loop->num] = i++;
  }
  
  /* Cleans up after the invariant motion pass.  */
*************** tree_ssa_lim_finalize (void)
*** 2428,2433 ****
--- 2472,2478 ----
    FOR_EACH_VEC_ELT (memory_accesses.refs_list, i, ref)
      memref_free (ref);
    memory_accesses.refs_list.release ();
+   obstack_free (&mem_ref_obstack, NULL);
  
    memory_accesses.refs_in_loop.release ();
    memory_accesses.refs_stored_in_loop.release ();
*************** tree_ssa_lim_finalize (void)
*** 2435,2440 ****
--- 2480,2487 ----
  
    if (memory_accesses.ttae_cache)
      free_affine_expand_cache (&memory_accesses.ttae_cache);
+ 
+   free (bb_loop_postorder);
  }
  
  /* Moves invariants from loops.  Only "expensive" invariants are moved out --

Reply via email to