[PATCH 2/3] commit-slab: avoid large realloc

2013-04-14 Thread Junio C Hamano
Instead of using a single slab and keep reallocating it as we find
that we need to deal with commits with larger values of commit-index,
make a slab an array of many slab_pieces. Each access may need
two levels of indirections, but we only need to reallocate the first
level array of pointers when we have to grow the table this way.

Signed-off-by: Junio C Hamano gits...@pobox.com
---
 commit.c | 62 ++
 1 file changed, 42 insertions(+), 20 deletions(-)

diff --git a/commit.c b/commit.c
index 9365e3b..4c05b39 100644
--- a/commit.c
+++ b/commit.c
@@ -501,34 +501,56 @@ struct commit *pop_commit(struct commit_list **stack)
return item;
 }
 
+struct commit_slab_piece {
+   int buf;
+};
+
 struct commit_slab {
-   int *buf;
-   int alloc;
+   int piece_size;
+   int piece_count;
+   struct commit_slab_piece **piece;
 };
 
 static void slab_init(struct commit_slab *s)
 {
-   memset(s, 0, sizeof(*s));
+   /* allocate ~512kB at once, allowing for malloc overhead */
+   int size = (512*1024-32) / sizeof(struct commit_slab_piece);
+
+   s-piece_size = size;
+   s-piece_count = 0;
+   s-piece = NULL;
 }
 
 static void slab_clear(struct commit_slab *s)
 {
-   free(s-buf);
-   slab_init(s);
+   int i;
+
+   for (i = 0; i  s-piece_count; i++)
+   free(s-piece[i]);
+   s-piece_count = 0;
+   free(s-piece);
+   s-piece = NULL;
 }
 
-static inline int *slab_at(struct commit_slab *s, const struct commit *c)
+static inline struct commit_slab_piece *slab_at(struct commit_slab *s,
+   const struct commit *c)
 {
-   if (s-alloc = c-index) {
-   int new_alloc = alloc_nr(s-alloc);
-   if (new_alloc = c-index)
-   new_alloc = c-index + 1;
-
-   s-buf = xrealloc(s-buf, new_alloc * sizeof(*s-buf));
-   memset(s-buf + s-alloc, 0, new_alloc - s-alloc);
-   s-alloc = new_alloc;
+   int nth_piece, nth_slot;
+
+   nth_piece = c-index / s-piece_size;
+   nth_slot = c-index % s-piece_size;
+
+   if (s-piece_count = nth_piece) {
+   int i;
+
+   s-piece = xrealloc(s-piece, (nth_piece + 1) * 
sizeof(s-piece));
+   for (i = s-piece_count; i = nth_piece; i++)
+   s-piece[i] = NULL;
+   s-piece_count = nth_piece + 1;
}
-   return s-buf + c-index;
+   if (!s-piece[nth_piece])
+   s-piece[nth_piece] = xcalloc(s-piece_size, 
sizeof(**s-piece));
+   return s-piece[nth_piece][nth_slot];
 }
 
 /*
@@ -550,7 +572,7 @@ void sort_in_topological_order(struct commit_list ** list, 
int lifo)
/* Mark them and clear the indegree */
for (next = orig; next; next = next-next) {
struct commit *commit = next-item;
-   *slab_at(indegree, commit) = 1;
+   slab_at(indegree, commit)-buf = 1;
}
 
/* update the indegree */
@@ -558,7 +580,7 @@ void sort_in_topological_order(struct commit_list ** list, 
int lifo)
struct commit_list * parents = next-item-parents;
while (parents) {
struct commit *parent = parents-item;
-   int *pi = slab_at(indegree, parent);
+   int *pi = slab_at(indegree, parent)-buf;
 
if (*pi)
(*pi)++;
@@ -578,7 +600,7 @@ void sort_in_topological_order(struct commit_list ** list, 
int lifo)
for (next = orig; next; next = next-next) {
struct commit *commit = next-item;
 
-   if (*slab_at(indegree, commit) == 1)
+   if (slab_at(indegree, commit)-buf == 1)
insert = commit_list_insert(commit, insert)-next;
}
 
@@ -599,7 +621,7 @@ void sort_in_topological_order(struct commit_list ** list, 
int lifo)
commit = work_item-item;
for (parents = commit-parents; parents ; parents = 
parents-next) {
struct commit *parent = parents-item;
-   int *pi = slab_at(indegree, parent);
+   int *pi = slab_at(indegree, parent)-buf;
 
if (!*pi)
continue;
@@ -620,7 +642,7 @@ void sort_in_topological_order(struct commit_list ** list, 
int lifo)
 * work_item is a commit all of whose children
 * have already been emitted. we can emit it now.
 */
-   *slab_at(indegree, commit) = 0;
+   slab_at(indegree, commit)-buf = 0;
*pptr = work_item;
pptr = work_item-next;
}
-- 
1.8.2.1-514-gf369d36

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  

Re: [PATCH 2/3] commit-slab: avoid large realloc

2013-04-14 Thread Jeff King
On Sat, Apr 13, 2013 at 11:04:48PM -0700, Junio C Hamano wrote:

 Instead of using a single slab and keep reallocating it as we find
 that we need to deal with commits with larger values of commit-index,
 make a slab an array of many slab_pieces. Each access may need
 two levels of indirections, but we only need to reallocate the first
 level array of pointers when we have to grow the table this way.

I don't know if shrinking the size of the realloc is all that big a
deal. We are doubling, so the allocation cost is already amortized
constant time.

Whereas the cost of the extra level of indirection is paid for every
lookup. So for some workloads, I can imagine this actually being slower
(I haven't yet measured it for --topo-order, though).

An interesting side effect of this patch is that the pointers returned
by slab_at() are stable until slab_clear(). It doesn't matter for
--topo-order, but if, for example, you had a recursive function, you
would not have to worry about invalidation in sub-functions. I.e.,
before this patch, this would be wrong:

  static struct commit_slab generation_cache;

  int commit_generation(struct commit *c)
  {
  int *gp = slab_at(generation_cache, c);

  if (!*gp) {
  int max = 0;
  struct commit_list *p;

  parse_commit(c);
  for (p = c-parents; p; p = p-next) {
  int g = commit_generation(p-item);
  if (max  g)
  max = g;
  }

  *gp = max + 1;
  }

  return *gp;
  }

because the recursive calls might invalidate the gp pointer (you would
have to call slab_at repeatedly instead). Whereas with your patch, it's
fine.

  struct commit_slab {
 - int *buf;
 - int alloc;
 + int piece_size;
 + int piece_count;
 + struct commit_slab_piece **piece;
  };

Is there a reason to make piece_size a struct member? It must be
constant after any pieces are allocated. Is the intent that callers
could do:

  slab_init(s);
  /* I know ahead of time we are going to need N of these. */
  s.piece_size = n;

? I think that is OK (though they do not need slab_* in that case at
all), but we should probably have a warning in the struct that
piece_size should never be touched except in that circumstance.

-Peff
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] commit-slab: avoid large realloc

2013-04-14 Thread Junio C Hamano
Jeff King p...@peff.net writes:

 On Sat, Apr 13, 2013 at 11:04:48PM -0700, Junio C Hamano wrote:

 Instead of using a single slab and keep reallocating it as we find
 that we need to deal with commits with larger values of commit-index,
 make a slab an array of many slab_pieces. Each access may need
 two levels of indirections, but we only need to reallocate the first
 level array of pointers when we have to grow the table this way.

 I don't know if shrinking the size of the realloc is all that big a
 deal. We are doubling, so the allocation cost is already amortized
 constant time.

I was more disturbed about copying the actual bytes. One of the
envisioned use of the mechanism is to give us unbound number of flag
bits to paint the history, and also this could be later used to
store larger structures per commit.

Also you can now take a pointer you illustrated (but I snipped
from here) is a good point.

  struct commit_slab {
 -int *buf;
 -int alloc;
 +int piece_size;
 +int piece_count;
 +struct commit_slab_piece **piece;
  };

 Is there a reason to make piece_size a struct member? It must be
 constant after any pieces are allocated. Is the intent that callers
 could do:

   slab_init(s);
   /* I know ahead of time we are going to need N of these. */
   s.piece_size = n;

The piece_size (later slab_size) is to hold the number of commits
each slice (i.e. the piece of memory s-slab[nth_slab] points at)
handles.

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] commit-slab: avoid large realloc

2013-04-14 Thread Jeff King
On Sun, Apr 14, 2013 at 11:51:40AM -0700, Junio C Hamano wrote:

  I don't know if shrinking the size of the realloc is all that big a
  deal. We are doubling, so the allocation cost is already amortized
  constant time.
 
 I was more disturbed about copying the actual bytes. One of the
 envisioned use of the mechanism is to give us unbound number of flag
 bits to paint the history, and also this could be later used to
 store larger structures per commit.

Right; your solution does lower the constant factor. I just don't know
that it matters all that much, since the allocations are so few (i.e.,
log2(N) to get to N items). And you are hurting the hot path of every
access of the flags to do it (by introducing the extra division and
indirection). Probably the extra work on lookup doesn't matter on modern
processors due to pipelining. I'd like to measure a few cases to be
sure, but I probably won't get to it today.

 Also you can now take a pointer you illustrated (but I snipped
 from here) is a good point.

Yeah, assuming the timings are equal, I'd take your solution on that
principle alone.

   struct commit_slab {
  -  int *buf;
  -  int alloc;
  +  int piece_size;
  +  int piece_count;
  +  struct commit_slab_piece **piece;
   };
 
  Is there a reason to make piece_size a struct member? It must be
  constant after any pieces are allocated. Is the intent that callers
  could do:
 
slab_init(s);
/* I know ahead of time we are going to need N of these. */
s.piece_size = n;
 
 The piece_size (later slab_size) is to hold the number of commits
 each slice (i.e. the piece of memory s-slab[nth_slab] points at)
 handles.

Yes, but isn't that a constant:

  (512*1024-32) / sizeof(struct commit_slab_piece)

Leaving it as such lets the compiler optimize better, and is safe from
anybody changing it at runtime. But I think the answer to my question is
yes, that would be the best thing for patch 2, but patch 3 will allow a
run-time stride parameter anyway. Correct?

-Peff
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] commit-slab: avoid large realloc

2013-04-14 Thread Junio C Hamano
Jeff King p...@peff.net writes:

 Yes, but isn't that a constant:

   (512*1024-32) / sizeof(struct commit_slab_piece)

 Leaving it as such lets the compiler optimize better, and is safe from
 anybody changing it at runtime. But I think the answer to my question is
 yes, that would be the best thing for patch 2, but patch 3 will allow a
 run-time stride parameter anyway. Correct?

Yup.
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html