From: Jean-Jacques Lafay <jeanjacques.la...@gmail.com>
Date: Sat, 10 Nov 2012 18:36:10 +0100

In large repos, the recursion implementation of contains(commit,
commit_list) may result in a stack overflow. Replace the recursion with
a loop to fix it.

This problem is more apparent on Windows than on Linux, where the stack
is more limited by default.

See also this thread on the msysGit list:

        https://groups.google.com/d/topic/msysgit/FqT6boJrb2g/discussion

[jes: re-written to imitate the original recursion more closely]

Thomas Braun pointed out several documentation shortcomings.

Signed-off-by: Jean-Jacques Lafay <jeanjacques.la...@gmail.com>
Signed-off-by: Johannes Schindelin <johannes.schinde...@gmx.de>
Tested-by: Stepan Kasal <ka...@ucw.cz>
Thanks-to: Thomas Braun <thomas.br...@byte-physics.de>
---
 builtin/tag.c  | 81 ++++++++++++++++++++++++++++++++++++++++++++++++----------
 t/t7004-tag.sh | 21 +++++++++++++++
 2 files changed, 88 insertions(+), 14 deletions(-)

diff --git a/builtin/tag.c b/builtin/tag.c
index 74d3780..79c8c28 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -73,11 +73,13 @@ static int in_commit_list(const struct commit_list *want, 
struct commit *c)
        return 0;
 }
 
-static int contains_recurse(struct commit *candidate,
+/*
+ * Test whether the candidate or one of its parents is contained in the list.
+ * Do not recurse to find out, though, but return -1 if inconclusive.
+ */
+static int contains_test(struct commit *candidate,
                            const struct commit_list *want)
 {
-       struct commit_list *p;
-
        /* was it previously marked as containing a want commit? */
        if (candidate->object.flags & TMP_MARK)
                return 1;
@@ -85,26 +87,77 @@ static int contains_recurse(struct commit *candidate,
        if (candidate->object.flags & UNINTERESTING)
                return 0;
        /* or are we it? */
-       if (in_commit_list(want, candidate))
+       if (in_commit_list(want, candidate)) {
+               candidate->object.flags |= TMP_MARK;
                return 1;
+       }
 
        if (parse_commit(candidate) < 0)
                return 0;
 
-       /* Otherwise recurse and mark ourselves for future traversals. */
-       for (p = candidate->parents; p; p = p->next) {
-               if (contains_recurse(p->item, want)) {
-                       candidate->object.flags |= TMP_MARK;
-                       return 1;
-               }
-       }
-       candidate->object.flags |= UNINTERESTING;
-       return 0;
+       return -1;
+}
+
+/*
+ * Mimicking the real stack, this stack lives on the heap, avoiding stack
+ * overflows.
+ *
+ * At each recursion step, the stack items points to the commits whose
+ * ancestors are to be inspected.
+ */
+struct stack {
+       int nr, alloc;
+       struct stack_entry {
+               struct commit *commit;
+               struct commit_list *parents;
+       } *stack;
+};
+
+static void push_to_stack(struct commit *candidate, struct stack *stack)
+{
+       int index = stack->nr++;
+       ALLOC_GROW(stack->stack, stack->nr, stack->alloc);
+       stack->stack[index].commit = candidate;
+       stack->stack[index].parents = candidate->parents;
 }
 
 static int contains(struct commit *candidate, const struct commit_list *want)
 {
-       return contains_recurse(candidate, want);
+       struct stack stack = { 0, 0, NULL };
+       int result = contains_test(candidate, want);
+
+       if (result >= 0)
+               return result;
+
+       push_to_stack(candidate, &stack);
+       while (stack.nr) {
+               struct stack_entry *entry = &stack.stack[stack.nr - 1];
+               struct commit *commit = entry->commit;
+               struct commit_list *parents = entry->parents;
+
+               if (!parents) {
+                       commit->object.flags = UNINTERESTING;
+                       stack.nr--;
+               }
+               /*
+                * If we just popped the stack, parents->item has been marked,
+                * therefore contains_test will return a meaningful 0 or 1.
+                */
+               else switch (contains_test(parents->item, want)) {
+               case 1:
+                       commit->object.flags |= TMP_MARK;
+                       stack.nr--;
+                       break;
+               case 0:
+                       entry->parents = parents->next;
+                       break;
+               default:
+                       push_to_stack(parents->item, &stack);
+                       break;
+               }
+       }
+       free(stack.stack);
+       return contains_test(candidate, want);
 }
 
 static void show_tag_lines(const unsigned char *sha1, int lines)
diff --git a/t/t7004-tag.sh b/t/t7004-tag.sh
index c8d6e9f..edaff13 100755
--- a/t/t7004-tag.sh
+++ b/t/t7004-tag.sh
@@ -1380,4 +1380,25 @@ test_expect_success 'multiple --points-at are OR-ed 
together' '
        test_cmp expect actual
 '
 
+>expect
+# ulimit is a bash builtin; we can rely on that in MinGW, but nowhere else
+test_expect_success MINGW '--contains works in a deep repo' '
+       ulimit -s 64
+       i=1 &&
+       while test $i -lt 1000
+       do
+               echo "commit refs/heads/master
+committer A U Thor <aut...@example.com> $((1000000000 + $i * 100)) +0200
+data <<EOF
+commit #$i
+EOF"
+               test $i = 1 && echo "from refs/heads/master^0"
+               i=$(($i + 1))
+       done | git fast-import &&
+       git checkout master &&
+       git tag far-far-away HEAD^ &&
+       git tag --contains HEAD >actual &&
+       test_cmp expect actual
+'
+
 test_done
-- 
1.9.2.msysgit.0.154.g978f18d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to