On Wed, May 01, 2013 at 04:49:47PM -0400, Jeff King wrote:

> Another avenue I'd like to explore is actually doing a tree-diff from
> the last processed commit, since we should need to examine only the
> changed entries. I suspect it won't be a big benefit, though, because
> even though the tree diff can happen in O(# of entries), we are trying
> to beat doing an O(1) hash for each entry. So it's the same algorithmic
> complexity, and it is hard to beat a few hashcmp() calls. We'll see.

As I suspected, this is not a win:

Test                               origin            this tree
--------------------------------------------------------------------------
0001.1: rev-list --all             0.40(0.37+0.02)   0.40(0.38+0.01) +0.0%
0001.2: rev-list --all --objects   2.22(2.16+0.05)   2.41(2.16+0.24) +8.6%

I've included the patch below for reference. It really does work as
planned; the number of calls to lookup_object for doing "rev-list --all
--objects" on git.git is reduced from ~15M to ~2.6M. But the benefit is
eaten up by the extra tree-processing time. For fun, here's an excerpt
from the "perf diff" between the two versions:

            +10.51%  git                 [.] update_tree_entry
             +8.97%  git                 [.] lookup_object
             +7.81%  git                 [.] process_tree
             +5.20%  libc-2.13.so        [.] __memcmp_sse4_1
             +3.77%  libc-2.13.so        [.] __strlen_sse42
             +2.69%  git                 [.] base_name_compare
             [....]
             -7.90%  git                 [.] tree_entry
            -39.19%  git                 [.] lookup_object

Shawn had suggested trying to store the tree deltas in such a way that
these tree comparisons could be done more cheaply. That might tip things
in favor of the tree-diff approach.

-Peff

---
diff --git a/list-objects.c b/list-objects.c
index 3dd4a96..b87a049 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -59,8 +59,57 @@ static void process_tree(struct rev_info *revs,
        /* Nothing to do */
 }
 
+static struct tree *skip_processed_entries(struct tree_desc *cur,
+                                          struct tree_desc *old)
+{
+       while (cur->size && old->size) {
+               int cmp = base_name_compare(cur->entry.path,
+                                           tree_entry_len(&cur->entry),
+                                           0, /* ignore mode */
+                                           old->entry.path,
+                                           tree_entry_len(&old->entry),
+                                           0);
+               if (cmp > 0) {
+                       /*
+                        * Old tree has something we do not; ignore it, as it
+                        * was already processed.
+                        */
+                       update_tree_entry(old);
+               }
+               else if (cmp == 0) {
+                       /*
+                        * We have the same path; if the sha1s match, then we
+                        * have already processed it and can ignore. Otherwise,
+                        * return for processing, but also provide the old
+                        * tree so that we can recurse.
+                        */
+                       if (!hashcmp(cur->entry.sha1, old->entry.sha1)) {
+                               update_tree_entry(cur);
+                               update_tree_entry(old);
+                       }
+                       else {
+                               struct object *o = 
lookup_object(old->entry.sha1);
+                               update_tree_entry(old);
+                               if (o && o->type == OBJ_TREE)
+                                       return (struct tree *)o;
+                               else
+                                       return NULL;
+                       }
+               }
+               else {
+                       /*
+                        * We have an entry the old one does not; we must look
+                        * at it (and there is no matching old tree to report).
+                        */
+                       return NULL;
+               }
+       }
+       return NULL;
+}
+
 static void process_tree(struct rev_info *revs,
                         struct tree *tree,
+                        struct tree *old_tree,
                         show_object_fn show,
                         struct name_path *path,
                         struct strbuf *base,
@@ -68,7 +117,7 @@ static void process_tree(struct rev_info *revs,
                         void *cb_data)
 {
        struct object *obj = &tree->object;
-       struct tree_desc desc;
+       struct tree_desc desc, old_desc;
        struct name_entry entry;
        struct name_path me;
        enum interesting match = revs->diffopt.pathspec.nr == 0 ?
@@ -97,7 +146,18 @@ static void process_tree(struct rev_info *revs,
 
        init_tree_desc(&desc, tree->buffer, tree->size);
 
-       while (tree_entry(&desc, &entry)) {
+       if (old_tree)
+               init_tree_desc(&old_desc, old_tree->buffer, old_tree->size);
+       else
+               init_tree_desc(&old_desc, NULL, 0);
+
+       while (desc.size) {
+               struct tree *old_tree;
+
+               old_tree = skip_processed_entries(&desc, &old_desc);
+               if (!tree_entry(&desc, &entry))
+                       break;
+
                if (match != all_entries_interesting) {
                        match = tree_entry_interesting(&entry, base, 0,
                                                       &revs->diffopt.pathspec);
@@ -109,7 +169,7 @@ static void process_tree(struct rev_info *revs,
 
                if (S_ISDIR(entry.mode))
                        process_tree(revs,
-                                    lookup_tree(entry.sha1),
+                                    lookup_tree(entry.sha1), old_tree,
                                     show, &me, base, entry.path,
                                     cb_data);
                else if (S_ISGITLINK(entry.mode))
@@ -123,8 +183,6 @@ static void process_tree(struct rev_info *revs,
                                     cb_data);
        }
        strbuf_setlen(base, baselen);
-       free(tree->buffer);
-       tree->buffer = NULL;
 }
 
 static void mark_edge_parents_uninteresting(struct commit *commit,
@@ -173,6 +231,7 @@ void traverse_commit_list(struct rev_info *revs,
        int i;
        struct commit *commit;
        struct strbuf base;
+       struct tree *last_root_tree = NULL;
 
        strbuf_init(&base, PATH_MAX);
        while ((commit = get_revision(revs)) != NULL) {
@@ -196,8 +255,9 @@ void traverse_commit_list(struct rev_info *revs,
                        continue;
                }
                if (obj->type == OBJ_TREE) {
-                       process_tree(revs, (struct tree *)obj, show_object,
-                                    NULL, &base, name, data);
+                       process_tree(revs, (struct tree *)obj, last_root_tree,
+                                    show_object, NULL, &base, name, data);
+                       last_root_tree = (struct tree *)obj;
                        continue;
                }
                if (obj->type == OBJ_BLOB) {
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to