Every time the object hash table grows, all entries must be
rearranged. The few last times could be really expensive when the
table contains a lot of entries.

When we do "--all --objects" we know in advance we may need to hash
all objects. Just prepare the hash table big enough, so there won't be
any resizing later on. The hash table is resized a couple times before
prehash_objects() is called. But that's ok because there aren't many
objects by that time (unless you have tons of refs, likely tags..)

On linux-2.6.git:

        before       after
real    0m34.402s    0m33.288s
user    0m34.111s    0m32.863s
sys     0m0.205s     0m0.352s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclo...@gmail.com>
---
 object.c   | 21 +++++++++++++++++++--
 object.h   |  2 ++
 revision.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/object.c b/object.c
index 20703f5..bcfd2c6 100644
--- a/object.c
+++ b/object.c
@@ -88,10 +88,10 @@ struct object *lookup_object(const unsigned char *sha1)
        return obj;
 }
 
-static void grow_object_hash(void)
+void grow_object_hash_to(unsigned long nr)
 {
        int i;
-       int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size;
+       int new_hash_size = nr < 32 ? 32 : nr * 2;
        struct object **new_hash;
 
        new_hash = xcalloc(new_hash_size, sizeof(struct object *));
@@ -106,6 +106,11 @@ static void grow_object_hash(void)
        obj_hash_size = new_hash_size;
 }
 
+static void grow_object_hash(void)
+{
+       grow_object_hash_to(obj_hash_size);
+}
+
 void *create_object(const unsigned char *sha1, int type, void *o)
 {
        struct object *obj = o;
@@ -307,3 +312,15 @@ void clear_object_flags(unsigned flags)
                        obj->flags &= ~flags;
        }
 }
+
+int has_object_flags(unsigned flags)
+{
+       int i;
+
+       for (i = 0; i < obj_hash_size; i++) {
+               struct object *obj = obj_hash[i];
+               if (obj && (obj->flags & flags))
+                       return 1;
+       }
+       return 0;
+}
diff --git a/object.h b/object.h
index 97d384b..1e8fee8 100644
--- a/object.h
+++ b/object.h
@@ -52,6 +52,7 @@ extern struct object *get_indexed_object(unsigned int);
  */
 struct object *lookup_object(const unsigned char *sha1);
 
+extern void grow_object_hash_to(unsigned long nr);
 extern void *create_object(const unsigned char *sha1, int type, void *obj);
 
 /*
@@ -87,6 +88,7 @@ void add_object_array(struct object *obj, const char *name, 
struct object_array
 void add_object_array_with_mode(struct object *obj, const char *name, struct 
object_array *array, unsigned mode);
 void object_array_remove_duplicates(struct object_array *);
 
+int has_object_flags(unsigned flags);
 void clear_object_flags(unsigned flags);
 
 #endif /* OBJECT_H */
diff --git a/revision.c b/revision.c
index 71e62d8..f9ea2d1 100644
--- a/revision.c
+++ b/revision.c
@@ -1665,8 +1665,9 @@ static int for_each_good_bisect_ref(const char 
*submodule, each_ref_fn fn, void
 }
 
 static int handle_revision_pseudo_opt(const char *submodule,
-                               struct rev_info *revs,
-                               int argc, const char **argv, int *flags)
+                                     struct rev_info *revs,
+                                     int argc, const char **argv,
+                                     int *flags, int *all)
 {
        const char *arg = argv[0];
        const char *optarg;
@@ -1685,6 +1686,7 @@ static int handle_revision_pseudo_opt(const char 
*submodule,
        if (!strcmp(arg, "--all")) {
                handle_refs(submodule, revs, *flags, for_each_ref_submodule);
                handle_refs(submodule, revs, *flags, head_ref_submodule);
+               *all = 1;
        } else if (!strcmp(arg, "--branches")) {
                handle_refs(submodule, revs, *flags, 
for_each_branch_ref_submodule);
        } else if (!strcmp(arg, "--bisect")) {
@@ -1738,6 +1740,49 @@ static int handle_revision_pseudo_opt(const char 
*submodule,
        return 1;
 }
 
+static void preallocate_hash_table(void)
+{
+       unsigned long cnt = 0;
+       struct packed_git *p;
+       int i;
+
+       if (has_object_flags(UNINTERESTING))
+               /*
+                * nope this is not simply "--all --objects"
+                * not worth preallocation.
+                */
+               return;
+
+       for (i = 0; i < 256; i++) {
+               struct dirent *ent;
+               DIR *d = opendir(git_path("objects/%02x", i));
+               if (!d)
+                       continue;
+               while ((ent = readdir(d)) != NULL)
+                       /*
+                        * We only worry about insufficient size which
+                        * leads to expensive growths later on. A few
+                        * extra slots in the hash table would not hurt.
+                        */
+                       cnt++;
+               closedir(d);
+       }
+
+       if (!packed_git)
+               prepare_packed_git();
+
+       for (p = packed_git; p; p = p->next) {
+               if (!p->pack_local)
+                       /* this may lead to extra growths later */
+                       continue;
+               if (open_pack_index(p))
+                       continue;
+               cnt += p->num_objects;
+       }
+
+       grow_object_hash_to(cnt);
+}
+
 /*
  * Parse revision information, filling in the "rev_info" structure,
  * and removing the used arguments from the argument list.
@@ -1750,6 +1795,7 @@ int setup_revisions(int argc, const char **argv, struct 
rev_info *revs, struct s
        int i, flags, left, seen_dashdash, read_from_stdin, got_rev_arg = 0, 
revarg_opt;
        struct cmdline_pathspec prune_data;
        const char *submodule = NULL;
+       int all = 0;
 
        memset(&prune_data, 0, sizeof(prune_data));
        if (opt)
@@ -1785,8 +1831,9 @@ int setup_revisions(int argc, const char **argv, struct 
rev_info *revs, struct s
                        int opts;
 
                        opts = handle_revision_pseudo_opt(submodule,
-                                               revs, argc - i, argv + i,
-                                               &flags);
+                                                         revs, argc - i,
+                                                         argv + i,
+                                                         &flags, &all);
                        if (opts > 0) {
                                i += opts - 1;
                                continue;
@@ -1856,6 +1903,10 @@ int setup_revisions(int argc, const char **argv, struct 
rev_info *revs, struct s
                              get_pathspec(revs->prefix, prune_data.path));
        }
 
+       if (all && revs->tag_objects &&
+           revs->tree_objects && revs->blob_objects)
+               preallocate_hash_table();
+
        if (revs->def == NULL)
                revs->def = opt ? opt->def : NULL;
        if (opt && opt->tweak)
-- 
1.8.2.83.gc99314b

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to