The code was using two string_lists, one for the directories and
one for the files.  The code never checks the lists independently
so we should be able to only use one list.  The string_list also
is a O(log n) for lookup and insertion.  Switching this to use a
hashmap will give O(1) which will save some time when there are
millions of paths that will be checked.

Signed-off-by: Kevin Willford <kewi...@microsoft.com>
---
 merge-recursive.c | 48 +++++++++++++++++++++++++++++++++++++-----------
 merge-recursive.h |  3 +--
 2 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index d47f40ea81..ef4fe5f09f 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -24,6 +24,26 @@
 #include "dir.h"
 #include "submodule.h"
 
+struct path_hashmap_entry {
+       struct hashmap_entry;
+       char path[FLEX_ARRAY];
+};
+
+static unsigned int (*pathhash)(const char *path);
+static int (*pathcmp)(const char *a, const char *b);
+
+static int path_hashmap_cmp(const void *cmp_data,
+                           const void *entry,
+                           const void *entry_or_key,
+                           const void *keydata)
+{
+       const struct path_hashmap_entry *a = entry;
+       const struct path_hashmap_entry *b = entry_or_key;
+       const char *key = keydata;
+
+       return pathcmp(a->path, key ? key : b->path);
+}
+
 static void flush_output(struct merge_options *o)
 {
        if (o->buffer_output < 2 && o->obuf.len) {
@@ -314,15 +334,15 @@ static int save_files_dirs(const unsigned char *sha1,
                struct strbuf *base, const char *path,
                unsigned int mode, int stage, void *context)
 {
+       struct path_hashmap_entry *entry;
        int baselen = base->len;
        struct merge_options *o = context;
 
        strbuf_addstr(base, path);
 
-       if (S_ISDIR(mode))
-               string_list_insert(&o->current_directory_set, base->buf);
-       else
-               string_list_insert(&o->current_file_set, base->buf);
+       FLEX_ALLOC_MEM(entry, path, base->buf, base->len);
+       hashmap_entry_init(entry, pathhash(entry->path));
+       hashmap_add(&o->current_file_dir_set, entry);
 
        strbuf_setlen(base, baselen);
        return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
@@ -642,6 +662,8 @@ static void add_flattened_path(struct strbuf *out, const 
char *s)
 
 static char *unique_path(struct merge_options *o, const char *path, const char 
*branch)
 {
+       struct path_hashmap_entry dummy;
+       struct path_hashmap_entry *entry;
        struct strbuf newpath = STRBUF_INIT;
        int suffix = 0;
        size_t base_len;
@@ -650,14 +672,17 @@ static char *unique_path(struct merge_options *o, const 
char *path, const char *
        add_flattened_path(&newpath, branch);
 
        base_len = newpath.len;
-       while (string_list_has_string(&o->current_file_set, newpath.buf) ||
-              string_list_has_string(&o->current_directory_set, newpath.buf) ||
+       hashmap_entry_init(&dummy, pathhash(newpath.buf));
+       while (hashmap_get(&o->current_file_dir_set, &dummy, newpath.buf) ||
               (!o->call_depth && file_exists(newpath.buf))) {
                strbuf_setlen(&newpath, base_len);
                strbuf_addf(&newpath, "_%d", suffix++);
+               hashmap_entry_init(&dummy, pathhash(newpath.buf));
        }
 
-       string_list_insert(&o->current_file_set, newpath.buf);
+       FLEX_ALLOC_MEM(entry, path, newpath.buf, newpath.len);
+       hashmap_entry_init(entry, pathhash(entry->path));
+       hashmap_add(&o->current_file_dir_set, entry);
        return strbuf_detach(&newpath, NULL);
 }
 
@@ -1941,8 +1966,7 @@ int merge_trees(struct merge_options *o,
        if (unmerged_cache()) {
                struct string_list *entries, *re_head, *re_merge;
                int i;
-               string_list_clear(&o->current_file_set, 1);
-               string_list_clear(&o->current_directory_set, 1);
+               hashmap_init(&o->current_file_dir_set, path_hashmap_cmp, NULL, 
512);
                get_files_dirs(o, head);
                get_files_dirs(o, merge);
 
@@ -1978,6 +2002,8 @@ int merge_trees(struct merge_options *o,
                string_list_clear(re_head, 0);
                string_list_clear(entries, 1);
 
+               hashmap_free(&o->current_file_dir_set, 1);
+
                free(re_merge);
                free(re_head);
                free(entries);
@@ -2179,8 +2205,8 @@ void init_merge_options(struct merge_options *o)
        if (o->verbosity >= 5)
                o->buffer_output = 0;
        strbuf_init(&o->obuf, 0);
-       string_list_init(&o->current_file_set, 1);
-       string_list_init(&o->current_directory_set, 1);
+       pathhash = ignore_case ? strihash : strhash;
+       pathcmp = ignore_case ? strcasecmp : strcmp;
        string_list_init(&o->df_conflict_file_set, 1);
 }
 
diff --git a/merge-recursive.h b/merge-recursive.h
index 735343b413..80d69d1401 100644
--- a/merge-recursive.h
+++ b/merge-recursive.h
@@ -25,8 +25,7 @@ struct merge_options {
        int show_rename_progress;
        int call_depth;
        struct strbuf obuf;
-       struct string_list current_file_set;
-       struct string_list current_directory_set;
+       struct hashmap current_file_dir_set;
        struct string_list df_conflict_file_set;
 };
 
-- 
2.14.1.329.g6edf0add19

Reply via email to