The big changes in this iteration are:

- Switched to index.threads to provide control over the use of threading

- Added another worker thread to load the index extensions in parallel

- Applied optimization expand_name_field() suggested by Duy

The net result of these optimizations is a savings of 25.8% (1,000,000 files)
to 38.1% (100,000 files) as measured by p0002-read-cache.sh.

This patch conflicts with Duy's patch to remove the double memory copy and
pass in the previous ce instead.  The two will need to be merged/reconciled
once they settle down a bit.


Base Ref: master
Web-Diff: https://github.com/benpeart/git/commit/39f2b0f5fe
Checkout: git fetch https://github.com/benpeart/git read-index-multithread-v2 && git checkout 39f2b0f5fe


### Interdiff (v1..v2):

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 3344685cc4..79f8296d9c 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -899,14 +899,6 @@ relatively high IO latencies.  When enabled, Git will do 
the
 index comparison to the filesystem data in parallel, allowing
 overlapping IO's.  Defaults to true.
 
-core.fastIndex::
-       Enable parallel index loading
-+
-This can speed up operations like 'git diff' and 'git status' especially
-when the index is very large.  When enabled, Git will do the index
-loading from the on disk format to the in-memory format in parallel.
-Defaults to true.
-
 core.createObject::
        You can set this to 'link', in which case a hardlink followed by
        a delete of the source are used to make sure that object creation
@@ -2399,6 +2391,12 @@ imap::
        The configuration variables in the 'imap' section are described
        in linkgit:git-imap-send[1].
 
+index.threads::
+       Specifies the number of threads to spawn when loading the index.
+       This is meant to reduce index load time on multiprocessor machines.
+       Specifying 0 or 'true' will cause Git to auto-detect the number of
+       CPU's and set the number of threads accordingly. Defaults to 'true'.
+
 index.version::
        Specify the version with which new index files should be
        initialized.  This does not affect existing repositories.
diff --git a/config.c b/config.c
index 883092fdd3..3bda124550 100644
--- a/config.c
+++ b/config.c
@@ -2289,17 +2289,18 @@ int git_config_get_fsmonitor(void)
        return 0;
 }
 
-int git_config_get_fast_index(void)
+int git_config_get_index_threads(void)
 {
-       int val;
+       int is_bool, val;
 
-       if (!git_config_get_maybe_bool("core.fastindex", &val))
+       if (!git_config_get_bool_or_int("index.threads", &is_bool, &val)) {
+               if (is_bool)
+                       return val ? 0 : 1;
+               else
                        return val;
+       }
 
-       if (getenv("GIT_FASTINDEX_TEST"))
-               return 1;
-
-       return -1; /* default value */
+       return 0; /* auto-detect */
 }
 
 NORETURN
diff --git a/config.h b/config.h
index 74ca4e7db5..a06027e69b 100644
--- a/config.h
+++ b/config.h
@@ -250,7 +250,7 @@ extern int git_config_get_untracked_cache(void);
 extern int git_config_get_split_index(void);
 extern int git_config_get_max_percent_split_change(void);
 extern int git_config_get_fsmonitor(void);
-extern int git_config_get_fast_index(void);
+extern int git_config_get_index_threads(void);
 
 /* This dies if the configured or default date is in the future */
 extern int git_config_get_expiry(const char *key, const char **output);
diff --git a/read-cache.c b/read-cache.c
index 0fa7e1a04c..f5e7c86c42 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -24,10 +24,6 @@
 #include "utf8.h"
 #include "fsmonitor.h"
 
-#ifndef min
-#define min(a,b) (((a) < (b)) ? (a) : (b))
-#endif
-
 /* Mask for the name length in ce_flags in the on-disk index */
 
 #define CE_NAMEMASK  (0x0fff)
@@ -1758,9 +1754,8 @@ static unsigned long expand_name_field(struct strbuf 
*name, const char *cp_)
 
        if (name->len < len)
                die("malformed name field in the index");
-       strbuf_remove(name, name->len - len, len);
-       for (ep = cp; *ep; ep++)
-               ; /* find the end */
+       strbuf_setlen(name, name->len - len);
+       ep = cp + strlen((const char *)cp);
        strbuf_add(name, cp, ep - cp);
        return (const char *)ep + 1 - cp_;
 }
@@ -1893,7 +1888,13 @@ static size_t estimate_cache_size(size_t ondisk_size, 
unsigned int entries)
        return ondisk_size + entries * per_entry;
 }
 
-static unsigned long load_cache_entry_block(struct index_state *istate, struct 
mem_pool *ce_mem_pool, int offset, int nr, void *mmap, unsigned long 
start_offset, struct strbuf *previous_name)
+/*
+ * A helper function that will load the specified range of cache entries
+ * from the memory mapped file and add them to the given index.
+ */
+static unsigned long load_cache_entry_block(struct index_state *istate,
+                       struct mem_pool *ce_mem_pool, int offset, int nr, void 
*mmap,
+                       unsigned long start_offset, struct strbuf 
*previous_name)
 {
        int i;
        unsigned long src_offset = start_offset;
@@ -1912,7 +1913,8 @@ static unsigned long load_cache_entry_block(struct 
index_state *istate, struct m
        return src_offset - start_offset;
 }
 
-static unsigned long load_all_cache_entries(struct index_state *istate, void 
*mmap, size_t mmap_size, unsigned long src_offset)
+static unsigned long load_all_cache_entries(struct index_state *istate,
+                       void *mmap, size_t mmap_size, unsigned long src_offset)
 {
        struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
        unsigned long consumed;
@@ -1927,7 +1929,8 @@ static unsigned long load_all_cache_entries(struct 
index_state *istate, void *mm
                              estimate_cache_size(mmap_size, istate->cache_nr));
        }
 
-       consumed = load_cache_entry_block(istate, istate->ce_mem_pool, 0, 
istate->cache_nr, mmap, src_offset, previous_name);
+       consumed = load_cache_entry_block(istate, istate->ce_mem_pool,
+                                       0, istate->cache_nr, mmap, src_offset, 
previous_name);
        strbuf_release(&previous_name_buf);
        return consumed;
 }
@@ -1955,67 +1958,110 @@ struct load_cache_entries_thread_data
        struct mem_pool *ce_mem_pool;
        int offset, nr;
        void *mmap;
+       size_t mmap_size;
        unsigned long start_offset;
        struct strbuf previous_name_buf;
        struct strbuf *previous_name;
        unsigned long consumed; /* return # of bytes in index file processed */
 };
 
-/*
-* A thread proc to run the load_cache_entries() computation
-* across multiple background threads.
-*/
 static void *load_cache_entries_thread(void *_data)
 {
        struct load_cache_entries_thread_data *p = _data;
 
-       p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool, 
p->offset, p->nr, p->mmap, p->start_offset, p->previous_name);
+       p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool,
+               p->offset, p->nr, p->mmap, p->start_offset, p->previous_name);
+       return NULL;
+}
+
+static void *load_index_extensions_thread(void *_data)
+{
+       struct load_cache_entries_thread_data *p = _data;
+       unsigned long src_offset = p->start_offset;
+
+       while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
+               /* After an array of active_nr index entries,
+                * there can be arbitrary number of extended
+                * sections, each of which is prefixed with
+                * extension name (4-byte) and section length
+                * in 4-byte network byte order.
+                */
+               uint32_t extsize;
+               memcpy(&extsize, (char *)p->mmap + src_offset + 4, 4);
+               extsize = ntohl(extsize);
+               if (read_index_extension(p->istate,
+                                                               (const char 
*)p->mmap + src_offset,
+                                                               (char *)p->mmap 
+ src_offset + 8,
+                                                               extsize) < 0) {
+                       munmap(p->mmap, p->mmap_size);
+                       die("index file corrupt");
+               }
+               src_offset += 8;
+               src_offset += extsize;
+       }
+       p->consumed += src_offset - p->start_offset;
+
        return NULL;
 }
 
-static unsigned long load_cache_entries(struct index_state *istate, void 
*mmap, size_t mmap_size, unsigned long src_offset)
+static unsigned long load_cache_entries(struct index_state *istate,
+                       void *mmap, size_t mmap_size, unsigned long src_offset)
 {
        struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
        struct load_cache_entries_thread_data *data;
-       int threads, cpus, thread_nr;
+       int nr_threads, cpus, ce_per_thread;
        unsigned long consumed;
        int i, thread;
 
+       nr_threads = git_config_get_index_threads();
+       if (!nr_threads) {
                cpus = online_cpus();
-       threads = istate->cache_nr / THREAD_COST;
-       if (threads > cpus)
-               threads = cpus;
+               nr_threads = istate->cache_nr / THREAD_COST;
+               if (nr_threads > cpus)
+                       nr_threads = cpus;
+       }
 
        /* enable testing with fewer than default minimum of entries */
-       if ((istate->cache_nr > 1) && (threads < 2) && 
getenv("GIT_FASTINDEX_TEST"))
-               threads = 2;
+       if ((istate->cache_nr > 1) && (nr_threads < 2) && 
git_env_bool("GIT_INDEX_THREADS_TEST", 0))
+               nr_threads = 2;
 
-       if (threads < 2 || !git_config_get_fast_index())
+       if (nr_threads < 2)
                return load_all_cache_entries(istate, mmap, mmap_size, 
src_offset);
 
+       /* a little sanity checking */
+       if (istate->name_hash_initialized)
+               die("the name hash isn't thread safe");
+
        mem_pool_init(&istate->ce_mem_pool, 0);
        if (istate->version == 4)
                previous_name = &previous_name_buf;
        else
                previous_name = NULL;
 
-       thread_nr = (istate->cache_nr + threads - 1) / threads;
-       data = xcalloc(threads, sizeof(struct load_cache_entries_thread_data));
+       /* allocate an extra thread for loading the index extensions */
+       ce_per_thread = DIV_ROUND_UP(istate->cache_nr, nr_threads);
+       data = xcalloc(nr_threads + 1, sizeof(struct 
load_cache_entries_thread_data));
 
-       /* loop through index entries starting a thread for every thread_nr 
entries */
+       /*
+        * Loop through index entries starting a thread for every ce_per_thread
+        * entries.
+        */
        consumed = thread = 0;
-       for (i = 0; ; i++) {
+       for (i = 0; i < istate->cache_nr; i++) {
                struct ondisk_cache_entry *ondisk;
                const char *name;
                unsigned int flags;
 
-               /* we've reached the begining of a block of cache entries, kick 
off a thread to process them */
-               if (0 == i % thread_nr) {
+               /*
+                * we've reached the beginning of a block of cache entries,
+                * kick off a thread to process them
+                */
+               if (0 == i % ce_per_thread) {
                        struct load_cache_entries_thread_data *p = 
&data[thread];
 
                        p->istate = istate;
                        p->offset = i;
-                       p->nr = min(thread_nr, istate->cache_nr - i);
+                       p->nr = ce_per_thread < istate->cache_nr - i ? 
ce_per_thread : istate->cache_nr - i;
 
                        /* create a mem_pool for each thread */
                        if (istate->version == 4)
@@ -2034,8 +2080,8 @@ static unsigned long load_cache_entries(struct 
index_state *istate, void *mmap,
 
                        if (pthread_create(&p->pthread, NULL, 
load_cache_entries_thread, p))
                                die("unable to create 
load_cache_entries_thread");
-                       if (++thread == threads || p->nr != thread_nr)
-                               break;
+
+                       ++thread;
                }
 
                ondisk = (struct ondisk_cache_entry *)((char *)mmap + 
src_offset);
@@ -2064,7 +2110,18 @@ static unsigned long load_cache_entries(struct 
index_state *istate, void *mmap,
                        src_offset += (name - ((char *)ondisk)) + 
expand_name_field(previous_name, name);
        }
 
-       for (i = 0; i < threads; i++) {
+       /* create a thread to load the index extensions */
+       struct load_cache_entries_thread_data *p = &data[thread];
+       p->istate = istate;
+       mem_pool_init(&p->ce_mem_pool, 0);
+       p->mmap = mmap;
+       p->mmap_size = mmap_size;
+       p->start_offset = src_offset;
+
+       if (pthread_create(&p->pthread, NULL, load_index_extensions_thread, p))
+               die("unable to create load_index_extensions_thread");
+
+       for (i = 0; i < nr_threads + 1; i++) {
                struct load_cache_entries_thread_data *p = data + i;
                if (pthread_join(p->pthread, NULL))
                        die("unable to join load_cache_entries_thread");


### Patches

Ben Peart (3):
  read-cache: speed up index load through parallelization
  read-cache: load cache extensions on worker thread
  read-cache: micro-optimize expand_name_field() to speed up V4 index
    parsing.

 Documentation/config.txt |   6 +
 config.c                 |  14 ++
 config.h                 |   1 +
 read-cache.c             | 281 +++++++++++++++++++++++++++++++++++----
 4 files changed, 275 insertions(+), 27 deletions(-)


base-commit: 29d9e3e2c47dd4b5053b0a98c891878d398463e3
-- 
2.18.0.windows.1


Reply via email to