Re: [PATCH v3 3/4] read-cache: load cache extensions on a worker thread

2018-09-08 Thread Ben Peart




On 9/7/2018 5:10 PM, Junio C Hamano wrote:

Ben Peart  writes:


+struct load_index_extensions
+{
+#ifndef NO_PTHREADS
+   pthread_t pthread;
+#endif
+   struct index_state *istate;
+   void *mmap;
+   size_t mmap_size;
+   unsigned long src_offset;


If the file format only allows uint32_t on any platform, perhaps
this is better specified as uint32_t?  Or if this is offset into
a mmap'ed region of memory, size_t may be more appropriate.

Same comment applies to "extension_offset" we see below (which in
turn means the returned type of read_eoie_extension() function may
want to match).


+ };


Space before '}'??


+
+static void *load_index_extensions(void *_data)
+{
+   struct load_index_extensions *p = _data;


Perhaps we are being superstitious, but I think our code try to
avoid leading underscore when able, i.e.

load_index_extensions(void *data_)
{
struct load_index_extensions *p = data;


That's what I get for copying code from elsewhere in the source. :-)

static void *preload_thread(void *_data)
{
int nr;
struct thread_data *p = _data;

since there isn't any need for the underscore at all, I'll just make it:

static void *load_index_extensions(void *data)
{
struct load_index_extensions *p = data;




+   unsigned long src_offset = p->src_offset;
+
+   while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
+   /* After an array of active_nr index entries,
+* there can be arbitrary number of extended
+* sections, each of which is prefixed with
+* extension name (4-byte) and section length
+* in 4-byte network byte order.
+*/
+   uint32_t extsize;
+   memcpy(, (char *)p->mmap + src_offset + 4, 4);
+   extsize = ntohl(extsize);


The same "ntohl(), not get_be32()?" question as the one for the
previous step applies here, too.  I think the answer is "the
original was written that way" and that is acceptable, but once this
series lands, we may want to review the whole file and see if it is
worth making them consistent with a separate clean-up patch.



Makes sense, I'll add a cleanup patch to fix the inconsistency and have 
them use get_be32().



I think mmap() and munmap() are the only places that wants p->mmap
and mmap parameters passed around in various callchains to be of
type "void *"---I wonder if it is simpler to use "const char *"
throughout and only cast it to "void *" when necessary (I suspect
that there is nowhere we need to cast to or from "void *" explicitly
if we did so---assignment and argument passing would give us an
appropriate cast for free)?


Sure, I'll add minimizing the casting to the clean up patch.




+   if (read_index_extension(p->istate,
+   (const char *)p->mmap + src_offset,
+   (char *)p->mmap + src_offset + 8,
+   extsize) < 0) {
+   munmap(p->mmap, p->mmap_size);
+   die("index file corrupt");
+   }
+   ...
@@ -1907,6 +1951,11 @@ ...
...
+   p.mmap = mmap;
+   p.mmap_size = mmap_size;
+
+#ifndef NO_PTHREADS
+   nr_threads = git_config_get_index_threads();
+   if (!nr_threads)
+   nr_threads = online_cpus();
+
+   if (nr_threads >= 2) {
+   extension_offset = read_eoie_extension(mmap, mmap_size);
+   if (extension_offset) {
+   /* create a thread to load the index extensions */
+   p.src_offset = extension_offset;
+   if (pthread_create(, NULL, load_index_extensions, 
))
+   die(_("unable to create 
load_index_extensions_thread"));
+   }
+   }
+#endif


Makes sense.



Re: [PATCH v3 3/4] read-cache: load cache extensions on a worker thread

2018-09-07 Thread Junio C Hamano
Ben Peart  writes:

> +struct load_index_extensions
> +{
> +#ifndef NO_PTHREADS
> + pthread_t pthread;
> +#endif
> + struct index_state *istate;
> + void *mmap;
> + size_t mmap_size;
> + unsigned long src_offset;

If the file format only allows uint32_t on any platform, perhaps
this is better specified as uint32_t?  Or if this is offset into
a mmap'ed region of memory, size_t may be more appropriate.

Same comment applies to "extension_offset" we see below (which in
turn means the returned type of read_eoie_extension() function may
want to match).

> + };

Space before '}'??

> +
> +static void *load_index_extensions(void *_data)
> +{
> + struct load_index_extensions *p = _data;

Perhaps we are being superstitious, but I think our code try to
avoid leading underscore when able, i.e.

load_index_extensions(void *data_)
{
struct load_index_extensions *p = data;

> + unsigned long src_offset = p->src_offset;
> +
> + while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
> + /* After an array of active_nr index entries,
> +  * there can be arbitrary number of extended
> +  * sections, each of which is prefixed with
> +  * extension name (4-byte) and section length
> +  * in 4-byte network byte order.
> +  */
> + uint32_t extsize;
> + memcpy(, (char *)p->mmap + src_offset + 4, 4);
> + extsize = ntohl(extsize);

The same "ntohl(), not get_be32()?" question as the one for the
previous step applies here, too.  I think the answer is "the
original was written that way" and that is acceptable, but once this
series lands, we may want to review the whole file and see if it is
worth making them consistent with a separate clean-up patch.

I think mmap() and munmap() are the only places that wants p->mmap
and mmap parameters passed around in various callchains to be of
type "void *"---I wonder if it is simpler to use "const char *"
throughout and only cast it to "void *" when necessary (I suspect
that there is nowhere we need to cast to or from "void *" explicitly
if we did so---assignment and argument passing would give us an
appropriate cast for free)?

> + if (read_index_extension(p->istate,
> + (const char *)p->mmap + src_offset,
> + (char *)p->mmap + src_offset + 8,
> + extsize) < 0) {
> + munmap(p->mmap, p->mmap_size);
> + die("index file corrupt");
> + }
> + ...
> @@ -1907,6 +1951,11 @@ ...
> ...
> + p.mmap = mmap;
> + p.mmap_size = mmap_size;
> +
> +#ifndef NO_PTHREADS
> + nr_threads = git_config_get_index_threads();
> + if (!nr_threads)
> + nr_threads = online_cpus();
> +
> + if (nr_threads >= 2) {
> + extension_offset = read_eoie_extension(mmap, mmap_size);
> + if (extension_offset) {
> + /* create a thread to load the index extensions */
> + p.src_offset = extension_offset;
> + if (pthread_create(, NULL, 
> load_index_extensions, ))
> + die(_("unable to create 
> load_index_extensions_thread"));
> + }
> + }
> +#endif

Makes sense.


[PATCH v3 3/4] read-cache: load cache extensions on a worker thread

2018-09-06 Thread Ben Peart
This patch helps address the CPU cost of loading the index by loading
the cache extensions on a worker thread in parallel with loading the cache
entries.

In some cases, loading the extensions takes longer than loading the
cache entries so this patch utilizes the new EOIE to start the thread to
load the extensions before loading all the cache entries in parallel.

This is possible because the current extensions don't access the cache
entries in the index_state structure so are OK that they don't all exist
yet.

The CACHE_EXT_TREE, CACHE_EXT_RESOLVE_UNDO, and CACHE_EXT_UNTRACKED
extensions don't even get a pointer to the index so don't have access to the
cache entries.

CACHE_EXT_LINK only uses the index_state to initialize the split index.
CACHE_EXT_FSMONITOR only uses the index_state to save the fsmonitor last
update and dirty flags.

I used p0002-read-cache.sh to generate some performance data:

p0002-read-cache.sh w/100,000 files
Baseline Thread extensions
---
21.14(0.03+0.01) 20.71(0.03+0.03) -2.0%

p0002-read-cache.sh w/1,000,000 files
Baseline  Thread extensions
--
295.42(0.01+0.07) 217.60(0.03+0.04) -26.3%

Signed-off-by: Ben Peart 
---
 Documentation/config.txt |  6 +++
 config.c | 18 
 config.h |  1 +
 read-cache.c | 94 
 4 files changed, 102 insertions(+), 17 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 1c42364988..79f8296d9c 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2391,6 +2391,12 @@ imap::
The configuration variables in the 'imap' section are described
in linkgit:git-imap-send[1].
 
+index.threads::
+   Specifies the number of threads to spawn when loading the index.
+   This is meant to reduce index load time on multiprocessor machines.
+   Specifying 0 or 'true' will cause Git to auto-detect the number of
+   CPU's and set the number of threads accordingly. Defaults to 'true'.
+
 index.version::
Specify the version with which new index files should be
initialized.  This does not affect existing repositories.
diff --git a/config.c b/config.c
index 9a0b10d4bc..9bd79fb165 100644
--- a/config.c
+++ b/config.c
@@ -2289,6 +2289,24 @@ int git_config_get_fsmonitor(void)
return 0;
 }
 
+/*
+ * You can disable multi-threaded code by setting index.threads
+ * to 'false' (or 1)
+ */
+int git_config_get_index_threads(void)
+{
+   int is_bool, val;
+
+   if (!git_config_get_bool_or_int("index.threads", _bool, )) {
+   if (is_bool)
+   return val ? 0 : 1;
+   else
+   return val;
+   }
+
+   return 0; /* auto-detect */
+}
+
 NORETURN
 void git_die_config_linenr(const char *key, const char *filename, int linenr)
 {
diff --git a/config.h b/config.h
index ab46e0165d..a06027e69b 100644
--- a/config.h
+++ b/config.h
@@ -250,6 +250,7 @@ extern int git_config_get_untracked_cache(void);
 extern int git_config_get_split_index(void);
 extern int git_config_get_max_percent_split_change(void);
 extern int git_config_get_fsmonitor(void);
+extern int git_config_get_index_threads(void);
 
 /* This dies if the configured or default date is in the future */
 extern int git_config_get_expiry(const char *key, const char **output);
diff --git a/read-cache.c b/read-cache.c
index d0d2793780..fcc776aaf0 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -23,6 +23,10 @@
 #include "split-index.h"
 #include "utf8.h"
 #include "fsmonitor.h"
+#ifndef NO_PTHREADS
+#include 
+#include 
+#endif
 
 /* Mask for the name length in ce_flags in the on-disk index */
 
@@ -1897,6 +1901,46 @@ static unsigned long read_eoie_extension(void *mmap, 
size_t mmap_size);
 #endif
 static void write_eoie_extension(struct strbuf *sb, git_hash_ctx 
*eoie_context, unsigned long offset);
 
+struct load_index_extensions
+{
+#ifndef NO_PTHREADS
+   pthread_t pthread;
+#endif
+   struct index_state *istate;
+   void *mmap;
+   size_t mmap_size;
+   unsigned long src_offset;
+ };
+
+static void *load_index_extensions(void *_data)
+{
+   struct load_index_extensions *p = _data;
+   unsigned long src_offset = p->src_offset;
+
+   while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
+   /* After an array of active_nr index entries,
+* there can be arbitrary number of extended
+* sections, each of which is prefixed with
+* extension name (4-byte) and section length
+* in 4-byte network byte order.
+*/
+   uint32_t extsize;
+   memcpy(, (char *)p->mmap + src_offset + 4, 4);
+   extsize = ntohl(extsize);
+   if (read_index_extension(p->istate,
+   (const char *)p->mmap + src_offset,
+