Re: [PATCH v1] teach git to support a virtual (partially populated) work directory

2018-11-29 Thread Ben Peart



On 11/28/2018 8:31 AM, SZEDER Gábor wrote:

On Tue, Nov 27, 2018 at 02:50:57PM -0500, Ben Peart wrote:


diff --git a/t/t1092-virtualworkdir.sh b/t/t1092-virtualworkdir.sh
new file mode 100755
index 00..0cdfe9b362
--- /dev/null
+++ b/t/t1092-virtualworkdir.sh
@@ -0,0 +1,393 @@
+#!/bin/sh
+
+test_description='virtual work directory tests'
+
+. ./test-lib.sh
+
+# We need total control of the virtual work directory hook
+sane_unset GIT_TEST_VIRTUALWORKDIR
+
+clean_repo () {
+   rm .git/index &&
+   git -c core.virtualworkdir=false reset --hard HEAD &&
+   git -c core.virtualworkdir=false clean -fd &&
+   touch untracked.txt &&

We would usually run '>untracked.txt' instead, sparing the external
process.

A further nit is that a function called 'clean_repo' creates new
untracked files...


Thanks, all good suggestions I've incorporated for the next iteration.




+   touch dir1/untracked.txt &&
+   touch dir2/untracked.txt
+}
+
+test_expect_success 'setup' '
+   mkdir -p .git/hooks/ &&
+   cat > .gitignore <<-\EOF &&

CodingGuidelines suggest no space between redirection operator and
filename.


+   .gitignore
+   expect*
+   actual*
+   EOF
+   touch file1.txt &&
+   touch file2.txt &&
+   mkdir -p dir1 &&
+   touch dir1/file1.txt &&
+   touch dir1/file2.txt &&
+   mkdir -p dir2 &&
+   touch dir2/file1.txt &&
+   touch dir2/file2.txt &&
+   git add . &&
+   git commit -m "initial" &&
+   git config --local core.virtualworkdir true
+'



+test_expect_success 'verify files not listed are ignored by git clean -f -x' '
+   clean_repo &&

I find it odd to clean the repo right after setting it up; but then
again, 'clean_repo' not only cleans, but also creates new files.
Perhaps rename it to 'reset_repo'?  Dunno.


+   write_script .git/hooks/virtual-work-dir <<-\EOF &&
+   printf "untracked.txt\0"
+   printf "dir1/\0"
+   EOF
+   mkdir -p dir3 &&
+   touch dir3/untracked.txt &&
+   git clean -f -x &&
+   test -f file1.txt &&

Please use the 'test_path_is_file', ...


+   test -f file2.txt &&
+   test ! -f untracked.txt &&

... 'test_path_is_missing', and ...


+   test -d dir1 &&

... 'test_path_is_dir' helpers, respectively, because they print
informative error messages on failure.


+   test -f dir1/file1.txt &&
+   test -f dir1/file2.txt &&
+   test ! -f dir1/untracked.txt &&
+   test -f dir2/file1.txt &&
+   test -f dir2/file2.txt &&
+   test -f dir2/untracked.txt &&
+   test -d dir3 &&
+   test -f dir3/untracked.txt
+'


Re: [PATCH v1] mem_pool: add GIT_TRACE_MEMPOOL support

2018-11-29 Thread Ben Peart



On 11/28/2018 4:37 AM, Johannes Schindelin wrote:

Hi Ben,

On Tue, 27 Nov 2018, Ben Peart wrote:


From: Ben Peart 

Add tracing around initializing and discarding mempools. In discard, report
the amount of memory unused in the current block to help tune the
initial_size setting.

Signed-off-by: Ben Peart 
---

Looks good.

My only question: should we also trace calls to _alloc(), _calloc() and
_combine()?


I was trying to tune the initial size in my use of the mem_pool and so 
found this tracing useful to see how much memory was actually being 
used.  I'm inclined to only add tracing as it is needed rather than 
proactively because we think it _might_ be needed.  I suspect _alloc() 
and _calloc() would get very noisy and not add much value.
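
As a usage sketch, the new trace can be captured like any other
GIT_TRACE_* target, either to a file descriptor or to an absolute path,
e.g.:

	GIT_TRACE_MEMPOOL=2 git status
	GIT_TRACE_MEMPOOL=/tmp/mempool.log git status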




Ciao,
Johannes


Notes:
 Base Ref: * git-trace-mempool
 Web-Diff: https://github.com/benpeart/git/commit/9ac84bbca2
 Checkout: git fetch https://github.com/benpeart/git git-trace-mempool-v1 
&& git checkout 9ac84bbca2

  mem-pool.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/mem-pool.c b/mem-pool.c
index a2841a4a9a..065389aaec 100644
--- a/mem-pool.c
+++ b/mem-pool.c
@@ -5,6 +5,7 @@
  #include "cache.h"
  #include "mem-pool.h"
  
+static struct trace_key trace_mem_pool = TRACE_KEY_INIT(MEMPOOL);

  #define BLOCK_GROWTH_SIZE 1024*1024 - sizeof(struct mp_block);
  
  /*

@@ -48,12 +49,16 @@ void mem_pool_init(struct mem_pool **mem_pool, size_t 
initial_size)
mem_pool_alloc_block(pool, initial_size, NULL);
  
  	*mem_pool = pool;

+   trace_printf_key(_mem_pool, "mem_pool (%p): init (%"PRIuMAX") initial 
size\n",
+   pool, (uintmax_t)initial_size);
  }
  
  void mem_pool_discard(struct mem_pool *mem_pool, int invalidate_memory)

  {
struct mp_block *block, *block_to_free;
  
+	trace_printf_key(_mem_pool, "mem_pool (%p): discard (%"PRIuMAX") unused\n",

+   mem_pool, (uintmax_t)(mem_pool->mp_block->end - 
mem_pool->mp_block->next_free));
block = mem_pool->mp_block;
while (block)
{

base-commit: bb75be6cb916297f271c846f2f9caa3daaaec718
--
2.18.0.windows.1




[PATCH v1] mem_pool: add GIT_TRACE_MEMPOOL support

2018-11-27 Thread Ben Peart
From: Ben Peart 

Add tracing around initializing and discarding mempools. In discard, report
the amount of memory unused in the current block to help tune the
initial_size setting.

Signed-off-by: Ben Peart 
---

Notes:
Base Ref: * git-trace-mempool
Web-Diff: https://github.com/benpeart/git/commit/9ac84bbca2
Checkout: git fetch https://github.com/benpeart/git git-trace-mempool-v1 && 
git checkout 9ac84bbca2

 mem-pool.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/mem-pool.c b/mem-pool.c
index a2841a4a9a..065389aaec 100644
--- a/mem-pool.c
+++ b/mem-pool.c
@@ -5,6 +5,7 @@
 #include "cache.h"
 #include "mem-pool.h"
 
+static struct trace_key trace_mem_pool = TRACE_KEY_INIT(MEMPOOL);
 #define BLOCK_GROWTH_SIZE 1024*1024 - sizeof(struct mp_block);
 
 /*
@@ -48,12 +49,16 @@ void mem_pool_init(struct mem_pool **mem_pool, size_t 
initial_size)
mem_pool_alloc_block(pool, initial_size, NULL);
 
*mem_pool = pool;
+   trace_printf_key(_mem_pool, "mem_pool (%p): init (%"PRIuMAX") 
initial size\n",
+   pool, (uintmax_t)initial_size);
 }
 
 void mem_pool_discard(struct mem_pool *mem_pool, int invalidate_memory)
 {
struct mp_block *block, *block_to_free;
 
+   trace_printf_key(_mem_pool, "mem_pool (%p): discard (%"PRIuMAX") 
unused\n",
+   mem_pool, (uintmax_t)(mem_pool->mp_block->end - 
mem_pool->mp_block->next_free));
block = mem_pool->mp_block;
while (block)
{

base-commit: bb75be6cb916297f271c846f2f9caa3daaaec718
-- 
2.18.0.windows.1



[PATCH v1] teach git to support a virtual (partially populated) work directory

2018-11-27 Thread Ben Peart
From: Ben Peart 

To make git perform well on the very largest repos, we must make git
operations O(modified) instead of O(size of repo).  This takes advantage of
the fact that the number of files a developer has modified (especially
in very large repos) is typically a tiny fraction of the overall repo size.

We accomplished this by utilizing the existing internal logic for the skip
worktree bit and excludes to tell git to ignore all files and folders other
than those that have been modified.  This logic is driven by an external
process that monitors writes to the repo and communicates the list of files
and folders with changes to git via the virtual work directory hook in this
patch.

The external process maintains a list of files and folders that have been
modified.  When git runs, it requests the list of files and folders that
have been modified via the virtual work directory hook.  Git then sets/clears
the skip-worktree bit on the cache entries and builds a hashmap of the
modified files/folders that is used by the excludes logic to avoid scanning
the entire repo looking for changes and untracked files.
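
For illustration, a minimal setup might look like the sketch below
(the hook name and NUL-terminated output format follow the test script
added by this patch; the listed paths are just placeholders):

	git config core.virtualWorkDir true
	cat >.git/hooks/virtual-work-dir <<-'EOF'
	#!/bin/sh
	# List the paths git should pay attention to, NUL-terminated;
	# a trailing "/" marks a directory.
	printf "dir1/\0"
	printf "untracked.txt\0"
	EOF
	chmod +x .git/hooks/virtual-work-dir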

With this system, we have been able to make local git command performance on
extremely large repos (millions of files, 1/2 million folders) entirely
manageable (30 second checkout, 3.5 seconds status, 4 second add, 7 second
commit, etc).

On index load, clear/set the skip worktree bits based on the virtual
work directory data. Use virtual work directory data to update skip-worktree
bit in unpack-trees. Use virtual work directory data to exclude files and
folders not explicitly requested.

Signed-off-by: Ben Peart 
---

I believe I've incorporated all the feedback from the RFC.  Renaming the
feature, updating the setting to be a boolean with a hard coded hook name,
labeling the feature "experimental," and only calling get_dtype() if the
feature is turned on.

If there are other suggestions on how to ensure this is a useful and general
purpose feature please let me know.

Notes:
Base Ref: master
Web-Diff: https://github.com/benpeart/git/commit/65c3ca2e5f
Checkout: git fetch https://github.com/benpeart/git virtual-workdir-v1 && 
git checkout 65c3ca2e5f

 Documentation/config/core.txt |   9 +
 Documentation/githooks.txt|  23 ++
 Makefile  |   1 +
 cache.h   |   1 +
 config.c  |  32 ++-
 config.h  |   1 +
 dir.c |  26 ++-
 environment.c |   1 +
 read-cache.c  |   2 +
 t/t1092-virtualworkdir.sh | 393 ++
 unpack-trees.c|  23 +-
 virtualworkdir.c  | 314 +++
 virtualworkdir.h  |  25 +++
 13 files changed, 843 insertions(+), 8 deletions(-)
 create mode 100755 t/t1092-virtualworkdir.sh
 create mode 100644 virtualworkdir.c
 create mode 100644 virtualworkdir.h

diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt
index d0e6635fe0..49b7699a4e 100644
--- a/Documentation/config/core.txt
+++ b/Documentation/config/core.txt
@@ -68,6 +68,15 @@ core.fsmonitor::
avoiding unnecessary processing of files that have not changed.
See the "fsmonitor-watchman" section of linkgit:githooks[5].
 
+core.virtualWorkDir::
+   Please regard this as an experimental feature.
+   If set to true, utilize the virtual-work-dir hook to identify all
+   files and directories that are present in the working directory.
+   Git will only track and update files listed in the virtual work
+   directory.  Using the virtual work directory will supersede the
+   sparse-checkout settings which will be ignored.
+   See the "virtual-work-dir" section of linkgit:githooks[6].
+
 core.trustctime::
If false, the ctime differences between the index and the
working tree are ignored; useful when the inode change time
diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt
index 959044347e..9888d504b4 100644
--- a/Documentation/githooks.txt
+++ b/Documentation/githooks.txt
@@ -485,6 +485,29 @@ The exit status determines whether git will use the data 
from the
 hook to limit its search.  On error, it will fall back to verifying
 all files and folders.
 
+virtual-work-dir
+
+
+Please regard this as an experimental feature.
+
+The "Virtual Work Directory" hook allows populating the working directory
+sparsely. The virtual work directory data is typically automatically
+generated by an external process.  Git will limit what files it checks for
+changes as well as which directories are checked for untracked files based
+on the path names given. Git will also only update those files listed in the
+virtual work directory.
+
+The hook is invoked when the configuration option core.virtualWorkDir is
+set to true.  The hook takes one argument, a version (currently 

Re: [PATCH 2/5] ieot: default to not writing IEOT section

2018-11-26 Thread Ben Peart




On 11/26/2018 2:59 PM, Stefan Beller wrote:

+static int record_ieot(void)
+{
+ int val;
+


Initialize stack val to zero to ensure proper default.


I don't think that is needed here, as we only use `val` after
we first write to it via git_config_get_bool.

Did you spot this via code review and thought of
defensive programming or is there a tool that
has a false positive here?



Code review and defensive programming.  I had to review the code in 
git_config_get_bool() to see whether it always initialized val even if it 
didn't find the requested config variable (especially since we don't pass in 
a default value for this function like we do for others).





+ if (!git_config_get_bool("index.recordoffsettable", ))
+ return val;
+ return 0;
+}


Re: [PATCH 1/5] eoie: default to not writing EOIE section

2018-11-20 Thread Ben Peart




On 11/20/2018 1:11 AM, Jonathan Nieder wrote:

Since 3b1d9e04 (eoie: add End of Index Entry (EOIE) extension,
2018-10-10) Git defaults to writing the new EOIE section when writing
out an index file.  Usually that is a good thing because it improves
threaded performance, but when a Git repository is shared with older
versions of Git, it produces a confusing warning:

   $ git status
   ignoring EOIE extension
   HEAD detached at 371ed0defa
   nothing to commit, working tree clean

Let's introduce the new index extension more gently.  First we'll roll
out the new version of Git that understands it, and then once
sufficiently many users are using such a version, we can flip the
default to writing it by default.

Introduce a '[index] recordEndOfIndexEntries' configuration variable
to allow interested users to benefit from this index extension early.

Signed-off-by: Jonathan Nieder 
---
Rebased.  No other change from v1.

As Jonathan pointed out, it would be nice to have tests here.  Ben,
any advice for how I could write some in a followup change?  E.g. does
Derrick Stolee's tracing based testing trick apply here?



I suppose a 'test-dump-eoie' could be written along the lines of 
test-dump-fsmonitor or test-dump-untracked-cache.  Unlike those, there 
isn't much state to dump other than the existence of the extension and 
the offset.  That could be used to test that the new settings are 
working properly.
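
A test built on such a helper might look roughly like the sketch below
('test-tool dump-eoie' is hypothetical, as is the assumption that it
exits non-zero when the extension is absent):

	test_expect_success 'EOIE extension is written only when enabled' '
		>tracked &&
		git -c index.recordEndOfIndexEntries=false update-index --add tracked &&
		test_must_fail test-tool dump-eoie .git/index &&
		git -c index.recordEndOfIndexEntries=true update-index --add tracked &&
		test-tool dump-eoie .git/index >actual
	'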




Re: [PATCH 5/5] index: offer advice for unknown index extensions

2018-11-20 Thread Ben Peart




On 11/20/2018 4:26 AM, Ævar Arnfjörð Bjarmason wrote:


On Tue, Nov 20 2018, Jonathan Nieder wrote:

Just commenting here on the end-state of this since it's easier than
each patch at a time:

First, do we still need to be doing %.4s instead of just %s? It would be
easier for translators / to understand what's going on if it were just
%s. I.e. "this is the extension name" v.s. "this is the first 4 bytes of
whatever it is...".


return error("index uses %.4s extension, which we do not 
understand",
 ext);


Missing _(). Not the fault of this series, but something to fix while
we're at it.

Also not the fault of this series, the "is this upper case" test is
unportable, but this is probably the tip of the iceberg for git not
working on EBCDIC systems.

This message should say something like "Index uses the mandatory %s
extension" to clarify and distinguish it from the below. We don't
understand the upper-case one either, but the important distinction is
that one is mandatory, and the other can be dropped. The two messages
should make this clear.

Also, having advice() for that case is even more valuable since we have
a hard error at this point. So something like:

 "This is likely due to the index having been written by a future
 version of Git. All-lowercase index extensions are mandatory, as
 opposed to optional all-uppercase ones which we'll drop with a
 warning if we see them".



I agree that we should have different messages for mandatory and 
optional extensions.  I don't think we should try and educate the end 
user on the implementation detail that git makes lower cases mandatory 
and upper case optional (ie drop the 'All-lowercase..." part).  They 
will never see the lower vs upper case difference and can't do anything 
about it anyway.



trace_printf("ignoring %.4s extension\n", ext);
+   if (advice_unknown_index_extension) {
+   warning(_("ignoring optional %.4s index extension"), 
ext);


Should start with upper-case. Good that it says "optional".


+   advise(_("This is likely due to the file having been written 
by a newer\n"
+"version of Git than is reading it. You can upgrade 
Git to\n"
+"take advantage of performance improvements from 
the updated\n"
+"file format.\n"


Let's not promise performance improvements with this extension in a
future version. We have no idea what the extension is, yeah right now
it's going to be true for the extension that prompted this patch series,
but may not be in the future. So just something like this for the last
sentence:

 You can try upgrading Git to use this new index format.


Agree - not all are guaranteed to be perf related.




+"\n"
+"Run \"%s\"\n"
+"to suppress this message."),
+  "git config advice.unknownIndexExtension false");


Somewhat of an aside, but if I grep:

 git grep -C10 'git config advice\..*false' -- '*.[ch]'

There's a few existing examples of this, but the majority of advice()
messages don't say in the message how you can turn these off. Do we
think this a message users would especially like to turn off? I have the
opposite impression, it's a one-off in most cases, although not in the
case where an editor has an embedded git.

I think it would make sense to add this sort of thing to the advice()
API, i.e.:

 advice_with_config_hint(_(""), "unknownIndexExtension");

Which would then know how to consistently print this advice about how to
turn off the warning.



I like this.  I personally never knew you could turn off the "spent xxx 
seconds finding untracked files" advice until I worked on this patch 
series. This would help make that feature more discoverable.


Re: [PATCH 4/5] index: make index.threads=true enable ieot and eoie

2018-11-20 Thread Ben Peart




On 11/20/2018 1:14 AM, Jonathan Nieder wrote:

If a user explicitly sets

[index]
threads = true

to read the index using multiple threads, ensure that index writes
include the offset table by default to make that possible.  This
ensures that the user's intent of turning on threading is respected.

In other words, permit the following configurations:

- index.threads and index.recordOffsetTable unspecified: do not write
   the offset table yet (to avoid alarming the user with "ignoring IEOT
   extension" messages when an older version of Git accesses the
   repository) but do make use of multiple threads to read the index if
   the supporting offset table is present.

   This can also be requested explicitly by setting index.threads=true,
   0, or >1 and index.recordOffsetTable=false.

- index.threads=false or 1: do not write the offset table, and do not
   make use of the offset table.

   One can set index.recordOffsetTable=false as well, to be more
   explicit.

- index.threads=true, 0, or >1 and index.recordOffsetTable unspecified:
   write the offset table and make use of threads at read time.

   This can also be requested by setting index.threads=true, 0, >1, or
   unspecified and index.recordOffsetTable=true.

Fortunately the complication is temporary: once most Git installations
have upgraded to a version with support for the IEOT and EOIE
extensions, we can flip the defaults for index.recordEndOfIndexEntries
and index.recordOffsetTable to true and eliminate the settings.



This looks good.  I think this provides good default behavior while 
enabling fine grained control to those who want/need it.


I'm looking forward to the day when we can turn it back on by default so 
that people can take advantage of the speed improvements.




Re: [PATCH 2/5] ieot: default to not writing IEOT section

2018-11-20 Thread Ben Peart




On 11/20/2018 1:12 AM, Jonathan Nieder wrote:

As with EOIE, popular versions of Git do not support the new IEOT
extension yet.  When accessing a Git repository written by a more
modern version of Git, they correctly ignore the unrecognized section,
but in the process they loudly warn

ignoring IEOT extension

resulting in confusion for users.  Introduce the index extension more
gently by not writing it yet in this first version with support for
it.  Soon, once sufficiently many users are running a modern version
of Git, we can flip the default so users benefit from this index
extension by default.

Introduce a '[index] recordOffsetTable' configuration variable to
control whether the new index extension is written.

Signed-off-by: Jonathan Nieder 
---
As with patch 1/5, no change from v1 other than rebasing.

  Documentation/config/index.txt |  7 +++
  read-cache.c   | 11 ++-
  2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/Documentation/config/index.txt b/Documentation/config/index.txt
index 8e138aba7a..de44183235 100644
--- a/Documentation/config/index.txt
+++ b/Documentation/config/index.txt
@@ -5,6 +5,13 @@ index.recordEndOfIndexEntries::
reading the index using Git versions before 2.20. Defaults to
'false'.
  
+index.recordOffsetTable::

+   Specifies whether the index file should include an "Index Entry
+   Offset Table" section. This reduces index load time on
+   multiprocessor machines but produces a message "ignoring IEOT
+   extension" when reading the index using Git versions before 2.20.
+   Defaults to 'false'.
+
  index.threads::
Specifies the number of threads to spawn when loading the index.
This is meant to reduce index load time on multiprocessor machines.
diff --git a/read-cache.c b/read-cache.c
index 1e9c772603..f3d5638d9e 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2698,6 +2698,15 @@ static int record_eoie(void)
return 0;
  }
  
+static int record_ieot(void)

+{
+   int val;
+


Initialize stack val to zero to ensure proper default.


+   if (!git_config_get_bool("index.recordoffsettable", ))
+   return val;
+   return 0;
+}
+
  /*
   * On success, `tempfile` is closed. If it is the temporary file
   * of a `struct lock_file`, we will therefore effectively perform
@@ -2761,7 +2770,7 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
else
nr_threads = 1;
  
-	if (nr_threads != 1) {

+   if (nr_threads != 1 && record_ieot()) {
int ieot_blocks, cpus;
  
  		/*




Re: [PATCH 1/5] eoie: default to not writing EOIE section

2018-11-20 Thread Ben Peart




On 11/20/2018 1:11 AM, Jonathan Nieder wrote:

Since 3b1d9e04 (eoie: add End of Index Entry (EOIE) extension,
2018-10-10) Git defaults to writing the new EOIE section when writing
out an index file.  Usually that is a good thing because it improves
threaded performance, but when a Git repository is shared with older
versions of Git, it produces a confusing warning:

   $ git status
   ignoring EOIE extension
   HEAD detached at 371ed0defa
   nothing to commit, working tree clean

Let's introduce the new index extension more gently.  First we'll roll
out the new version of Git that understands it, and then once
sufficiently many users are using such a version, we can flip the
default to writing it by default.

Introduce a '[index] recordEndOfIndexEntries' configuration variable
to allow interested users to benefit from this index extension early.

Signed-off-by: Jonathan Nieder 
---
Rebased.  No other change from v1.

As Jonathan pointed out, it would be nice to have tests here.  Ben,
any advice for how I could write some in a followup change?  E.g. does
Derrick Stolee's tracing based testing trick apply here?

  Documentation/config/index.txt |  7 +++
  read-cache.c   | 11 ++-
  t/t1700-split-index.sh | 11 +++
  3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/Documentation/config/index.txt b/Documentation/config/index.txt
index 4b94b6bedc..8e138aba7a 100644
--- a/Documentation/config/index.txt
+++ b/Documentation/config/index.txt
@@ -1,3 +1,10 @@
+index.recordEndOfIndexEntries::
+   Specifies whether the index file should include an "End Of Index
+   Entry" section. This reduces index load time on multiprocessor
+   machines but produces a message "ignoring EOIE extension" when
+   reading the index using Git versions before 2.20. Defaults to
+   'false'.
+
  index.threads::
Specifies the number of threads to spawn when loading the index.
This is meant to reduce index load time on multiprocessor machines.
diff --git a/read-cache.c b/read-cache.c
index 4ca81286c0..1e9c772603 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2689,6 +2689,15 @@ void update_index_if_able(struct index_state *istate, 
struct lock_file *lockfile
rollback_lock_file(lockfile);
  }
  
+static int record_eoie(void)

+{
+   int val;


I believe you are going to want to initialize val to 0 here as it is on 
the stack so is not guaranteed to be zero.



+
+   if (!git_config_get_bool("index.recordendofindexentries", ))
+   return val;
+   return 0;
+}
+
  /*
   * On success, `tempfile` is closed. If it is the temporary file
   * of a `struct lock_file`, we will therefore effectively perform
@@ -2936,7 +2945,7 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
 * read.  Write it out regardless of the strip_extensions parameter as 
we need it
 * when loading the shared index.
 */
-   if (offset) {
+   if (offset && record_eoie()) {
struct strbuf sb = STRBUF_INIT;
  
  		write_eoie_extension(&sb, &eoie_c, offset);

diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index 2ac47aa0e4..0cbac64e28 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -25,14 +25,17 @@ test_expect_success 'enable split index' '
git update-index --split-index &&
test-tool dump-split-index .git/index >actual &&
indexversion=$(test-tool index-version <.git/index) &&
+
+   # NEEDSWORK: Stop hard-coding checksums.
if test "$indexversion" = "4"
then
-   own=3527df833c6c100d3d1d921a9a782d62a8be4b58
-   base=746f7ab2ed44fb839efdfbffcf399d0b113fb4cb
+   own=432ef4b63f32193984f339431fd50ca796493569
+   base=508851a7f0dfa8691e9f69c7f055865389012491
else
-   own=5e9b60117ece18da410ddecc8b8d43766a0e4204
-   base=4370042739b31cd17a5c5cd6043a77c9a00df113
+   own=8299b0bcd1ac364e5f1d7768efb62fa2da79a339
+   base=39d890139ee5356c7ef572216cebcd27aa41f9df
fi &&
+
cat >expect <<-EOF &&
own $own
base $base



Re: Git Test Coverage Report (v2.20.0-rc0)

2018-11-19 Thread Ben Peart




On 11/19/2018 10:40 AM, Derrick Stolee wrote:
The test coverage reports started mid-way through this release cycle, so 
I thought it would be good to do a full review of the new uncovered code 
since the last release.


I eliminated most of the uncovered code due to the following cases:

1. Code was only moved or refactored.
2. Code was related to unusual error conditions (e.g. open_pack_index() 
fails)


The comments below are intended only to point out potential directions 
to improve test coverage. Some of it is for me to do!


Thanks,
-Stolee

On 11/18/2018 9:54 PM, Derrick Stolee wrote:

There are a lot of lines introduced by the IEOT extension in these commits:

 > Ben Peart  3255089ad: ieot: add Index Entry Offset Table (IEOT) 
extension

 > Ben Peart  3b1d9e045: eoie: add End of Index Entry (EOIE) extension
 > Ben Peart  77ff1127a: read-cache: load cache entries on worker 
threads
 > Ben Peart  abb4bb838: read-cache: load cache extensions on a 
worker thread

 > Ben Peart  c780b9cfe: config: add new index.threads config setting
 > Ben Peart  d1664e73a: add: speed up cmd_add() by utilizing 
read_cache_preload()
 > Ben Peart  fa655d841: checkout: optimize "git checkout -b 
"




These should be hit if you run the test suite with 
GIT_TEST_INDEX_THREADS=2.  Without that, the indexes for the various 
tests are too small to trigger multi-threaded index reads/writes.


From t/README:

GIT_TEST_INDEX_THREADS=<n> enables exercising the multi-threaded loading
of the index for the whole test suite by bypassing the default number of
cache entries and thread minimums. Setting this to 1 will make the
index loading single threaded.
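
Concretely, something like the following exercises those paths for the
whole suite or for a single script:

	GIT_TEST_INDEX_THREADS=2 make test
	cd t && GIT_TEST_INDEX_THREADS=2 ./t1700-split-index.sh -v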




Re: [PATCH 3/3] index: do not warn about unrecognized extensions

2018-11-14 Thread Ben Peart




On 11/13/2018 10:24 PM, Junio C Hamano wrote:

Jonathan Nieder  writes:


We cannot change the past, but for index extensions of the future,
there is a straightforward improvement: silence that message except
when tracing.  This way, the message is still available when
debugging, but in everyday use it does not show up so (once most Git
users have this patch) we can turn on new optional extensions right
away without alarming people.


That argument ignores the "let the users know they are using a stale
version when they did use (either by accident or deliberately) a
more recent one" value, though.

Even if we consider that this is only for debugging, I am not sure
if tracing is the right place to add.  As long as the "optional
extensions can be ignored without affecting the correctness" rule
holds, there is nothing gained by letting these messages shown for
debugging purposes


Having recently written a couple of patches that utilize an optional 
extension - I actually found the warning to be a helpful debugging tool 
and would like to see them enabled via tracing.  It would also be 
helpful to see the opposite - I'm looking for an optional extension but 
it is missing.


The most common scenario was when I'd be testing my changes in different 
repos and forget that I needed to force an updated index to be written 
that contained the extension I was trying to test.  The "silently ignore 
the optional extension" behavior is good for end users but as a 
developer, I'd like to be able to have it yell at me via tracing. :-)


IMHO - if an end user has to turn on tracing, I view that as a failure 
on our part.  No end user should have to understand the inner workings 
of git to be able to use it effectively.


and if there is such a bug (e.g. we introduced

an optional extension but the code that wrote an index with an
optional extension wrote the non-optional part in such a way that it
cannot be correctly handled without the extension that is supposed
to be optional) we'd probably want to let users notice without
having to explicitly go into a debugging session.  If Googling for
"ignoring FNCY ext" gives "discard your index with 'reset HEAD',
because an index file with FNCY ext cannot be read without
understanding it", that may prevent damages from happening in the
first place.  On the other hand, hiding it behind tracing would mean
the user first needs to experience an unknown breakage and then
has to enable tracing among 47 other different things to diagnose
and drill down to the root cause.




Re: [PATCH 2/3] ieot: default to not writing IEOT section

2018-11-14 Thread Ben Peart




On 11/13/2018 4:08 PM, Jonathan Nieder wrote:

Hi again,

Ben Peart wrote:

On 11/13/2018 1:18 PM, Jonathan Nieder wrote:

Ben Peart wrote:



Why introduce a new setting to disable writing the IEOT extension instead of
just using the existing index.threads setting?  If index.threads=1 then the
IEOT extension isn't written which (I believe) will accomplish the same
goal.


Do you mean defaulting to index.threads=1?  I don't think that would
be a good default, but if you have a different change in mind then I'd
be happy to hear it.

Or do you mean that if the user has explicitly specified index.threads=true,
then that should imply index.recordOffsetTable=true so users only have
to set one setting to turn it on?  I can imagine that working well.


Reading the index with multiple threads requires the EOIE and IEOT
extensions to exist in the index.  If either extension doesn't exist, then
the code falls back to the single threaded path.  That means you can't have
both 1) no warning for old versions of git and 2) multi-threaded reading for
new versions of git.

If you set index.threads=1, that will prevent the IEOT extension from being
written and there will be no "ignoring IEOT extension" warning in older
versions of git.

With this patch 'as is' you would have to set both index.threads=true and
index.recordOffsetTable=true to get multi-threaded index reads.  If either
is set to false, it will silently drop back to single threaded reads.


Sorry, I'm still not understanding what you're proposing.  What would be

- the default behavior
- the mechanism for changing that behavior

under your proposal?

I consider index.threads=1 to be a bad default.  I would understand if
you are saying that that should be the default, and I tried to propose
a different way to achieve what you're looking for in the quoted reply
above (but I'm not understanding whether you like that proposal or
not).



Today, both the write logic (ie should we write out the IEOT extension) 
and the read logic (should I use the IEOT, if available, and do 
multi-threaded reading) are controlled by the single "index.threads" 
setting.  I would like to keep the settings as simple as possible to 
prevent user confusion.


If we have two different settings (index.threads and 
index.recordoffsettable) then the only combination that will result in 
the user actually getting multi-threaded reads is when they are both set 
to true.  Any other combination will silently fail.  I think it would be 
confusing if you set index.threads=true and got no error message but 
didn't get multi-threaded reads either (or vice versa).


If you want to prevent any of the scary "ignoring IEOT extension" from 
ever happening then your only option is to turn off the IEOT writing by 
default.  The downside is that people have to discover and turn it on if 
they want the improvements.  This can be achieved by changing the 
default for index.threads from "true" to "false."


diff --git a/config.c b/config.c
index 2ee29f6f86..86f5c14294 100644
--- a/config.c
+++ b/config.c
@@ -2291,7 +2291,7 @@ int git_config_get_fsmonitor(void)

 int git_config_get_index_threads(void)
 {
-   int is_bool, val = 0;
+   int is_bool, val = 1;

val = git_env_ulong("GIT_TEST_INDEX_THREADS", 0);
if (val)


If you want to provide a way for a concerned user to disable the message 
after the first time they have seen it, then they can be instructed to 
run 'git config --global index.threads false'


There is no way to get multi-threaded reads and NOT get the scary 
message with older versions of git.  Multi-threaded reads require the 
IEOT extension to be written into the index and the existence of the 
IEOT extension in the index will always generate the scary warning.
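
In other words, under this patch as posted the full opt-in amounts to
setting all three knobs (config names as proposed in this series):

	git config index.threads true
	git config index.recordEndOfIndexEntries true
	git config index.recordOffsetTable true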



Jonathan



Re: [PATCH 2/3] ieot: default to not writing IEOT section

2018-11-13 Thread Ben Peart




On 11/13/2018 1:18 PM, Jonathan Nieder wrote:

Hi,

Ben Peart wrote:

On 11/12/2018 7:39 PM, Jonathan Nieder wrote:



As with EOIE, popular versions of Git do not support the new IEOT
extension yet.  When accessing a Git repository written by a more
modern version of Git, they correctly ignore the unrecognized section,
but in the process they loudly warn

ignoring IEOT extension

resulting in confusion for users.  Introduce the index extension more
gently by not writing it yet in this first version with support for
it.

[...]

Introduce a '[index] recordOffsetTable' configuration variable to
control whether the new index extension is written.


Why introduce a new setting to disable writing the IEOT extension instead of
just using the existing index.threads setting?  If index.threads=1 then the
IEOT extension isn't written which (I believe) will accomplish the same
goal.


Do you mean defaulting to index.threads=1?  I don't think that would
be a good default, but if you have a different change in mind then I'd
be happy to hear it.

Or do you mean that if the user has explicitly specified index.threads=true,
then that should imply index.recordOffsetTable=true so users only have
to set one setting to turn it on?  I can imagine that working well.



Reading the index with multiple threads requires the EOIE and IEOT 
extensions to exist in the index.  If either extension doesn't exist, 
then the code falls back to the single threaded path.  That means you 
can't have both 1) no warning for old versions of git and 2) 
multi-threaded reading for new versions of git.


If you set index.threads=1, that will prevent the IEOT extension from 
being written and there will be no "ignoring IEOT extension" warning in 
older versions of git.


With this patch 'as is' you would have to set both index.threads=true 
and index.recordOffsetTable=true to get multi-threaded index reads.  If 
either is set to false, it will silently drop back to single threaded reads.



Thanks,
Jonathan



Re: [PATCH 3/3] index: do not warn about unrecognized extensions

2018-11-13 Thread Ben Peart




On 11/12/2018 7:40 PM, Jonathan Nieder wrote:

Documentation/technical/index-format explains:

  4-byte extension signature. If the first byte is 'A'..'Z' the
  extension is optional and can be ignored.

This allows gracefully introducing a new index extension without
having to rely on all readers having support for it.  Mandatory
extensions start with a lowercase letter and optional ones start with
a capital.  Thus the versions of Git acting on a shared local
repository do not have to upgrade in lockstep.

We almost obey that convention, but there is a problem: when
encountering an unrecognized optional extension, we write

ignoring FNCY extension

to stderr, which alarms users.  This means that in practice we have
had to introduce index extensions in two steps: first add read
support, and then a while later, start writing by default.  This
delays when users can benefit from improvements to the index format.

We cannot change the past, but for index extensions of the future,
there is a straightforward improvement: silence that message except
when tracing.  This way, the message is still available when
debugging, but in everyday use it does not show up so (once most Git
users have this patch) we can turn on new optional extensions right
away without alarming people.



The best patch of the bunch. Glad to see it.

I'm fine with doing this via advice.unknownIndexExtension as well.  Who 
knows, someone may actually want to see this and not have tracing turned 
on.  I don't know who but it is possible :-)



Signed-off-by: Jonathan Nieder 
---
Thanks for reading.  Thoughts?

Sincerely,
Jonathan

  read-cache.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/read-cache.c b/read-cache.c
index 290bd54708..65530a68c2 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1720,7 +1720,7 @@ static int read_index_extension(struct index_state 
*istate,
if (*ext < 'A' || 'Z' < *ext)
return error("index uses %.4s extension, which we do not 
understand",
 ext);
-   fprintf(stderr, "ignoring %.4s extension\n", ext);
+   trace_printf("ignoring %.4s extension\n", ext);
break;
}
return 0;



Re: [PATCH 2/3] ieot: default to not writing IEOT section

2018-11-13 Thread Ben Peart




On 11/12/2018 7:39 PM, Jonathan Nieder wrote:

As with EOIE, popular versions of Git do not support the new IEOT
extension yet.  When accessing a Git repository written by a more
modern version of Git, they correctly ignore the unrecognized section,
but in the process they loudly warn

ignoring IEOT extension

resulting in confusion for users.  Introduce the index extension more
gently by not writing it yet in this first version with support for
it.  Soon, once sufficiently many users are running a modern version
of Git, we can flip the default so users benefit from this index
extension by default.

Introduce a '[index] recordOffsetTable' configuration variable to
control whether the new index extension is written.



Why introduce a new setting to disable writing the IEOT extension 
instead of just using the existing index.threads setting?  If 
index.threads=1 then the IEOT extension isn't written which (I believe) 
will accomplish the same goal.



Signed-off-by: Jonathan Nieder 
---
  Documentation/config.txt |  7 +++
  read-cache.c | 11 ++-
  2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index d702379db4..cc66fb7de3 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2195,6 +2195,13 @@ index.recordEndOfIndexEntries::
reading the index using Git versions before 2.20. Defaults to
'false'.
  
+index.recordOffsetTable::

+   Specifies whether the index file should include an "Index Entry
+   Offset Table" section. This reduces index load time on
+   multiprocessor machines but produces a message "ignoring IEOT
+   extension" when reading the index using Git versions before 2.20.
+   Defaults to 'false'.
+
  index.threads::
Specifies the number of threads to spawn when loading the index.
This is meant to reduce index load time on multiprocessor machines.
diff --git a/read-cache.c b/read-cache.c
index 4bfe93c4c2..290bd54708 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2707,6 +2707,15 @@ static int record_eoie(void)
return 0;
  }
  
+static int record_ieot(void)

+{
+   int val;
+
+   if (!git_config_get_bool("index.recordoffsettable", ))
+   return val;
+   return 0;
+}
+
  /*
   * On success, `tempfile` is closed. If it is the temporary file
   * of a `struct lock_file`, we will therefore effectively perform
@@ -2767,7 +2776,7 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
  
  #ifndef NO_PTHREADS

nr_threads = git_config_get_index_threads();
-   if (nr_threads != 1) {
+   if (nr_threads != 1 && record_ieot()) {
int ieot_blocks, cpus;
  
  		/*




Re: [PATCH 1/3] eoie: default to not writing EOIE section

2018-11-13 Thread Ben Peart




On 11/12/2018 8:05 PM, Junio C Hamano wrote:

Jonathan Nieder  writes:


Since 3b1d9e04 (eoie: add End of Index Entry (EOIE) extension,
2018-10-10) Git defaults to writing the new EOIE section when writing
out an index file.  Usually that is a good thing because it improves
threaded performance, but when a Git repository is shared with older
versions of Git, it produces a confusing warning:

   $ git status
   ignoring EOIE extension
   HEAD detached at 371ed0defa
   nothing to commit, working tree clean

Let's introduce the new index extension more gently.  First we'll roll
out the new version of Git that understands it, and then once
sufficiently many users are using such a version, we can flip the
default to writing it by default.

Introduce a '[index] recordEndOfIndexEntries' configuration variable
to allow interested users to benefit from this index extension early.


Thanks.  I am in principle OK with this approach.  In fact, I
suspect that the default may want to be dynamically determined, and
we give this knob to let the users further force their preference.
When no extension that benefits from multi-threading is written, the
default can stay "no" in future versions of Git, for example.



While I can understand the user confusion the warning about ignoring an 
extension could cause, I guess I'm a little surprised that people would 
see it that often.  To see the warning means they are running a new 
version of git in the same repo as they are running an old version of 
git.  I just haven't ever experienced that (I only ever have one copy of 
git installed) so am surprised it comes up often enough to warrant this 
change.


That said, if it _is_ that much of an issue, this patch makes sense and 
provides a way to more gracefully transition into this feature.  Even if 
we had some logic to dynamically determine whether to write it or not, 
we'd still want to avoid confusing users when it did get written out.



diff --git a/Documentation/config.txt b/Documentation/config.txt
index 41a9ff2b6a..d702379db4 100644


The timing is a bit unfortunate for any topic to touch this file,
and contrib/cocci would not help us in this case X-<.


diff --git a/read-cache.c b/read-cache.c
index f3a848d61c..4bfe93c4c2 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2698,6 +2698,15 @@ void update_index_if_able(struct index_state *istate, 
struct lock_file *lockfile
rollback_lock_file(lockfile);
  }
  
+static int record_eoie(void)

+{
+   int val;
+
+   if (!git_config_get_bool("index.recordendofindexentries", ))
+   return val;
+   return 0;
+}


Unconditionally defaulting to no in this round is perfectly fine.
Let's make a mental note that this is the place to decide dynamic
default in the future when we want to.  It would probably have to
ask around various "extension writing" helpers if they want to have
a say in the outcome (e.g. if there are very many cache entries in
the istate, the entry offset table may want to be written and
otherwise not).


@@ -2945,7 +2954,7 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
 * read.  Write it out regardless of the strip_extensions parameter as 
we need it
 * when loading the shared index.
 */
-   if (offset) {
+   if (offset && record_eoie()) {
struct strbuf sb = STRBUF_INIT;
  
  		write_eoie_extension(&sb, &eoie_c, offset);

diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index 2ac47aa0e4..0cbac64e28 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -25,14 +25,17 @@ test_expect_success 'enable split index' '
git update-index --split-index &&
test-tool dump-split-index .git/index >actual &&
indexversion=$(test-tool index-version <.git/index) &&
+
+   # NEEDSWORK: Stop hard-coding checksums.


Also let's stop hard-coding the assumption that the new knob is off
by default.  Ideally, you'd want to test both cases, right?

Perhaps you'd call "git update-index --split-index" we see in the
precontext twice, with "-c VAR=false" and "-c VAR=true", to prepare
"actual.without-eoie" and "actual.with-eoie", or something like
that?

Thanks.


if test "$indexversion" = "4"
then
-   own=3527df833c6c100d3d1d921a9a782d62a8be4b58
-   base=746f7ab2ed44fb839efdfbffcf399d0b113fb4cb
+   own=432ef4b63f32193984f339431fd50ca796493569
+   base=508851a7f0dfa8691e9f69c7f055865389012491
else
-   own=5e9b60117ece18da410ddecc8b8d43766a0e4204
-   base=4370042739b31cd17a5c5cd6043a77c9a00df113
+   own=8299b0bcd1ac364e5f1d7768efb62fa2da79a339
+   base=39d890139ee5356c7ef572216cebcd27aa41f9df
fi &&
+
cat >expect <<-EOF &&
own $own
base $base


Re: [RFC v1] Add virtual file system settings and hook proc

2018-11-05 Thread Ben Peart




On 11/4/2018 4:01 PM, brian m. carlson wrote:

On Sun, Nov 04, 2018 at 07:34:01AM +0100, Duy Nguyen wrote:

On Wed, Oct 31, 2018 at 9:53 PM Ben Peart  wrote:

It's more than a dynamic sparse-checkout because the same list is also
used to exclude any file/folder not listed.  That means any file not
listed won't ever be updated by git (like in 'checkout' for example) so
'stale' files could be left in the working directory.  It also means git
won't find new/untracked files unless they are specifically added to the
list.


OK. I'm not at all interested in carrying the maintenance burden for some
software behind closed doors. I could see value in having a more
flexible sparse checkout, but this now seems very tightly designed
for GVFS. So unless there's another use case (preferably open source)
for this, I don't think this should be added to git.git.


I should point out that VFS for Git is an open-source project and will
likely have larger use than just at Microsoft.  There are both Windows
and Mac clients and there are plans for a Linux client as well.
Ideally, it would work with an unmodified upstream Git, which is (I
assume) why Ben is sending this series.

Personally, I don't love the current name used in this series.  I don't
see this patch as introducing a virtual file system in the Unix sense of
that word, and I think calling it that in Git core will be confusing to
Unix users.  I would prefer to see it as a hook (maybe called
"sparse-checkout" or "sparse-exclude"; better names are okay), and
simply turn it on based on whether or not there's an appropriate hook
file there and whether core.sparseCheckout is on (or possibly with
hook.sparseExclude or something).  With a design more like that, I don't
see a problem with it in principle.



I'm really bad at naming so am happy to choose something else that will 
be more descriptive to the community at large.  The name came from the 
fact that we started with the (equally awful) 'VFS for Git' and this was 
the big enabling feature in git so for better or worse it got saddled 
with the same 'VFS' name.


In other feedback it was suggested to not add a core.vfs setting that 
was the path to the hook and I like that.  I can change it to 
core.sparseExclude (unless someone has something better) and hard code 
the hook name for now.  I do like the idea of having config based hooks 
so when I get some time I will put together a patch series to implement 
that.


Re: [RFC v1] Add virtual file system settings and hook proc

2018-11-05 Thread Ben Peart




On 11/5/2018 10:22 AM, Duy Nguyen wrote:

On Sun, Nov 4, 2018 at 10:01 PM brian m. carlson
 wrote:


On Sun, Nov 04, 2018 at 07:34:01AM +0100, Duy Nguyen wrote:

On Wed, Oct 31, 2018 at 9:53 PM Ben Peart  wrote:

It's more than a dynamic sparse-checkout because the same list is also
used to exclude any file/folder not listed.  That means any file not
listed won't ever be updated by git (like in 'checkout' for example) so
'stale' files could be left in the working directory.  It also means git
won't find new/untracked files unless they are specifically added to the
list.


OK. I'm not at all interested in carrying the maintenance burden for some
software behind closed doors. I could see value in having a more
flexible sparse checkout, but this now seems very tightly designed
for GVFS. So unless there's another use case (preferably open source)
for this, I don't think this should be added to git.git.


I should point out that VFS for Git is an open-source project and will
likely have larger use than just at Microsoft.  There are both Windows
and Mac clients and there are plans for a Linux client as well.
Ideally, it would work with an unmodified upstream Git, which is (I
assume) why Ben is sending this series.


Ah I didn't know that. Thank you. I'll have to look at this GVFS some time then.

If we're going to support GVFS though, I think there should be a big
(RFC perhaps) series that includes everything to at least give an
overview what the end game looks like. Then it could be split up into
smaller series.



We've always had the goal of not needing a fork at all and are 
continually working to bring the list of differences to zero, but in the 
meantime you can see the entire set of changes we've made here [1].


If you look, most of them are changes we are already in the process of 
submitting to git (i.e. midx, tracing, etc.) or patches we fast-tracked 
from master to our branch (your unpack_trees() optimizations, for example).


Most of the others are small tweaks and features for performance or to 
smooth integration issues.  This RFC contains the core changes that were 
required to enable VFS for Git.


[1] 
https://github.com/git-for-windows/git/compare/master...Microsoft:gvfs-2.19.1


Re: [RFC v1] Add virtual file system settings and hook proc

2018-11-05 Thread Ben Peart




On 11/5/2018 10:26 AM, Duy Nguyen wrote:

On Mon, Nov 5, 2018 at 12:40 PM Ævar Arnfjörð Bjarmason
 wrote:



On Sun, Nov 04 2018, Duy Nguyen wrote:


On Wed, Oct 31, 2018 at 9:53 PM Ben Peart  wrote:

+core.virtualFilesystem::
+   If set, the value of this variable is used as a command which
+   will identify all files and directories that are present in
+   the working directory.  Git will only track and update files
+   listed in the virtual file system.  Using the virtual file system
+   will supersede the sparse-checkout settings which will be ignored.
+   See the "virtual file system" section of linkgit:githooks[6].


It sounds like "virtual file system" is just one of the use cases for
this feature, which is more about a dynamic source of sparse-checkout
bits. Perhaps name the config key with something along sparse checkout
instead of naming it after a use case.


It's more than a dynamic sparse-checkout because the same list is also
used to exclude any file/folder not listed.  That means any file not
listed won't ever be updated by git (like in 'checkout' for example) so
'stale' files could be left in the working directory.  It also means git
won't find new/untracked files unless they are specifically added to the
list.


OK. I'm not at all interested in carrying the maintenance burden for some
software behind closed doors. I could see value in having a more
flexible sparse checkout, but this now seems very tightly designed
for GVFS. So unless there's another use case (preferably open source)
for this, I don't think this should be added to git.git.


I haven't looked at the patch in any detail beyond skimming it, and
you're more familiar with this area...

But in principle I'm very interested in getting something like this in
git.git, even if we were to assume GVFS was a 100% proprietary
implementation.


I have nothing against building a GVFS-like solution. If what's
submitted can be the building blocks for that, great! But if it was
just for GVFS (and it was not available to everybody) then no thank
you.



Not only is VFS for Git open source and supported (or soon to be supported) 
on Windows, Mac, and Linux, but the interface being proposed is also quite 
generic, so it should be usable for other implementations.


To use it, you just need to provide a hook that will return a list of 
files git should pay attention to (using a subset of the existing 
sparse-checkout format).


If you see anything that would make using it difficult for other 
solutions to use, let's fix it now!


Re: [RFC v1] Add virtual file system settings and hook proc

2018-11-05 Thread Ben Peart




On 11/4/2018 7:02 PM, Junio C Hamano wrote:

Ben Peart  writes:


+   if (*dtype == DT_UNKNOWN)
+   *dtype = get_dtype(NULL, istate, pathname, pathlen);


We try to defer paying cost to determine unknown *dtype as late as
possible by having this call in last_exclude_matching_from_list(),
and not here.  If we are doing this, we probably should update the
callpaths that call last_exclude_matching_from_list() to make the
caller responsible for doing get_dtype() and drop the lazy finding
of dtype from the callee.  Alternatively, the new "is excluded from
vfs" helper can learn to do the lazy get_dtype() just like the
existing last_exclude_matching_from_list() does.  I suspect the
latter may be simpler.


In is_excluded_from_virtualfilesystem() dtype can't be lazy because it
is always needed (which is why I test and die if it isn't known).


You make a call to that function even when virtual-file-system hook
is not in use, i.e. instead of the caller saying

if (is_vfs_in_use()) {
*dtype = get_dtype(...);
 if (is_excluded_from_vfs(...) > 0)
return 1;
}

your caller makes an unconditional call to is_excluded_from_vfs().
Isn't that the only reason why you break the laziness of determining
dtype?

You can keep the caller simple by making an unconditional call, but
maintain the laziness by updating the callig convention to pass
dtype (not *dtype) to the function, e.g..

if (is_excluded_from_vfs(pathname, pathlen, dtype) > 0)
return 1;

and then at the beginning of the helper

if (is_vfs_in_use())
return -1; /* undetermined */
*dtype = get_dtype(...);
... whatever logic it has now ...

no?



Oops!  You're right, I do call get_dtype() even if the vfs isn't in use. 
I'll add an additional test to avoid doing that.  Thank you.


I did look into moving the delay load logic into is_excluded_from_vfs() 
but get_dtype() is static to dir.c and I'd prefer to keep it that way if 
possible.



Your comments are all feedback on the code - how it was implemented,
style, etc.  Any thoughts on whether this is something we could/should
merge into master (after any necessary cleanup)?  Would anyone else
find this interesting/helpful?


I am pretty much neutral.  Not strongly opposed to it, but not all
that interested until seeing its integration with the "userland" to
see how the whole thing works ;-)



[PATCH v1] refresh_index: remove unnecessary calls to preload_index()

2018-11-05 Thread Ben Peart
From: Ben Peart 

With refresh_index() learning to utilize preload_index() to speed up its
operation there is no longer any benefit to having the caller preload the
index first. Remove those unneeded calls by calling read_index() instead of
the preload variant.

There is no measurable performance impact of this patch - the 2nd call to
preload_index() bails out quickly but there is no reason to call it twice.

Signed-off-by: Ben Peart 
---

Notes:
Base Ref:
Web-Diff: https://github.com/benpeart/git/commit/384f7fed53
Checkout: git fetch https://github.com/benpeart/git no-index-preload-v1 && 
git checkout 384f7fed53

 builtin/commit.c   | 2 +-
 builtin/describe.c | 2 +-
 builtin/update-index.c | 2 +-
 sequencer.c| 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 074bd9a551..96d336ec3d 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1363,7 +1363,7 @@ int cmd_status(int argc, const char **argv, const char 
*prefix)
if (status_format != STATUS_FORMAT_PORCELAIN &&
status_format != STATUS_FORMAT_PORCELAIN_V2)
progress_flag = REFRESH_PROGRESS;
-   read_index_preload(&the_index, &s.pathspec, progress_flag);
+   read_index(&the_index);
refresh_index(&the_index,
  REFRESH_QUIET|REFRESH_UNMERGED|progress_flag,
  &s.pathspec, NULL, NULL);
diff --git a/builtin/describe.c b/builtin/describe.c
index c48c34e866..cc118448ee 100644
--- a/builtin/describe.c
+++ b/builtin/describe.c
@@ -629,7 +629,7 @@ int cmd_describe(int argc, const char **argv, const char 
*prefix)
struct argv_array args = ARGV_ARRAY_INIT;
int fd, result;
 
-   read_cache_preload(NULL);
+   read_cache();
refresh_index(_index, 
REFRESH_QUIET|REFRESH_UNMERGED,
  NULL, NULL, NULL);
fd = hold_locked_index(_lock, 0);
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 07c10bcb7d..0e1dcf0438 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -782,7 +782,7 @@ struct refresh_params {
 static int refresh(struct refresh_params *o, unsigned int flag)
 {
setup_work_tree();
-   read_cache_preload(NULL);
+   read_cache();
*o->has_errors |= refresh_cache(o->flags | flag);
return 0;
 }
diff --git a/sequencer.c b/sequencer.c
index 9e1ab3a2a7..ab2048ac3a 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -1919,7 +1919,7 @@ static int read_and_refresh_cache(struct replay_opts 
*opts)
 {
struct lock_file index_lock = LOCK_INIT;
int index_fd = hold_locked_index(_lock, 0);
-   if (read_index_preload(_index, NULL, 0) < 0) {
+   if (read_index(_index) < 0) {
rollback_lock_file(_lock);
return error(_("git %s: failed to read the index"),
_(action_name(opts)));

base-commit: 095c8dc8c2a9d61783dbae79a7f6e8d80092696f
-- 
2.18.0.windows.1



Re: [PATCH v1] add: speed up cmd_add() by utilizing read_cache_preload()

2018-11-02 Thread Ben Peart




On 11/2/2018 11:23 AM, Junio C Hamano wrote:

Ben Peart  writes:


From: Ben Peart 

During an "add", a call is made to run_diff_files() which calls
check_remove() for each index-entry.  The preload_index() code
distributes some of the costs across multiple threads.


Nice.  I peeked around and noticed that we already do this in
builtin_diff_index() before running run_diff_index() when !cached,
and builtin_diff_files(), of course.



Thanks for double checking!


Because the files checked are restricted to pathspec, adding individual
files makes no measurable impact but on a Windows repo with ~200K files,
'git add .' drops from 6.3 seconds to 3.3 seconds for a 47% savings.


;-)


diff --git a/builtin/add.c b/builtin/add.c
index ad49806ebf..f65c172299 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -445,11 +445,6 @@ int cmd_add(int argc, const char **argv, const char 
*prefix)
return 0;
}
  
-	if (read_cache() < 0)

-   die(_("index file corrupt"));
-
-   die_in_unpopulated_submodule(_index, prefix);
-
/*
 * Check the "pathspec '%s' did not match any files" block
 * below before enabling new magic.


It is not explained why this is not a mere s/read_cache/&_preload/
in the log message.  I can see it is because you wanted to make the
pathspec available to preload to further cut down the preloaded
paths, and I do not think it has any unintended (negative) side
effect to parse the pathspec before populating the in-core index,
but that would have been a good thing to mention in the proposed log
message.



That is correct.  parse_pathspec() was after read_cache() because it 
_used_ to use the index to determine whether a pathspec is in a 
submodule.  That was fixed by Brandon in Aug 2017 when he cleaned up all 
submodule config code so it is safe to move read_cache_preload() after 
the call to parse_pathspec().


How about this for a revised commit message?



During an "add", a call is made to run_diff_files() which calls
check_remove() for each index-entry.  The preload_index() code 
distributes some of the costs across multiple threads.


Limit read_cache_preload() to pathspec, so that it doesn't process more 
entries than are needed and end up slowing things down instead of 
speeding them up.


Because the files checked are restricted to pathspec, adding individual
files makes no measurable impact but on a Windows repo with ~200K files,
'git add .' drops from 6.3 seconds to 3.3 seconds for a 47% savings.




@@ -459,6 +454,10 @@ int cmd_add(int argc, const char **argv, const char 
*prefix)
   PATHSPEC_SYMLINK_LEADING_PATH,
   prefix, argv);
  
+	if (read_cache_preload() < 0)

+   die(_("index file corrupt"));
+
+   die_in_unpopulated_submodule(_index, prefix);
die_path_inside_submodule(_index, );
  
  	if (add_new_files) {


base-commit: 4ede3d42dfb57f9a41ac96a1f216c62eb7566cc2


[PATCH v1] add: speed up cmd_add() by utilizing read_cache_preload()

2018-11-02 Thread Ben Peart
From: Ben Peart 

During an "add", a call is made to run_diff_files() which calls
check_remove() for each index-entry.  The preload_index() code distributes
some of the costs across multiple threads.

Because the files checked are restricted to pathspec, adding individual
files makes no measurable impact but on a Windows repo with ~200K files,
'git add .' drops from 6.3 seconds to 3.3 seconds for a 47% savings.

Signed-off-by: Ben Peart 
---

Notes:
Base Ref: master
Web-Diff: https://github.com/benpeart/git/commit/fc4830b545
Checkout: git fetch https://github.com/benpeart/git add-preload-index-v1 && 
git checkout fc4830b545

 builtin/add.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/builtin/add.c b/builtin/add.c
index ad49806ebf..f65c172299 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -445,11 +445,6 @@ int cmd_add(int argc, const char **argv, const char 
*prefix)
return 0;
}
 
-   if (read_cache() < 0)
-   die(_("index file corrupt"));
-
-   die_in_unpopulated_submodule(_index, prefix);
-
/*
 * Check the "pathspec '%s' did not match any files" block
 * below before enabling new magic.
@@ -459,6 +454,10 @@ int cmd_add(int argc, const char **argv, const char 
*prefix)
   PATHSPEC_SYMLINK_LEADING_PATH,
   prefix, argv);
 
+   if (read_cache_preload() < 0)
+   die(_("index file corrupt"));
+
+   die_in_unpopulated_submodule(_index, prefix);
die_path_inside_submodule(_index, );
 
if (add_new_files) {

base-commit: 4ede3d42dfb57f9a41ac96a1f216c62eb7566cc2
-- 
2.18.0.windows.1



Re: [RFC v1] Add virtual file system settings and hook proc

2018-10-31 Thread Ben Peart




On 10/31/2018 3:11 PM, Duy Nguyen wrote:

not really a review, just a couple of quick notes...



Perfect!  As an RFC, I'm more looking for high level thoughts/notes than 
a style/syntax code review.



On Tue, Oct 30, 2018 at 9:40 PM Ben Peart  wrote:


From: Ben Peart 

On index load, clear/set the skip worktree bits based on the virtual
file system data. Use virtual file system data to update skip-worktree
bit in unpack-trees. Use virtual file system data to exclude files and
folders not explicitly requested.

Signed-off-by: Ben Peart 
---

We have taken several steps to make git perform well on very large repos.
Some of those steps include: improving underlying algorithms, utilizing
multi-threading where possible, and simplifying the behavior of some commands.
These changes typically benefit all git repos to varying degrees.  While
these optimizations all help, they are insufficient to provide adequate
performance on the very large repos we often work with.

To make git perform well on the very largest repos, we had to make more
significant changes.  The biggest performance win by far is the work we have
done to make git operations O(modified) instead of O(size of repo).  This
takes advantage of the fact that the number of files a developer has modified
is a tiny fraction of the overall repo size.

We accomplished this by utilizing the existing internal logic for the skip
worktree bit and excludes to tell git to ignore all files and folders other
than those that have been modified.  This logic is driven by an external
process that monitors writes to the repo and communicates the list of files
and folders with changes to git via the virtual file system hook in this patch.

The external process maintains a list of files and folders that have been
modified.  When git runs, it requests the list of files and folders that
have been modified via the virtual file system hook.  Git then sets/clears
the skip-worktree bit on the cache entries and builds a hashmap of the
modified files/folders that is used by the excludes logic to avoid scanning
the entire repo looking for changes and untracked files.

With this system, we have been able to make local git command performance on
extremely large repos (millions of files, 1/2 million folders) entirely
manageable (30 second checkout, 3.5 seconds status, 4 second add, 7 second
commit, etc).

Our desire is to eliminate all custom patches in our fork of git.  To that
end, I'm submitting this as an RFC to see how much interest there is and how
much willingness to take this type of change into git.


Most of these paragraphs (perhaps except the last one) should be part
of the commit message. You describe briefly what the patch does but
it's even more important to say why you want to do it.


+core.virtualFilesystem::
+   If set, the value of this variable is used as a command which
+   will identify all files and directories that are present in
+   the working directory.  Git will only track and update files
+   listed in the virtual file system.  Using the virtual file system
+   will supersede the sparse-checkout settings which will be ignored.
+   See the "virtual file system" section of linkgit:githooks[6].


It sounds like "virtual file system" is just one of the use cases for
this feature, which is more about a dynamic source of sparse-checkout
bits. Perhaps name the config key with something along the lines of
sparse checkout instead of naming it after a use case.


It's more than a dynamic sparse-checkout because the same list is also 
used to exclude any file/folder not listed.  That means any file not 
listed won't ever be updated by git (like in 'checkout' for example) so 
'stale' files could be left in the working directory.  It also means git 
won't find new/untracked files unless they are specifically added to the 
list.
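
Very roughly, the exclude side behaves like this (the lookup helpers 
here are hypothetical stand-ins for the hashmap built from the hook 
data, not the actual function names):

    static int is_excluded_by_vfs_list(const char *path, int pathlen, int dtype)
    {
            if (dtype == DT_DIR)
                    return !vfs_list_has_dir(path, pathlen);  /* hypothetical */
            return !vfs_list_has_file(path, pathlen);         /* hypothetical */
    }

Anything that comes back excluded here is never walked, never updated 
by commands like checkout, and never reported as untracked.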




This is a hook. I notice we start to avoid adding real hooks and just
add config keys instead. Eventually we should have config-based hooks,
but if we're going to add more like this, I think these should be in a
separate section, hook.virtualFileSystem or something.



That is a great idea.  I don't personally like specifying the hook as 
the 'flag' for whether a feature should be used.  I'd rather have it be 
a bool (enable the feature? true/false) and 1) either have the hook name 
hard coded (like most existing hooks) or 2) as you suggest add a 
consistent way to have config-based hooks.  Config based hooks could 
also help provide a consistent way to configure them using GIT_TEST_* 
environment variables for testing.



I don't think the superseding makes sense. There's no reason this
could not be used in combination with $GIT_DIR/info/sparse-checkout.
If you don't want both, disable the other.

One last note. Since this is related to the filesystem, shouldn't it be
part of fsmonitor (the protocol, not the implementation)? Then
watchman users could use it too.



To get this to work properly takes a lot mo

Re: [RFC v1] Add virtual file system settings and hook proc

2018-10-31 Thread Ben Peart




On 10/30/2018 7:07 PM, Junio C Hamano wrote:

Ben Peart  writes:


diff --git a/config.c b/config.c
index 4051e38823..96e05ee0f1 100644
--- a/config.c
+++ b/config.c
...
@@ -2307,6 +2311,37 @@ int git_config_get_index_threads(void)
return 0; /* auto */
  }
  
+int git_config_get_virtualfilesystem(void)

+{
+   if (git_config_get_pathname("core.virtualfilesystem", 
_virtualfilesystem))
+   core_virtualfilesystem = getenv("GIT_TEST_VIRTUALFILESYSTEM");
+
+   if (core_virtualfilesystem && !*core_virtualfilesystem)
+   core_virtualfilesystem = NULL;
+
+   if (core_virtualfilesystem) {
+   /*
+* Some git commands spawn helpers and redirect the index to a 
different
+* location.  These include "difftool -d" and the sequencer
+* (i.e. `git rebase -i`, `git cherry-pick` and `git revert`) 
and others.
+* In those instances we don't want to update their temporary 
index with
+* our virtualization data.
+*/
+   char *default_index_file = xstrfmt("%s/%s", the_repository->gitdir, 
"index");
+   int should_run_hook = !strcmp(default_index_file, 
the_repository->index_file);
+
+   free(default_index_file);
+   if (should_run_hook) {
+   /* virtual file system relies on the sparse checkout 
logic so force it on */
+   core_apply_sparse_checkout = 1;
+   return 1;
+   }
+   core_virtualfilesystem = NULL;


It would be a small leak if this came from config_get_pathname(),
but if it came from $GIT_TEST_VFS env, we cannot free it X-<.

A helper function called *_get_X() that does not return X as its
return value or update the location pointed to by its *dst parameter,
and instead only stores its finding in a global variable, feels
somewhat odd.  It smells more like "find out", "probe", "check",
etc.



I agree.  Frankly, I think it should just return whether it should be 
used or not (bool) and the hook name should be fixed.  I got push back 
when I did that for fsmonitor so I made this the same in an effort to 
head off that same feedback.
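
For example, something along these lines (a sketch of what I mean, not 
what this patch currently does):

    int core_use_virtualfilesystem(void)
    {
            int enabled = 0;

            if (git_config_get_bool("core.virtualfilesystem", &enabled))
                    enabled = git_env_bool("GIT_TEST_VIRTUALFILESYSTEM", 0);
            return enabled;
    }

with the hook itself living under a fixed name in .git/hooks/, the way 
most existing hooks do.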



diff --git a/dir.c b/dir.c
index 47c2fca8dc..3097b0e446 100644
--- a/dir.c
+++ b/dir.c
@@ -21,6 +21,7 @@
  #include "ewah/ewok.h"
  #include "fsmonitor.h"
  #include "submodule-config.h"
+#include "virtualfilesystem.h"
  
  /*

   * Tells read_directory_recursive how a file or directory should be treated.
@@ -1109,6 +1110,18 @@ int is_excluded_from_list(const char *pathname,
  struct exclude_list *el, struct index_state *istate)
  {
struct exclude *exclude;
+
+   /*
+* The virtual file system data is used to prevent git from traversing
+* any part of the tree that is not in the virtual file system.  Return
+* 1 to exclude the entry if it is not found in the virtual file system,
+* else fall through to the regular excludes logic as it may further 
exclude.
+*/


This comment will sit better immediately in front of the call to "is
excluded from vfs?" helper function.


+   if (*dtype == DT_UNKNOWN)
+   *dtype = get_dtype(NULL, istate, pathname, pathlen);


We try to defer paying cost to determine unknown *dtype as late as
possible by having this call in last_exclude_matching_from_list(),
and not here.  If we are doing this, we probably should update the
callpaths that call last_exclude_matching_from_list() to make the
caller responsible for doing get_dtype() and drop the lazy finding
of dtype from the callee.  Alternatively, the new "is excluded from
vfs" helper can learn to do the lazy get_dtype() just like the
existing last_exclude_matching_from_list() does.  I suspect the
latter may be simpler.


In is_excluded_from_virtualfilesystem() dtype can't be lazy because it 
is always needed (which is why I test and die if it isn't known).  I 
considered doing the test/call to get_dtype() within 
is_excluded_from_virtualfilesystem() but didn't like making it dependent 
on istate just so I could move the get_dtype() call in one level.  It is 
functionally identical so I can easily move it in if that is preferred.





+   if (is_excluded_from_virtualfilesystem(pathname, pathlen, *dtype) > 0)
+   return 1;
+
exclude = last_exclude_matching_from_list(pathname, pathlen, basename,
  dtype, el, istate);
if (exclude)
@@ -1324,8 +1337,20 @@ struct exclude *last_exclude_matching(struct dir_struct 
*dir,
  int is_excluded(struct dir_struct *dir, struct index_state *istate,
const char *pathname, int *dtype_p)
  {
-   struct exclude *exclude =
-   last_exclude_matchi

[RFC v1] Add virtual file system settings and hook proc

2018-10-30 Thread Ben Peart
From: Ben Peart 

On index load, clear/set the skip worktree bits based on the virtual
file system data. Use virtual file system data to update skip-worktree
bit in unpack-trees. Use virtual file system data to exclude files and
folders not explicitly requested.

Signed-off-by: Ben Peart 
---

We have taken several steps to make git perform well on very large repos.
Some of those steps include: improving underlying algorithms, utilizing
multi-threading where possible, and simplifying the behavior of some commands.
These changes typically benefit all git repos to varying degrees.  While
these optimizations all help, they are insufficient to provide adequate
performance on the very large repos we often work with.

To make git perform well on the very largest repos, we had to make more
significant changes.  The biggest performance win by far is the work we have
done to make git operations O(modified) instead of O(size of repo).  This
takes advantage of the fact that the number of files a developer has modified
is a tiny fraction of the overall repo size.

We accomplished this by utilizing the existing internal logic for the skip
worktree bit and excludes to tell git to ignore all files and folders other
than those that have been modified.  This logic is driven by an external
process that monitors writes to the repo and communicates the list of files
and folders with changes to git via the virtual file system hook in this patch.

The external process maintains a list of files and folders that have been
modified.  When git runs, it requests the list of files and folders that
have been modified via the virtual file system hook.  Git then sets/clears
the skip-worktree bit on the cache entries and builds a hashmap of the
modified files/folders that is used by the excludes logic to avoid scanning
the entire repo looking for changes and untracked files.
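
As a rough illustration of that index pass (the lookup helper below is a 
hypothetical stand-in for the hashmap built from the hook output, not 
the actual function name):

    int i;

    for (i = 0; i < istate->cache_nr; i++) {
            struct cache_entry *ce = istate->cache[i];

            if (path_in_virtualfilesystem(ce->name))  /* hypothetical lookup */
                    ce->ce_flags &= ~CE_SKIP_WORKTREE;
            else
                    ce->ce_flags |= CE_SKIP_WORKTREE;
    }

Entries whose paths are listed by the hook get the skip-worktree bit 
cleared so git manages them normally; everything else keeps the bit set 
and is left alone.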

With this system, we have been able to make local git command performance on
extremely large repos (millions of files, 1/2 million folders) entirely
manageable (30 second checkout, 3.5 seconds status, 4 second add, 7 second
commit, etc).

Our desire is to eliminate all custom patches in our fork of git.  To that
end, I'm submitting this as an RFC to see how much interest there is and how
much willingness to take this type of change into git.

Notes:
Base Ref: master
Web-Diff: https://github.com/benpeart/git/commit/c06b290d2f
Checkout: git fetch https://github.com/benpeart/git virtual-filesystem-v1 
&& git checkout c06b290d2f

 Documentation/config.txt |   8 +
 Documentation/githooks.txt   |  20 ++
 Makefile |   1 +
 cache.h  |   1 +
 config.c |  37 +++-
 config.h |   1 +
 dir.c|  34 +++-
 environment.c|   1 +
 read-cache.c |   2 +
 t/README |   3 +
 t/t1092-virtualfilesystem.sh | 354 +++
 t/t1092/virtualfilesystem|  23 +++
 unpack-trees.c   |  23 ++-
 virtualfilesystem.c  | 308 ++
 virtualfilesystem.h  |  25 +++
 15 files changed, 833 insertions(+), 8 deletions(-)
 create mode 100755 t/t1092-virtualfilesystem.sh
 create mode 100755 t/t1092/virtualfilesystem
 create mode 100644 virtualfilesystem.c
 create mode 100644 virtualfilesystem.h

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 09e95e9e98..dd4b834375 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -441,6 +441,14 @@ core.fsmonitor::
avoiding unnecessary processing of files that have not changed.
See the "fsmonitor-watchman" section of linkgit:githooks[5].
 
+core.virtualFilesystem::
+   If set, the value of this variable is used as a command which
+   will identify all files and directories that are present in
+   the working directory.  Git will only track and update files
+   listed in the virtual file system.  Using the virtual file system
+   will supersede the sparse-checkout settings which will be ignored.
+   See the "virtual file system" section of linkgit:githooks[6].
+
 core.trustctime::
If false, the ctime differences between the index and the
working tree are ignored; useful when the inode change time
diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt
index 959044347e..87f9ad2a77 100644
--- a/Documentation/githooks.txt
+++ b/Documentation/githooks.txt
@@ -492,6 +492,26 @@ This hook is invoked by `git-p4 submit`. It takes no 
parameters and nothing
 from standard input. Exiting with non-zero status from this script prevent
 `git-p4 submit` from launching. Run `git-p4 submit --help` for details.
 
+virtualFilesystem
+~~
+
+"Virtual File System" allows populating the working directory sparsely.
+The projection data is typically automatically generated by an externa

[PATCH v1] speed up refresh_index() by utilizing preload_index()

2018-10-29 Thread Ben Peart
From: Ben Peart 

Speed up refresh_index() by utilizing preload_index() to do most of the work
spread across multiple threads.  This works because most cache entries will
get marked CE_UPTODATE so that refresh_cache_ent() can bail out early when
called from within refresh_index().

On a Windows repo with ~200K files, this drops refresh times from 6.64
seconds to 2.87 seconds for a savings of 57%.

Signed-off-by: Ben Peart 
---

Notes:
Base Ref: master
Web-Diff: https://github.com/benpeart/git/commit/feee1054c2
Checkout: git fetch https://github.com/benpeart/git 
refresh-index-multithread-preload-v1 && git checkout feee1054c2

 cache.h | 3 +++
 preload-index.c | 8 
 read-cache.c| 6 ++
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/cache.h b/cache.h
index f7fabdde8f..883099db08 100644
--- a/cache.h
+++ b/cache.h
@@ -659,6 +659,9 @@ extern int daemonize(void);
 /* Initialize and use the cache information */
 struct lock_file;
 extern int read_index(struct index_state *);
+extern void preload_index(struct index_state *index,
+ const struct pathspec *pathspec,
+ unsigned int refresh_flags);
 extern int read_index_preload(struct index_state *,
  const struct pathspec *pathspec,
  unsigned int refresh_flags);
diff --git a/preload-index.c b/preload-index.c
index 9e7152ab14..222792ccbc 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -9,7 +9,7 @@
 #include "progress.h"
 
 #ifdef NO_PTHREADS
-static void preload_index(struct index_state *index,
+void preload_index(struct index_state *index,
  const struct pathspec *pathspec,
  unsigned int refresh_flags)
 {
@@ -100,9 +100,9 @@ static void *preload_thread(void *_data)
return NULL;
 }
 
-static void preload_index(struct index_state *index,
- const struct pathspec *pathspec,
- unsigned int refresh_flags)
+void preload_index(struct index_state *index,
+  const struct pathspec *pathspec,
+  unsigned int refresh_flags)
 {
int threads, i, work, offset;
struct thread_data data[MAX_PARALLEL];
diff --git a/read-cache.c b/read-cache.c
index d57958233e..53733d651d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1496,6 +1496,12 @@ int refresh_index(struct index_state *istate, unsigned 
int flags,
typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n");
added_fmt = (in_porcelain ? "A\t%s\n" : "%s needs update\n");
unmerged_fmt = (in_porcelain ? "U\t%s\n" : "%s: needs merge\n");
+   /*
+* Use the multi-threaded preload_index() to refresh most of the
+* cache entries quickly then in the single threaded loop below,
+* we only have to do the special cases that are left.
+*/
+   preload_index(istate, pathspec, 0);
for (i = 0; i < istate->cache_nr; i++) {
struct cache_entry *ce, *new_entry;
int cache_errno = 0;

base-commit: c670b1f876521c9f7cd40184bf7ed05aad843433
-- 
2.9.2.gvfs.2.27918.g0990287eef



Re: [PATCH v2 06/10] preload-index.c: remove #ifdef NO_PTHREADS

2018-10-29 Thread Ben Peart




On 10/29/2018 1:26 PM, Duy Nguyen wrote:

On Mon, Oct 29, 2018 at 6:21 PM Ben Peart  wrote:

@@ -114,6 +104,9 @@ static void preload_index(struct index_state *index,
  threads = index->cache_nr / THREAD_COST;
  if ((index->cache_nr > 1) && (threads < 2) &&
git_env_bool("GIT_TEST_PRELOAD_INDEX", 0))
  threads = 2;
+   cpus = online_cpus();
+   if (threads > cpus)
+   threads = cpus;
  if (threads < 2)
  return;
  trace_performance_enter();


Capping the number of threads to online_cpus() does not always make
sense. In this case (or at least the original use case where we stat()
over nfs) we want to issue as many requests to nfs server as possible
to reduce latency and it has nothing to do with how many cores we
have. Using more threads than cores might make sense since threads are
blocked by nfs client, but we still want to send more to the server.



That makes sense.  Some testing will be necessary then.

I guess HAVE_THREADS is functionally the same as online_cpus() == 1. 
For some reason, I prefer the latter - probably because I'm typically 
already calling it and so it doesn't feel like a 'special' value/test I 
have to remember. I just know I need to make sure the logic works 
correctly with any number of cpus greater than zero. :-)


Re: [PATCH 09/10] read-cache.c: remove #ifdef NO_PTHREADS

2018-10-29 Thread Ben Peart




On 10/29/2018 1:21 PM, Duy Nguyen wrote:

On Mon, Oct 29, 2018 at 6:05 PM Ben Peart  wrote:

@@ -2756,8 +2745,11 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
   if (ce_write(, newfd, , sizeof(hdr)) < 0)
   return -1;

-#ifndef NO_PTHREADS
- nr_threads = git_config_get_index_threads();
+ if (HAVE_THREADS)


This shouldn't be needed either.  My assumption was that if someone
explicitly asked for multiple threads we're better off failing than
silently ignoring their request.  The default/automatic setting will
detect the number of cpus and behave correctly.


No. I can have ~/.gitconfig shared between different machines. One
with multithread support and one without. I should not have to fall
back to conditional includes in order to use the same config file on
the machine without multithread.



If you want to share the ~/.gitconfig across machines that have 
different numbers of CPUs, it makes more sense to me to leave the 
setting at the "auto" setting rather than specifically overriding it to 
a number that won't work on at least one of your machines.  Then it all 
"just works" without the need to conditional includes.


Re: [PATCH v2 08/10] read-cache.c: remove #ifdef NO_PTHREADS

2018-10-29 Thread Ben Peart




On 10/29/2018 10:30 AM, Jeff King wrote:

On Sat, Oct 27, 2018 at 07:30:06PM +0200, Nguyễn Thái Ngọc Duy wrote:


-#ifndef NO_PTHREADS
-   nr_threads = git_config_get_index_threads();
+   if (HAVE_THREADS) {
+   nr_threads = git_config_get_index_threads();
  
-	/* TODO: does creating more threads than cores help? */

-   if (!nr_threads) {
-   nr_threads = istate->cache_nr / THREAD_COST;
-   cpus = online_cpus();
-   if (nr_threads > cpus)
-   nr_threads = cpus;
+   /* TODO: does creating more threads than cores help? */
+   if (!nr_threads) {
+   nr_threads = istate->cache_nr / THREAD_COST;
+   cpus = online_cpus();
+   if (nr_threads > cpus)
+   nr_threads = cpus;
+   }
+   } else {
+   nr_threads = 1;
}


I'd have thought we could just rely on online_cpus() returning 1 here to
avoid having to ask "do we even have thread support?". But I guess that
TODO comment implies that we might one day use 2 threads on a single
CPU.

-Peff



And here is the patch I created when testing out the idea of removing 
NO_PTHREADS.  It doesn't require any 'HAVE_THREADS' tests.




diff --git a/read-cache.c b/read-cache.c
index 1df5c16dbc..1f088799fc 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1920,19 +1920,15 @@ struct index_entry_offset_table
struct index_entry_offset entries[FLEX_ARRAY];
 };

-#ifndef NO_PTHREADS
 static struct index_entry_offset_table *read_ieot_extension(const char 
*mmap, size_t mmap_size, size_t offset

);
 static void write_ieot_extension(struct strbuf *sb, struct 
index_entry_offset_table *ieot);

-#endif

 static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
 static void write_eoie_extension(struct strbuf *sb, git_hash_ctx 
*eoie_context, size_t offset);


 struct load_index_extensions
 {
-#ifndef NO_PTHREADS
pthread_t pthread;
-#endif
struct index_state *istate;
const char *mmap;
size_t mmap_size;
@@ -2010,8 +2006,6 @@ static unsigned long load_all_cache_entries(struct 
index_state *istate,

return consumed;
 }

-#ifndef NO_PTHREADS
-
 /*
  * Mostly randomly chosen maximum thread counts: we
  * cap the parallelism to online_cpus() threads, and we want
@@ -2122,7 +2116,6 @@ static unsigned long 
load_cache_entries_threaded(struct index_state *istate, con



return consumed;
 }
-#endif

 /* remember to discard_cache() before reading a different cache! */
 int do_read_index(struct index_state *istate, const char *path, int 
must_exist)
@@ -2135,10 +2128,8 @@ int do_read_index(struct index_state *istate, 
const char *path, int must_exist)


size_t mmap_size;
struct load_index_extensions p;
size_t extension_offset = 0;
-#ifndef NO_PTHREADS
int nr_threads, cpus;
struct index_entry_offset_table *ieot = NULL;
-#endif

if (istate->initialized)
return istate->cache_nr;
@@ -2181,16 +2172,12 @@ int do_read_index(struct index_state *istate, 
const char *path, int must_exist

)

src_offset = sizeof(*hdr);

-#ifndef NO_PTHREADS
nr_threads = git_config_get_index_threads();
-
-   /* TODO: does creating more threads than cores help? */
-   if (!nr_threads) {
+   if (!nr_threads)
nr_threads = istate->cache_nr / THREAD_COST;
-   cpus = online_cpus();
-   if (nr_threads > cpus)
-   nr_threads = cpus;
-   }
+   cpus = online_cpus();
+   if (nr_threads > cpus)
+   nr_threads = cpus;

if (nr_threads > 1) {
extension_offset = read_eoie_extension(mmap, mmap_size);
@@ -2219,22 +2206,16 @@ int do_read_index(struct index_state *istate, 
const char *path, int must_exist

)
} else {
src_offset += load_all_cache_entries(istate, mmap, 
mmap_size, src_offset);

}
-#else
-   src_offset += load_all_cache_entries(istate, mmap, mmap_size, 
src_offset);

-#endif

istate->timestamp.sec = st.st_mtime;
istate->timestamp.nsec = ST_MTIME_NSEC(st);

/* if we created a thread, join it otherwise load the 
extensions on the primary thread */

-#ifndef NO_PTHREADS
if (extension_offset) {
int ret = pthread_join(p.pthread, NULL);
if (ret)
die(_("unable to join load_index_extensions 
thread: %s"), strerror(ret));

-   }
-#endif
-   if (!extension_offset) {
+   } else {
p.src_offset = src_offset;
load_index_extensions();
}
@@ -2756,7 +2737,6 @@ static int do_write_index(struct index_state 
*istate, struct tempfile *tempfile,


if (ce_write(, newfd, , sizeof(hdr)) < 0)
return -1;

-#ifndef NO_PTHREADS
nr_threads = 

Re: [PATCH v2 06/10] preload-index.c: remove #ifdef NO_PTHREADS

2018-10-29 Thread Ben Peart




On 10/27/2018 1:30 PM, Nguyễn Thái Ngọc Duy wrote:

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
  preload-index.c | 15 ++-
  1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/preload-index.c b/preload-index.c
index 9e7152ab14..0e24886aca 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -7,17 +7,7 @@
  #include "fsmonitor.h"
  #include "config.h"
  #include "progress.h"
-
-#ifdef NO_PTHREADS
-static void preload_index(struct index_state *index,
- const struct pathspec *pathspec,
- unsigned int refresh_flags)
-{
-   ; /* nothing */
-}
-#else
-
-#include <pthread.h>
+#include "thread-utils.h"
  
  /*

   * Mostly randomly chosen maximum thread counts: we
@@ -108,7 +98,7 @@ static void preload_index(struct index_state *index,
struct thread_data data[MAX_PARALLEL];
struct progress_data pd;
  
-	if (!core_preload_index)

+   if (!HAVE_THREADS || !core_preload_index)
return;
  
  	threads = index->cache_nr / THREAD_COST;

@@ -151,7 +141,6 @@ static void preload_index(struct index_state *index,
  
  	trace_performance_leave("preload index");

  }
-#endif
  
  int read_index_preload(struct index_state *index,

   const struct pathspec *pathspec,




On the theory that code speaks louder than comments, here is the patch I 
used when testing out the idea of getting rid of NO_PTHREADS:



 git diff head~1 preload-index.c
diff --git a/preload-index.c b/preload-index.c
index 9e7152ab14..266bc9d8d7 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -7,17 +7,7 @@
 #include "fsmonitor.h"
 #include "config.h"
 #include "progress.h"
-
-#ifdef NO_PTHREADS
-static void preload_index(struct index_state *index,
- const struct pathspec *pathspec,
- unsigned int refresh_flags)
-{
-   ; /* nothing */
-}
-#else
-
-#include <pthread.h>
+#include "thread-utils.h"

 /*
  * Mostly randomly chosen maximum thread counts: we
@@ -104,7 +94,7 @@ static void preload_index(struct index_state *index,
  const struct pathspec *pathspec,
  unsigned int refresh_flags)
 {
-   int threads, i, work, offset;
+   int cpus, threads, i, work, offset;
struct thread_data data[MAX_PARALLEL];
struct progress_data pd;

@@ -114,6 +104,9 @@ static void preload_index(struct index_state *index,
threads = index->cache_nr / THREAD_COST;
if ((index->cache_nr > 1) && (threads < 2) && 
git_env_bool("GIT_TEST_PRELOAD_INDEX", 0))

threads = 2;
+   cpus = online_cpus();
+   if (threads > cpus)
+   threads = cpus;
if (threads < 2)
return;
trace_performance_enter();
@@ -151,7 +144,6 @@ static void preload_index(struct index_state *index,

trace_performance_leave("preload index");
 }
-#endif

 int read_index_preload(struct index_state *index,
   const struct pathspec *pathspec,


Re: [PATCH v2 08/10] read-cache.c: remove #ifdef NO_PTHREADS

2018-10-29 Thread Ben Peart




On 10/29/2018 10:30 AM, Jeff King wrote:

On Sat, Oct 27, 2018 at 07:30:06PM +0200, Nguyễn Thái Ngọc Duy wrote:


-#ifndef NO_PTHREADS
-   nr_threads = git_config_get_index_threads();
+   if (HAVE_THREADS) {
+   nr_threads = git_config_get_index_threads();
  
-	/* TODO: does creating more threads than cores help? */

-   if (!nr_threads) {
-   nr_threads = istate->cache_nr / THREAD_COST;
-   cpus = online_cpus();
-   if (nr_threads > cpus)
-   nr_threads = cpus;
+   /* TODO: does creating more threads than cores help? */
+   if (!nr_threads) {
+   nr_threads = istate->cache_nr / THREAD_COST;
+   cpus = online_cpus();
+   if (nr_threads > cpus)
+   nr_threads = cpus;
+   }
+   } else {
+   nr_threads = 1;
}


I'd have thought we could just rely on online_cpus() returning 1 here to
avoid having to ask "do we even have thread support?". But I guess that
TODO comment implies that we might one day use 2 threads on a single
CPU.

-Peff



See my earlier response - the HAVE_THREADS tests are not needed.

We can remove the "TODO" comment - I tested it and it wasn't faster to 
have more threads than CPUs.


Re: [PATCH 09/10] read-cache.c: remove #ifdef NO_PTHREADS

2018-10-29 Thread Ben Peart




On 10/27/2018 3:10 AM, Nguyễn Thái Ngọc Duy wrote:

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
  read-cache.c | 49 ++---
  1 file changed, 18 insertions(+), 31 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index d57958233e..ba870bc3fd 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1920,19 +1920,15 @@ struct index_entry_offset_table
struct index_entry_offset entries[FLEX_ARRAY];
  };
  
-#ifndef NO_PTHREADS

  static struct index_entry_offset_table *read_ieot_extension(const char *mmap, 
size_t mmap_size, size_t offset);
  static void write_ieot_extension(struct strbuf *sb, struct 
index_entry_offset_table *ieot);
-#endif
  
  static size_t read_eoie_extension(const char *mmap, size_t mmap_size);

  static void write_eoie_extension(struct strbuf *sb, git_hash_ctx 
*eoie_context, size_t offset);
  
  struct load_index_extensions

  {
-#ifndef NO_PTHREADS
pthread_t pthread;
-#endif
struct index_state *istate;
const char *mmap;
size_t mmap_size;
@@ -2010,8 +2006,6 @@ static unsigned long load_all_cache_entries(struct 
index_state *istate,
return consumed;
  }
  
-#ifndef NO_PTHREADS

-
  /*
   * Mostly randomly chosen maximum thread counts: we
   * cap the parallelism to online_cpus() threads, and we want
@@ -2122,7 +2116,6 @@ static unsigned long load_cache_entries_threaded(struct 
index_state *istate, con
  
  	return consumed;

  }
-#endif
  
  /* remember to discard_cache() before reading a different cache! */

  int do_read_index(struct index_state *istate, const char *path, int 
must_exist)
@@ -2135,10 +2128,8 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
size_t mmap_size;
struct load_index_extensions p;
size_t extension_offset = 0;
-#ifndef NO_PTHREADS
int nr_threads, cpus;
struct index_entry_offset_table *ieot = NULL;
-#endif
  
  	if (istate->initialized)

return istate->cache_nr;
@@ -2181,15 +2172,18 @@ int do_read_index(struct index_state *istate, const 
char *path, int must_exist)
  
  	src_offset = sizeof(*hdr);
  
-#ifndef NO_PTHREADS

-   nr_threads = git_config_get_index_threads();
+   if (HAVE_THREADS) {


In this case, nr_threads is already capped to online_cpus() below so 
this HAVE_THREADS test isn't needed.



+   nr_threads = git_config_get_index_threads();
  
-	/* TODO: does creating more threads than cores help? */

-   if (!nr_threads) {
-   nr_threads = istate->cache_nr / THREAD_COST;
-   cpus = online_cpus();
-   if (nr_threads > cpus)
-   nr_threads = cpus;
+   /* TODO: does creating more threads than cores help? */
+   if (!nr_threads) {
+   nr_threads = istate->cache_nr / THREAD_COST;
+   cpus = online_cpus();
+   if (nr_threads > cpus)
+   nr_threads = cpus;
+   }
+   } else {
+   nr_threads = 1;
}
  
  	if (nr_threads > 1) {

@@ -2219,21 +2213,16 @@ int do_read_index(struct index_state *istate, const 
char *path, int must_exist)
} else {
src_offset += load_all_cache_entries(istate, mmap, mmap_size, 
src_offset);
}
-#else
-   src_offset += load_all_cache_entries(istate, mmap, mmap_size, 
src_offset);
-#endif
  
  	istate->timestamp.sec = st.st_mtime;

istate->timestamp.nsec = ST_MTIME_NSEC(st);
  
  	/* if we created a thread, join it otherwise load the extensions on the primary thread */

-#ifndef NO_PTHREADS
-   if (extension_offset) {
+   if (HAVE_THREADS && extension_offset) {


Here extension_offset won't be set if there wasn't a thread created so 
the HAVE_THREADS test is not needed.



int ret = pthread_join(p.pthread, NULL);
if (ret)
die(_("unable to join load_index_extensions thread: 
%s"), strerror(ret));
}
-#endif
if (!extension_offset) {
p.src_offset = src_offset;
load_index_extensions();
@@ -2756,8 +2745,11 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
if (ce_write(, newfd, , sizeof(hdr)) < 0)
return -1;
  
-#ifndef NO_PTHREADS

-   nr_threads = git_config_get_index_threads();
+   if (HAVE_THREADS)


This shouldn't be needed either.  My assumption was that if someone 
explicitly asked for multiple threads we're better off failing than 
silently ignoring their request.  The default/automatic setting will 
detect the number of cpus and behave correctly.



+   nr_threads = git_config_get_index_threads();
+   else
+   nr_threads = 1;
+
if (nr_threads != 1) {
int ieot_blocks, cpus;
  
@@ -2787,7 +2779,6 @@ static int do_write_index(struct index_state *istate, struct tempfile 

Re: [PATCH 07/10] preload-index.c: remove #ifdef NO_PTHREADS

2018-10-29 Thread Ben Peart




On 10/27/2018 3:10 AM, Nguyễn Thái Ngọc Duy wrote:

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
  preload-index.c | 15 ++-
  1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/preload-index.c b/preload-index.c
index 9e7152ab14..0e24886aca 100644
--- a/preload-index.c
+++ b/preload-index.c
@@ -7,17 +7,7 @@
  #include "fsmonitor.h"
  #include "config.h"
  #include "progress.h"
-
-#ifdef NO_PTHREADS
-static void preload_index(struct index_state *index,
- const struct pathspec *pathspec,
- unsigned int refresh_flags)
-{
-   ; /* nothing */
-}
-#else
-
-#include <pthread.h>
+#include "thread-utils.h"
  
  /*

   * Mostly randomly chosen maximum thread counts: we
@@ -108,7 +98,7 @@ static void preload_index(struct index_state *index,
struct thread_data data[MAX_PARALLEL];
struct progress_data pd;
  
-	if (!core_preload_index)

+   if (!HAVE_THREADS || !core_preload_index)


I also "hoped in general that the non-threaded code paths could mostly 
just collapse into the same as the "threads == 1" cases, and we wouldn't 
have to "are threads even supported" in a lot of places."


In this case, if we cap the threads to online_cpus() the later test for 
'if (threads < 2)' would do that.  I haven't measured this specific case 
but in the other cases I have measured, having more threads than cpus 
did not result in a performance win.



return;
  
  	threads = index->cache_nr / THREAD_COST;

@@ -151,7 +141,6 @@ static void preload_index(struct index_state *index,
  
  	trace_performance_leave("preload index");

  }
-#endif
  
  int read_index_preload(struct index_state *index,

   const struct pathspec *pathspec,



Re: [PATCH/RFC] thread-utils: better wrapper to avoid #ifdef NO_PTHREADS

2018-10-26 Thread Ben Peart




On 10/23/2018 4:28 PM, Jeff King wrote:

On Thu, Oct 18, 2018 at 08:05:22PM +0200, Nguyễn Thái Ngọc Duy wrote:


On Thu, Oct 18, 2018 at 7:09 PM Jeff King  wrote:

In this particular case though I think we should be able to avoid so
much #if if we make a wrapper for pthread api that would return an
error or something when pthread is not available. But similar
situation may happen elsewhere too.


Yeah, I think that is generally the preferred method anyway, just
because of readability and simplicity.


I've wanted to do this for a while, so let's test the water and see if
it's well received.

This patch is a proof of concept that adds just enough macros so that
I can build index-pack.c on a single thread mode with zero #ifdef
related to NO_PTHREADS.

Besides readability and simplicity, it reduces the chances of breaking
conditional builds (e.g. you rename a variable name but forgot that
the variable is in #if block that is not used by your
compiler/platform).


Yes, I love this. We're already halfway there with things like
read_lock() in index-pack and elsewhere, which are conditionally no-ops.
The resulting code is much easier to read, I think.



I am also very much in favor of this.  I updated a couple of places 
where threading is used that I've been working in (preload-index and 
read-cache), and both are much simplified by your proof-of-concept patch.



Performance-wise I don't think there is any loss for single thread
mode. I rely on compilers recognizing HAVE_THREADS being a constant
and remove dead code or at least optimize in favor of non-dead code.

Memory-wise, yes we use some more memory in single thread mode. But we
don't have zillions of mutexes or thread id, so a bit extra memory
does not worry me so much.


Yeah, I don't think carrying around a handful of ints is going to be a
big deal.



Just to be complete, there _is_ an additional cost.  Today, code paths 
that are only executed when there are pthreads available are excluded 
from the binary (via #ifdef).  With this change, those code paths would 
now be included, causing some code bloat in NO_PTHREADS builds.


One example of this is in read-cache.c where the ieot read/write 
functions aren't included for NO_PTHREADS but now would be.



I also think we may want to make a fundamental shift in our view of
thread support. In the early days, it was "well, this is a thing that
modern systems can take advantage of for certain commands". But these
days I suspect it is more like "there are a handful of legacy systems
that do not even support threads".

I don't think we should break the build on those legacy systems, but
it's probably OK to stop thinking of it as "non-threaded platforms are
the default and must pay zero cost" and more as "threaded platforms are
the default, and non-threaded ones are OK to pay a small cost as long as
they still work".



I agree, though I'm still curious whether there are still non-threaded 
platforms taking new versions of git.  Perhaps we should do the 
deprecation warning you suggested elsewhere and see how much pushback 
we get.  It's unlikely we'd get lucky and be able to stop supporting 
them completely, but it's worth asking!



@@ -74,4 +79,29 @@ int init_recursive_mutex(pthread_mutex_t *m)
pthread_mutexattr_destroy();
}
return ret;
+#else
+   return ENOSYS;
+#endif
+}


I suspect some of these ENOSYS could just become a silent success.
("yep, I initialized your dummy mutex"). But it probably doesn't matter
much either way, as we would not generally even bother checking this
return.


+#ifdef NO_PTHREADS
+int dummy_pthread_create(pthread_t *pthread, const void *attr,
+void *(*fn)(void *), void *data)
+{
+   return ENOSYS;
  }


Whereas for this one, ENOSYS makes a lot of sense (we should avoid the
threaded code-path anyway when we see that online_cpus()==1, and this
would let us know when we mess that up).



This highlights something that anyone writing multi-threaded code will 
need to pay attention to, which wasn't an issue before.  If you attempt to 
create more threads than online_cpus(), the pthread_create() call will 
fail and needs to be handled gracefully.


One example of this is in preload-index.c where (up to) 20 threads are 
created irrespective of what online_cpus() returns and if 
pthread_create() fails, it just dies.  The logic would need to be 
updated for this to work correctly.
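
For illustration, the kind of graceful handling I mean looks roughly 
like this (a generic sketch with made-up names, not the current 
preload-index code):

    struct slice {
            pthread_t tid;
            int started;
            /* plus whatever per-thread work description is needed */
    };

    static void *worker_fn(void *arg)
    {
            /* process one slice of the work */
            return NULL;
    }

    static void run_slices(struct slice *p, int nr_threads)
    {
            int i;

            for (i = 0; i < nr_threads; i++) {
                    p[i].started = !pthread_create(&p[i].tid, NULL, worker_fn, &p[i]);
                    if (!p[i].started)
                            worker_fn(&p[i]); /* fall back to the calling thread */
            }
            for (i = 0; i < nr_threads; i++)
                    if (p[i].started)
                            pthread_join(p[i].tid, NULL);
    }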


I still think this is a much simpler issue to deal with than what we 
have today with having to write/debug multiple code paths but I did want 
to point it out for completeness.



+int dummy_pthread_init(void *data)
+{
+   /*
+* Do nothing.
+*
+* The main purpose of this function is to break compiler's
+* flow analysis or it may realize that functions like
+* pthread_mutex_init() is no-op, which means the (static)
+* variable is not used/initialized at all and trigger
+* 

Re: [PATCH v4 2/3] reset: add new reset.quiet config setting

2018-10-25 Thread Ben Peart




On 10/25/2018 5:26 AM, Junio C Hamano wrote:

Junio C Hamano  writes:


To be honest, I find the second sentence in your rewrite even more
confusing.  It reads as if `reset.quiet` configuration variable
can be used to restore the "show what is yet to be added"
behaviour, due to the parenthetical mention of the default behaviour
without any configuration.

The command reports what is yet to be added to the index
after `reset` by default.  It can be made to only report
errors with the `--quiet` option, or setting `reset.quiet`
configuration variable to `true` (the latter can be
overriden with `--no-quiet`).

That may not be much better, though X-<.


In any case, the comments are getting closer to the bikeshedding
territory, that can be easily addressed incrementally.  I am getting
the impression that everybody agrees that the change is desirable,
sufficiently documented and properly implemented.

Shall we mark it for "Will merge to 'next'" in the what's cooking
report and leave further refinements to incremental updates as
needed?



While not great, I think it is good enough.  I don't think either of the 
last couple of rewrite attempts were clearly better than what is in the 
latest patch. I'd agree we should merge to 'next' and if someone comes 
up with something great, we can update it then.


Re: [PATCH v1] load_cache_entries_threaded: remove unused src_offset parameter

2018-10-23 Thread Ben Peart




On 10/22/2018 7:05 PM, Junio C Hamano wrote:

Jeff King  writes:


If nobody uses it, should we drop the return value, too? Like:


Yup.



I'm good with that.

At one point I also had the additional #ifndef NO_PTHREADS lines but it 
was starting to get messy with the threaded vs non-threaded code paths 
so I removed them.  I'm fine with whichever people find more readable.


It does make me wonder if there are still platforms taking new builds of 
git that don't support threads.  Do we still need to 
write/test/debug/read through the single threaded code paths?


Is the diff Peff sent enough or do you want me to send another iteration 
on the patch?


Thanks,

Ben



diff --git a/read-cache.c b/read-cache.c
index 78c9516eb7..4b44a2eae5 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2052,12 +2052,11 @@ static void *load_cache_entries_thread(void *_data)
return NULL;
  }
  
-static unsigned long load_cache_entries_threaded(struct index_state *istate, const char *mmap, size_t mmap_size,

-int nr_threads, struct 
index_entry_offset_table *ieot)
+static void load_cache_entries_threaded(struct index_state *istate, const char 
*mmap, size_t mmap_size,
+   int nr_threads, struct 
index_entry_offset_table *ieot)
  {
int i, offset, ieot_blocks, ieot_start, err;
struct load_cache_entries_thread_data *data;
-   unsigned long consumed = 0;
  
  	/* a little sanity checking */

if (istate->name_hash_initialized)
@@ -2115,12 +2114,9 @@ static unsigned long load_cache_entries_threaded(struct 
index_state *istate, con
if (err)
die(_("unable to join load_cache_entries thread: %s"), 
strerror(err));
mem_pool_combine(istate->ce_mem_pool, p->ce_mem_pool);
-   consumed += p->consumed;
}
  
  	free(data);

-
-   return consumed;
  }
  #endif
  


-Peff


[PATCH v4 0/3] speed up git reset

2018-10-23 Thread Ben Peart
From: Ben Peart 

Updated the wording in the documentation and commit messages to (hopefully)
make it clearer. Added the warning about 'reset --quiet' to the advice
system so that it can be turned off.

Base Ref: 
Web-Diff: https://github.com/benpeart/git/commit/8a2fef45d4
Checkout: git fetch https://github.com/benpeart/git reset-refresh-index-v4 && 
git checkout 8a2fef45d4


### Patches

Ben Peart (3):
  reset: don't compute unstaged changes after reset when --quiet
  reset: add new reset.quiet config setting
  reset: warn when refresh_index() takes more than 2 seconds

 Documentation/config.txt|  7 +++
 Documentation/git-reset.txt |  5 -
 advice.c|  2 ++
 advice.h|  1 +
 builtin/reset.c | 15 ++-
 5 files changed, 28 insertions(+), 2 deletions(-)


base-commit: ca63497355222acefcca02b9cbb540a4768f3286
-- 
2.18.0.windows.1




[PATCH v4 1/3] reset: don't compute unstaged changes after reset when --quiet

2018-10-23 Thread Ben Peart
From: Ben Peart 

When git reset is run with the --quiet flag, don't bother finding any
additional unstaged changes as they won't be output anyway.  This speeds up
the git reset command by avoiding having to lstat() every file looking for
changes that aren't going to be reported anyway.

The savings can be significant.  In a repo on Windows with 200K files
"git reset" drops from 7.16 seconds to 0.32 seconds for a savings of 96%.

Signed-off-by: Ben Peart 
---
 builtin/reset.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/reset.c b/builtin/reset.c
index 11cd0dcb8c..04f0d9b4f5 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -375,7 +375,7 @@ int cmd_reset(int argc, const char **argv, const char 
*prefix)
int flags = quiet ? REFRESH_QUIET : 
REFRESH_IN_PORCELAIN;
if (read_from_tree(, , intent_to_add))
return 1;
-   if (get_git_work_tree())
+   if (!quiet && get_git_work_tree())
refresh_index(_index, flags, NULL, NULL,
  _("Unstaged changes after 
reset:"));
} else {
-- 
2.18.0.windows.1



[PATCH v4 3/3] reset: warn when refresh_index() takes more than 2 seconds

2018-10-23 Thread Ben Peart
From: Ben Peart 

refresh_index() is done after a reset command as an optimization.  Because
it can be an expensive call, warn the user if it takes more than 2 seconds
and tell them how to avoid it using the --quiet command line option or
reset.quiet config setting.

Signed-off-by: Ben Peart 
---
 Documentation/config.txt |  4 
 advice.c |  2 ++
 advice.h |  1 +
 builtin/reset.c  | 14 +-
 4 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index a2d1b8b116..415db31def 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -333,6 +333,10 @@ advice.*::
commitBeforeMerge::
Advice shown when linkgit:git-merge[1] refuses to
merge to avoid overwriting local changes.
+   resetQuiet::
+   Advice to consider using the `--quiet` option to 
linkgit:git-reset[1]
+   when the command takes more than 2 seconds to enumerate unstaged
+   changes after reset.
resolveConflict::
Advice shown by various commands when conflicts
prevent the operation from being performed.
diff --git a/advice.c b/advice.c
index 3561cd64e9..5f35656409 100644
--- a/advice.c
+++ b/advice.c
@@ -12,6 +12,7 @@ int advice_push_needs_force = 1;
 int advice_status_hints = 1;
 int advice_status_u_option = 1;
 int advice_commit_before_merge = 1;
+int advice_reset_quiet_warning = 1;
 int advice_resolve_conflict = 1;
 int advice_implicit_identity = 1;
 int advice_detached_head = 1;
@@ -65,6 +66,7 @@ static struct {
{ "statusHints", _status_hints },
{ "statusUoption", _status_u_option },
{ "commitBeforeMerge", _commit_before_merge },
+   { "resetQuiet", _reset_quiet_warning },
{ "resolveConflict", _resolve_conflict },
{ "implicitIdentity", _implicit_identity },
{ "detachedHead", _detached_head },
diff --git a/advice.h b/advice.h
index ab24df0fd0..696bf0e7d2 100644
--- a/advice.h
+++ b/advice.h
@@ -12,6 +12,7 @@ extern int advice_push_needs_force;
 extern int advice_status_hints;
 extern int advice_status_u_option;
 extern int advice_commit_before_merge;
+extern int advice_reset_quiet_warning;
 extern int advice_resolve_conflict;
 extern int advice_implicit_identity;
 extern int advice_detached_head;
diff --git a/builtin/reset.c b/builtin/reset.c
index 3b43aee544..b31a0eae8a 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -25,6 +25,8 @@
 #include "submodule.h"
 #include "submodule-config.h"
 
+#define REFRESH_INDEX_DELAY_WARNING_IN_MS (2 * 1000)
+
 static const char * const git_reset_usage[] = {
N_("git reset [--mixed | --soft | --hard | --merge | --keep] [-q] 
[]"),
N_("git reset [-q] [] [--] ..."),
@@ -376,9 +378,19 @@ int cmd_reset(int argc, const char **argv, const char 
*prefix)
int flags = quiet ? REFRESH_QUIET : 
REFRESH_IN_PORCELAIN;
if (read_from_tree(, , intent_to_add))
return 1;
-   if (!quiet && get_git_work_tree())
+   if (!quiet && get_git_work_tree()) {
+   uint64_t t_begin, t_delta_in_ms;
+
+   t_begin = getnanotime();
refresh_index(_index, flags, NULL, NULL,
  _("Unstaged changes after 
reset:"));
+   t_delta_in_ms = (getnanotime() - t_begin) / 1000000;
+   if (advice_reset_quiet_warning && t_delta_in_ms 
> REFRESH_INDEX_DELAY_WARNING_IN_MS) {
+   printf(_("\nIt took %.2f seconds to 
enumerate unstaged changes after reset.  You can\n"
+   "use '--quiet' to avoid this.  
Set the config setting reset.quiet to true\n"
+   "to make this the default.\n"), 
t_delta_in_ms / 1000.0);
+   }
+   }
} else {
int err = reset_index(, reset_type, quiet);
if (reset_type == KEEP && !err)
-- 
2.18.0.windows.1



[PATCH v4 2/3] reset: add new reset.quiet config setting

2018-10-23 Thread Ben Peart
From: Ben Peart 

Add a reset.quiet config setting that sets the default value of the --quiet
flag when running the reset command.  This enables users to change the
default behavior to take advantage of the performance win from avoiding
the scan for unstaged changes after reset.  Defaults to false.

Signed-off-by: Ben Peart 
---
 Documentation/config.txt| 3 +++
 Documentation/git-reset.txt | 5 -
 builtin/reset.c | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index f6f4c21a54..a2d1b8b116 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2728,6 +2728,9 @@ rerere.enabled::
`$GIT_DIR`, e.g. if "rerere" was previously used in the
repository.
 
+reset.quiet::
+   When set to true, 'git reset' will default to the '--quiet' option.
+
 include::sendemail-config.txt[]
 
 sequence.editor::
diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt
index 1d697d9962..2dac95c71a 100644
--- a/Documentation/git-reset.txt
+++ b/Documentation/git-reset.txt
@@ -95,7 +95,10 @@ OPTIONS
 
 -q::
 --quiet::
-   Be quiet, only report errors.
+--no-quiet::
+   Be quiet, only report errors. The default behavior is set by the
+   `reset.quiet` config option. `--quiet` and `--no-quiet` will
+   override the default behavior.
 
 
 EXAMPLES
diff --git a/builtin/reset.c b/builtin/reset.c
index 04f0d9b4f5..3b43aee544 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -306,6 +306,7 @@ int cmd_reset(int argc, const char **argv, const char 
*prefix)
};
 
git_config(git_reset_config, NULL);
+   git_config_get_bool("reset.quiet", );
 
argc = parse_options(argc, argv, prefix, options, git_reset_usage,
PARSE_OPT_KEEP_DASHDASH);
-- 
2.18.0.windows.1



Re: [PATCH v3 2/3] reset: add new reset.quiet config setting

2018-10-23 Thread Ben Peart




On 10/22/2018 10:45 AM, Duy Nguyen wrote:

On Mon, Oct 22, 2018 at 3:38 PM Ben Peart  wrote:


From: Ben Peart 

Add a reset.quiet config setting that sets the default value of the --quiet
flag when running the reset command.  This enables users to change the
default behavior to take advantage of the performance advantages of
avoiding the scan for unstaged changes after reset.  Defaults to false.

Signed-off-by: Ben Peart 
---
  Documentation/config.txt| 3 +++
  Documentation/git-reset.txt | 4 +++-
  builtin/reset.c | 1 +
  3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index f6f4c21a54..a2d1b8b116 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2728,6 +2728,9 @@ rerere.enabled::
 `$GIT_DIR`, e.g. if "rerere" was previously used in the
 repository.

+reset.quiet::
+   When set to true, 'git reset' will default to the '--quiet' option.
+


With 'nd/config-split' topic moving pretty much all config keys out of
config.txt, you probably want to do the same for this series: add this
in a new file called Documentation/reset-config.txt then include the
file here like the sendemail one below.



Seems a bit overkill to pull a line of documentation into a separate 
file and replace it with a line of 'import' logic.  Perhaps if/when 
there is more documentation to pull out that would make more sense.



  include::sendemail-config.txt[]

  sequence.editor::


Re: [PATCH v1 2/2] reset: add new reset.quietDefault config setting

2018-10-23 Thread Ben Peart




On 10/23/2018 5:13 AM, Ævar Arnfjörð Bjarmason wrote:


On Wed, Oct 17 2018, Jeff King wrote:


On Wed, Oct 17, 2018 at 02:19:59PM -0400, Eric Sunshine wrote:


On Wed, Oct 17, 2018 at 12:40 PM Ben Peart  wrote:

Add a reset.quietDefault config setting that sets the default value of the
--quiet flag when running the reset command.  This enables users to change
the default behavior to take advantage of the performance advantages of
avoiding the scan for unstaged changes after reset.  Defaults to false.


As with the previous patch, my knee-jerk reaction is that this really
feels wrong being tied to --quiet. It's particularly unintuitive.

What I _could_ see, and what would feel more natural is if you add a
new option (say, --optimize) which is more general, incorporating
whatever optimizations become available in the future, not just this
one special-case. A side-effect of --optimize is that it implies
--quiet, and that is something which can and should be documented.


Heh, I just wrote something very similar elsewhere in the thread. I'm
still not sure if it's a dumb idea, but at least we can be dumb
together.


Same here. I'm in general in favor of having the ability to configure
porcelain command-line options, but in this case it seems like it would
be more logical to head for something like:

 core.uiMessaging=[default,exhaustive,lossyButFaster,quiet]

Where default would be our current "exhaustive", and this --quiet case
would be covered by lossyButFaster, but also things like the
"--no-ahead-behind" flag for git-status.



This sounds like an easy way to choose a set of default values that we 
think make sense to get bundled together. That could be a way for users 
to quickly choose a set of good defaults but I still think you would 
want fine-grained control over the individual settings.


Coming up with the set of values to bundle together, figuring out the 
hierarchy of precedence for this new global config->individual 
config->individual command line, and updating the code to make it all work 
is outside the scope of this particular patch series.



Just on this implementation: The usual idiom for flags as config is
command.flag=xyz, not command.flagDefault=xyz, so this should be
reset.quiet.



Thanks, I agree and fixed that in later iterations.


Re: [PATCH v3 2/3] reset: add new reset.quiet config setting

2018-10-23 Thread Ben Peart




On 10/22/2018 4:06 PM, Jeff King wrote:

On Mon, Oct 22, 2018 at 08:13:32PM +0100, Ramsay Jones wrote:


  -q::
  --quiet::
-   Be quiet, only report errors.
+--no-quiet::
+   Be quiet, only report errors. The default behavior respects the
+   `reset.quiet` config option, or `--no-quiet` if that is not set.


Sorry, I can't quite parse this; -q,--quiet and --no-quiet on the
command line (should) trump whatever rest.quiet is set to in the
configuration. Is that not the case?


That is the case, and what was meant by "the default behavior" (i.e.,
the behavior when none of these is used). Maybe there's a more clear way
of saying that.

-Peff



Is this more clear?

-q::
--quiet::
--no-quiet::
Be quiet, only report errors. The default behavior is set by the
`reset.quiet` config option. `--quiet` and `--no-quiet` will
override the default behavior.


Re: [PATCH v3 3/3] reset: warn when refresh_index() takes more than 2 seconds

2018-10-23 Thread Ben Peart




On 10/22/2018 8:23 PM, Junio C Hamano wrote:

Ben Peart  writes:


From: Ben Peart 

refresh_index() is done after a reset command as an optimization.  Because
it can be an expensive call, warn the user if it takes more than 2 seconds
and tell them how to avoid it using the --quiet command line option or
reset.quiet config setting.


I am moderately negative on this step.  It will irritate users who
know about and still choose not to use the "--quiet" option, because
they want to gain performance in later real work and/or they want to
know what paths are now dirty.  A working tree that needs long time
to refresh will take long time to instead do "cached stat info says
it may be modified so let's run 'diff' for real---we may discover
that there wasn't any change after all" when a "git diff" is run
after a "reset --quiet" that does not refresh; i.e. there would be
valid reasons to run "reset" without "--quiet".

It feels a bit irresponsible to throw an ad without informing
pros-and-cons and to pretend that we are advising on BCP.  In
general, we do *not* advertise new features randomly like this.

Thanks.  The previous two steps looks quite sensible.



The challenge I'm trying to address is the discoverability of this 
significant performance win.  In earlier review feedback, all mention of 
this option speeding up reset was removed.  I added this patch so that 
users can discover the option even exists.


While I modeled this on the untracked files/--uno and ahead/behind 
logic, I missed adding this to the 'advice' logic so that it can be 
turned off and avoid irritating users.  I'll send an updated patch that 
corrects that.
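
For reference, a minimal sketch of that advice gating (using the
advice_reset_quiet_warning flag that shows up in the v4 diff; the exact
advice key name is whatever the final patch wires up in advice.c):

	t_delta_in_ms = (getnanotime() - t_begin) / 1000000;
	if (advice_reset_quiet_warning &&
	    t_delta_in_ms > REFRESH_INDEX_DELAY_WARNING_IN_MS) {
		printf(_("\nIt took %.2f seconds to enumerate unstaged changes after reset.\n"
			"You can use '--quiet' to avoid this.\n"),
			t_delta_in_ms / 1000.0);
	}

Users who find the hint noisy can then turn it off through the usual
advice.* configuration.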


RE: [PATCH v3 1/3] reset: don't compute unstaged changes after reset when --quiet

2018-10-22 Thread Ben Peart
> -Original Message-
> From: Johannes Schindelin 
> Sent: Monday, October 22, 2018 4:45 PM
> To: Ben Peart 
> Cc: git@vger.kernel.org; gits...@pobox.com; Ben Peart
> ; p...@peff.net; sunsh...@sunshineco.com
> Subject: Re: [PATCH v3 1/3] reset: don't compute unstaged changes after
> reset when --quiet
> 
> Hi Ben,
> 
> On Mon, 22 Oct 2018, Ben Peart wrote:
> 
> > From: Ben Peart 
> >
> > When git reset is run with the --quiet flag, don't bother finding any
> > additional unstaged changes as they won't be output anyway.  This speeds
> up
> > the git reset command by avoiding having to lstat() every file looking for
> > changes that aren't going to be reported anyway.
> >
> > The savings can be significant.  In a repo with 200K files "git reset"
> > drops from 7.16 seconds to 0.32 seconds for a savings of 96%.
> 
> That's very nice!
> 
> Those numbers, just out of curiosity, are they on Windows? Or on Linux?
> 

It's safe to assume all my numbers are on Windows. :-)

> Ciao,
> Dscho




[PATCH v1] load_cache_entries_threaded: remove unused src_offset parameter

2018-10-22 Thread Ben Peart
From: Ben Peart 

Remove the src_offset parameter which is unused as a result of switching
to the IEOT table of offsets.  Also stop incrementing src_offset in the
multi-threaded codepath as it is no longer used and could cause confusion.

Signed-off-by: Ben Peart 
---

Notes:
Base Ref:
Web-Diff: https://github.com/benpeart/git/commit/7360721408
Checkout: git fetch https://github.com/benpeart/git 
read-index-multithread-no-src-offset-v1 && git checkout 7360721408

 read-cache.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index f9fa6a7979..6db6f0f220 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2037,7 +2037,7 @@ static void *load_cache_entries_thread(void *_data)
 }
 
 static unsigned long load_cache_entries_threaded(struct index_state *istate, 
const char *mmap, size_t mmap_size,
-   unsigned long src_offset, int nr_threads, struct 
index_entry_offset_table *ieot)
+   int nr_threads, struct index_entry_offset_table *ieot)
 {
int i, offset, ieot_blocks, ieot_start, err;
struct load_cache_entries_thread_data *data;
@@ -2198,7 +2198,7 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
ieot = read_ieot_extension(mmap, mmap_size, extension_offset);
 
if (ieot) {
-   src_offset += load_cache_entries_threaded(istate, mmap, 
mmap_size, src_offset, nr_threads, ieot);
+   load_cache_entries_threaded(istate, mmap, mmap_size, 
nr_threads, ieot);
free(ieot);
} else {
src_offset += load_all_cache_entries(istate, mmap, mmap_size, 
src_offset);

base-commit: f58b85df6937e3f3d9ef26bb52a513c8ada17ffc
-- 
2.18.0.windows.1



Re: [PATCH v8 7/7] read-cache: load cache entries on worker threads

2018-10-22 Thread Ben Peart




On 10/21/2018 10:14 PM, Junio C Hamano wrote:

Jeff King  writes:


On Wed, Oct 10, 2018 at 11:59:38AM -0400, Ben Peart wrote:


+static unsigned long load_cache_entries_threaded(struct index_state *istate, 
const char *mmap, size_t mmap_size,
+   unsigned long src_offset, int nr_threads, struct 
index_entry_offset_table *ieot)


The src_offset parameter isn't used in this function.

In early versions of the series, it was used to feed the p->start_offset
field of each load_cache_entries_thread_data. But after the switch to
ieot, we don't, and instead feed p->ieot_start. But we always begin that
at 0.

Is that right (and we can drop the parameter), or should this logic:


+   offset = ieot_start = 0;
+   ieot_blocks = DIV_ROUND_UP(ieot->nr, nr_threads);
+   for (i = 0; i < nr_threads; i++) {
[...]


be starting at src_offset instead of 0?


I think "offset" has nothing to do with the offset into the mmapped
region of memory.  It is an integer index into a (virtual) array
that is a concatenation of ieot->entries[].entries[], and it is
correct to count from zero.  The value taken from that array using
the index is used to compute the offset into the mmapped region.

Unlike load_all_cache_entries() called from the other side of the
same if() statement in the same caller, this does not depend on the
fact that the first index entry in the mmapped region appears
immediately after the index-file header.  It goes from the offsets
into the file that are recorded in the entry offset table that is an
index extension, so the sizeof(*hdr) that initializes src_offset is
not used by the codepath.

The number of bytes consumed, i.e. its return value from the
function, is not really used, either, as the caller does not use
src_offset for anything other than updating it with "+=" and passing
it to this function (which does not use it) when it calls this
function (i.e. when ieot extension exists--and by definition when
that extension exists extension_offset is not 0, so we do not make
the final load_index_extensions() call in the caller that uses
src_offset).



Thanks for discovering/analyzing this.  You're right, I missed removing 
this when we switched from a single offset to an array of offsets via 
the IEOT.  I'll send a patch to fix both issues shortly.
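
To make the distinction concrete, here is a rough sketch (not the patch
itself) of how the IEOT drives the work split; the worker hand-off is
elided and 'nr_entries' is just an illustrative counter:

	/* 'nr_entries' counts cache entries handed out so far, not bytes */
	nr_entries = 0;
	for (j = 0; j < ieot->nr; j++) {
		const char *first = mmap + ieot->entries[j].offset; /* byte offset from the table */
		int nr = ieot->entries[j].nr;                       /* cache entries in this block */

		/* hand (first, nr, nr_entries) off to a worker thread ... */
		nr_entries += nr;
	}

The byte position in the mmapped file always comes from the table entry,
which is why the sizeof(*hdr) based src_offset never enters this code path.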


[PATCH v3 2/3] reset: add new reset.quiet config setting

2018-10-22 Thread Ben Peart
From: Ben Peart 

Add a reset.quiet config setting that sets the default value of the --quiet
flag when running the reset command.  This enables users to change the
default behavior to take advantage of the performance advantages of
avoiding the scan for unstaged changes after reset.  Defaults to false.

Signed-off-by: Ben Peart 
---
 Documentation/config.txt| 3 +++
 Documentation/git-reset.txt | 4 +++-
 builtin/reset.c | 1 +
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index f6f4c21a54..a2d1b8b116 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2728,6 +2728,9 @@ rerere.enabled::
`$GIT_DIR`, e.g. if "rerere" was previously used in the
repository.
 
+reset.quiet::
+   When set to true, 'git reset' will default to the '--quiet' option.
+
 include::sendemail-config.txt[]
 
 sequence.editor::
diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt
index 1d697d9962..51a427a34a 100644
--- a/Documentation/git-reset.txt
+++ b/Documentation/git-reset.txt
@@ -95,7 +95,9 @@ OPTIONS
 
 -q::
 --quiet::
-   Be quiet, only report errors.
+--no-quiet::
+   Be quiet, only report errors. The default behavior respects the
+   `reset.quiet` config option, or `--no-quiet` if that is not set.
 
 
 EXAMPLES
diff --git a/builtin/reset.c b/builtin/reset.c
index 04f0d9b4f5..3b43aee544 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -306,6 +306,7 @@ int cmd_reset(int argc, const char **argv, const char 
*prefix)
};
 
git_config(git_reset_config, NULL);
+   git_config_get_bool("reset.quiet", );
 
argc = parse_options(argc, argv, prefix, options, git_reset_usage,
PARSE_OPT_KEEP_DASHDASH);
-- 
2.18.0.windows.1



[PATCH v3 1/3] reset: don't compute unstaged changes after reset when --quiet

2018-10-22 Thread Ben Peart
From: Ben Peart 

When git reset is run with the --quiet flag, don't bother finding any
additional unstaged changes as they won't be output anyway.  This speeds up
the git reset command by avoiding having to lstat() every file looking for
changes that aren't going to be reported anyway.

The savings can be significant.  In a repo with 200K files "git reset"
drops from 7.16 seconds to 0.32 seconds for a savings of 96%.

Signed-off-by: Ben Peart 
---
 builtin/reset.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/reset.c b/builtin/reset.c
index 11cd0dcb8c..04f0d9b4f5 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -375,7 +375,7 @@ int cmd_reset(int argc, const char **argv, const char 
*prefix)
int flags = quiet ? REFRESH_QUIET : 
REFRESH_IN_PORCELAIN;
if (read_from_tree(, , intent_to_add))
return 1;
-   if (get_git_work_tree())
+   if (!quiet && get_git_work_tree())
refresh_index(_index, flags, NULL, NULL,
  _("Unstaged changes after 
reset:"));
} else {
-- 
2.18.0.windows.1



[PATCH v3 3/3] reset: warn when refresh_index() takes more than 2 seconds

2018-10-22 Thread Ben Peart
From: Ben Peart 

refresh_index() is done after a reset command as an optimization.  Because
it can be an expensive call, warn the user if it takes more than 2 seconds
and tell them how to avoid it using the --quiet command line option or
reset.quiet config setting.

Signed-off-by: Ben Peart 
---
 builtin/reset.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/builtin/reset.c b/builtin/reset.c
index 3b43aee544..d95a27d52e 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -25,6 +25,8 @@
 #include "submodule.h"
 #include "submodule-config.h"
 
+#define REFRESH_INDEX_DELAY_WARNING_IN_MS (2 * 1000)
+
 static const char * const git_reset_usage[] = {
N_("git reset [--mixed | --soft | --hard | --merge | --keep] [-q] 
[]"),
N_("git reset [-q] [] [--] ..."),
@@ -376,9 +378,19 @@ int cmd_reset(int argc, const char **argv, const char 
*prefix)
int flags = quiet ? REFRESH_QUIET : 
REFRESH_IN_PORCELAIN;
if (read_from_tree(, , intent_to_add))
return 1;
-   if (!quiet && get_git_work_tree())
+   if (!quiet && get_git_work_tree()) {
+   uint64_t t_begin, t_delta_in_ms;
+
+   t_begin = getnanotime();
 			refresh_index(_index, flags, NULL, NULL,
 				      _("Unstaged changes after reset:"));
+			t_delta_in_ms = (getnanotime() - t_begin) / 1000000;
+			if (t_delta_in_ms > REFRESH_INDEX_DELAY_WARNING_IN_MS) {
+				printf(_("\nIt took %.2f seconds to enumerate unstaged changes after reset.  You can\n"
+					"use '--quiet' to avoid this.  Set the config setting reset.quiet to true\n"
+					"to make this the default."), t_delta_in_ms / 1000.0);
+			}
+		}
} else {
int err = reset_index(, reset_type, quiet);
if (reset_type == KEEP && !err)
-- 
2.18.0.windows.1



[PATCH v3 0/3] speed up git reset

2018-10-22 Thread Ben Peart
From: Ben Peart 

Reworded the documentation for git-reset per review feedback.

Base Ref: 
Web-Diff: https://github.com/benpeart/git/commit/1228898917
Checkout: git fetch https://github.com/benpeart/git reset-refresh-index-v3 && 
git checkout 1228898917


### Interdiff (v2..v3):

diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt
index 1d697d9962..51a427a34a 100644
--- a/Documentation/git-reset.txt
+++ b/Documentation/git-reset.txt
@@ -95,7 +95,9 @@ OPTIONS
 
 -q::
 --quiet::
-   Be quiet, only report errors.
+--no-quiet::
+   Be quiet, only report errors. The default behavior respects the
+   `reset.quiet` config option, or `--no-quiet` if that is not set.
 
 
 EXAMPLES


### Patches

Ben Peart (3):
  reset: don't compute unstaged changes after reset when --quiet
  reset: add new reset.quiet config setting
  reset: warn when refresh_index() takes more than 2 seconds

 Documentation/config.txt|  3 +++
 Documentation/git-reset.txt |  4 +++-
 builtin/reset.c | 15 ++-
 3 files changed, 20 insertions(+), 2 deletions(-)


base-commit: ca63497355222acefcca02b9cbb540a4768f3286
-- 
2.18.0.windows.1




Re: [PATCH v2 2/3] reset: add new reset.quiet config setting

2018-10-19 Thread Ben Peart




On 10/19/2018 1:11 PM, Jeff King wrote:

On Fri, Oct 19, 2018 at 01:10:34PM -0400, Eric Sunshine wrote:


On Fri, Oct 19, 2018 at 12:46 PM Jeff King  wrote:

On Fri, Oct 19, 2018 at 12:36:44PM -0400, Eric Sunshine wrote:

How does the user reverse this for a particular git-reset invocation?
There is no --no-quiet or --verbose option.

Perhaps you want to use OPT__VERBOSITY() instead of OPT__QUIET() in
builtin/reset.c and document that --verbose overrides --quiet and
reset.quiet (or something like that).


I think OPT__QUIET() provides --no-quiet, since it's really an
OPT_COUNTUP() under the hood. Saying "--no-quiet" should reset it back
to 0.


Okay. In any case, --no-quiet probably ought to be mentioned alongside
the "reset.quiet" option (and perhaps in git-reset.txt to as a way to
reverse "reset.quiet").


Yes, I'd agree with that.

-Peff



Makes sense.  I'll update the docs to say:

-q::
--quiet::
--no-quiet::
Be quiet, only report errors.
+
With --no-quiet report errors and unstaged changes after reset.
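
For completeness, the reason --no-quiet works without any extra code is
that OPT__QUIET() is a countup option under the hood; roughly:

	static int quiet;
	struct option options[] = {
		/* expands to OPT_COUNTUP('q', "quiet", &quiet, ...) */
		OPT__QUIET(&quiet, N_("be quiet, only report errors")),
		OPT_END()
	};

so '--quiet' bumps the counter and '--no-quiet' resets it to 0, which then
wins over whatever reset.quiet set as the default.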


Re: [PATCH v2 2/3] reset: add new reset.quiet config setting

2018-10-19 Thread Ben Peart




On 10/19/2018 12:46 PM, Jeff King wrote:

On Fri, Oct 19, 2018 at 12:36:44PM -0400, Eric Sunshine wrote:


On Fri, Oct 19, 2018 at 12:12 PM Ben Peart  wrote:

Add a reset.quiet config setting that sets the default value of the --quiet
flag when running the reset command.  This enables users to change the
default behavior to take advantage of the performance advantages of
avoiding the scan for unstaged changes after reset.  Defaults to false.

Signed-off-by: Ben Peart 
---
diff --git a/Documentation/config.txt b/Documentation/config.txt
@@ -2728,6 +2728,9 @@ rerere.enabled::
+reset.quiet::
+   When set to true, 'git reset' will default to the '--quiet' option.


How does the user reverse this for a particular git-reset invocation?
There is no --no-quiet or --verbose option.

Perhaps you want to use OPT__VERBOSITY() instead of OPT__QUIET() in
builtin/reset.c and document that --verbose overrides --quiet and
reset.quiet (or something like that).


I think OPT__QUIET() provides --no-quiet, since it's really an
OPT_COUNTUP() under the hood. Saying "--no-quiet" should reset it back
to 0.



Thanks Peff.  That is correct as confirmed by:


C:\Repos\VSO\src>git reset --no-quiet
Unstaged changes after reset:
M   init.ps1

It took 6.74 seconds to enumerate unstaged changes after reset.  You can
use '--quiet' to avoid this.  Set the config setting reset.quiet to true
to make this the default.



-Peff



[PATCH v2 1/3] reset: don't compute unstaged changes after reset when --quiet

2018-10-19 Thread Ben Peart
From: Ben Peart 

When git reset is run with the --quiet flag, don't bother finding any
additional unstaged changes as they won't be output anyway.  This speeds up
the git reset command by avoiding having to lstat() every file looking for
changes that aren't going to be reported anyway.

The savings can be significant.  In a repo with 200K files "git reset"
drops from 7.16 seconds to 0.32 seconds for a savings of 96%.

Signed-off-by: Ben Peart 
---
 builtin/reset.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/reset.c b/builtin/reset.c
index 11cd0dcb8c..04f0d9b4f5 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -375,7 +375,7 @@ int cmd_reset(int argc, const char **argv, const char 
*prefix)
int flags = quiet ? REFRESH_QUIET : 
REFRESH_IN_PORCELAIN;
if (read_from_tree(, , intent_to_add))
return 1;
-   if (get_git_work_tree())
+   if (!quiet && get_git_work_tree())
refresh_index(_index, flags, NULL, NULL,
  _("Unstaged changes after 
reset:"));
} else {
-- 
2.18.0.windows.1



[PATCH v2 3/3] reset: warn when refresh_index() takes more than 2 seconds

2018-10-19 Thread Ben Peart
From: Ben Peart 

refresh_index() is done after a reset command as an optimization.  Because
it can be an expensive call, warn the user if it takes more than 2 seconds
and tell them how to avoid it using the --quiet command line option or
reset.quiet config setting.

Signed-off-by: Ben Peart 
---
 builtin/reset.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/builtin/reset.c b/builtin/reset.c
index 3b43aee544..d95a27d52e 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -25,6 +25,8 @@
 #include "submodule.h"
 #include "submodule-config.h"
 
+#define REFRESH_INDEX_DELAY_WARNING_IN_MS (2 * 1000)
+
 static const char * const git_reset_usage[] = {
N_("git reset [--mixed | --soft | --hard | --merge | --keep] [-q] 
[]"),
N_("git reset [-q] [] [--] ..."),
@@ -376,9 +378,19 @@ int cmd_reset(int argc, const char **argv, const char 
*prefix)
int flags = quiet ? REFRESH_QUIET : 
REFRESH_IN_PORCELAIN;
if (read_from_tree(, , intent_to_add))
return 1;
-   if (!quiet && get_git_work_tree())
+   if (!quiet && get_git_work_tree()) {
+   uint64_t t_begin, t_delta_in_ms;
+
+   t_begin = getnanotime();
 			refresh_index(_index, flags, NULL, NULL,
 				      _("Unstaged changes after reset:"));
+			t_delta_in_ms = (getnanotime() - t_begin) / 1000000;
+			if (t_delta_in_ms > REFRESH_INDEX_DELAY_WARNING_IN_MS) {
+				printf(_("\nIt took %.2f seconds to enumerate unstaged changes after reset.  You can\n"
+					"use '--quiet' to avoid this.  Set the config setting reset.quiet to true\n"
+					"to make this the default."), t_delta_in_ms / 1000.0);
+			}
+		}
} else {
int err = reset_index(, reset_type, quiet);
if (reset_type == KEEP && !err)
-- 
2.18.0.windows.1



[PATCH v2 2/3] reset: add new reset.quiet config setting

2018-10-19 Thread Ben Peart
From: Ben Peart 

Add a reset.quiet config setting that sets the default value of the --quiet
flag when running the reset command.  This enables users to change the
default behavior to take advantage of the performance advantages of
avoiding the scan for unstaged changes after reset.  Defaults to false.

Signed-off-by: Ben Peart 
---
 Documentation/config.txt | 3 +++
 builtin/reset.c  | 1 +
 2 files changed, 4 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index f6f4c21a54..a2d1b8b116 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2728,6 +2728,9 @@ rerere.enabled::
`$GIT_DIR`, e.g. if "rerere" was previously used in the
repository.
 
+reset.quiet::
+   When set to true, 'git reset' will default to the '--quiet' option.
+
 include::sendemail-config.txt[]
 
 sequence.editor::
diff --git a/builtin/reset.c b/builtin/reset.c
index 04f0d9b4f5..3b43aee544 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -306,6 +306,7 @@ int cmd_reset(int argc, const char **argv, const char 
*prefix)
};
 
git_config(git_reset_config, NULL);
+   git_config_get_bool("reset.quiet", );
 
argc = parse_options(argc, argv, prefix, options, git_reset_usage,
PARSE_OPT_KEEP_DASHDASH);
-- 
2.18.0.windows.1



[PATCH v2 0/3] speed up git reset

2018-10-19 Thread Ben Peart
From: Ben Peart 

This iteration avoids the refresh_index() call completely if 'quiet'.
The advantage of this is that "git reset" without any pathspec is also
significantly sped up.

Also added a notification if finding unstaged changes after reset takes
longer than 2 seconds to make users aware of the option to speed it up if
they don't need the unstaged changes after reset to be output.

It also renames the new config setting reset.quietDefault to reset.quiet.

Base Ref: 
Web-Diff: https://github.com/benpeart/git/commit/50d3415ef1
Checkout: git fetch https://github.com/benpeart/git reset-refresh-index-v2 && 
git checkout 50d3415ef1


### Interdiff (v1..v2):

diff --git a/Documentation/config.txt b/Documentation/config.txt
index a5cf4c019b..a2d1b8b116 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2728,11 +2728,8 @@ rerere.enabled::
`$GIT_DIR`, e.g. if "rerere" was previously used in the
repository.
 
-reset.quietDefault::
-   Sets the default value of the "quiet" option for the reset command.
-   Choosing "quiet" can optimize the performance of the reset command
-   by avoiding the scan of all files in the repo looking for additional
-   unstaged changes. Defaults to false.
+reset.quiet::
+   When set to true, 'git reset' will default to the '--quiet' option.
 
 include::sendemail-config.txt[]
 
diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt
index 8610309b55..1d697d9962 100644
--- a/Documentation/git-reset.txt
+++ b/Documentation/git-reset.txt
@@ -95,9 +95,7 @@ OPTIONS
 
 -q::
 --quiet::
-   Be quiet, only report errors.  Can optimize the performance of reset
-   by avoiding scanning all files in the repo looking for additional
-   unstaged changes.
+   Be quiet, only report errors.
 
 
 EXAMPLES
diff --git a/builtin/reset.c b/builtin/reset.c
index 7d151d48a0..d95a27d52e 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -25,6 +25,8 @@
 #include "submodule.h"
 #include "submodule-config.h"
 
+#define REFRESH_INDEX_DELAY_WARNING_IN_MS (2 * 1000)
+
 static const char * const git_reset_usage[] = {
N_("git reset [--mixed | --soft | --hard | --merge | --keep] [-q] 
[]"),
N_("git reset [-q] [] [--] ..."),
@@ -306,7 +308,7 @@ int cmd_reset(int argc, const char **argv, const char 
*prefix)
};
 
git_config(git_reset_config, NULL);
-   git_config_get_bool("reset.quietDefault", );
+   git_config_get_bool("reset.quiet", );
 
argc = parse_options(argc, argv, prefix, options, git_reset_usage,
PARSE_OPT_KEEP_DASHDASH);
@@ -376,9 +378,19 @@ int cmd_reset(int argc, const char **argv, const char 
*prefix)
int flags = quiet ? REFRESH_QUIET : 
REFRESH_IN_PORCELAIN;
if (read_from_tree(, , intent_to_add))
return 1;
-   if (get_git_work_tree())
-   refresh_index(_index, flags, quiet ? 
 : NULL, NULL,
+   if (!quiet && get_git_work_tree()) {
+   uint64_t t_begin, t_delta_in_ms;
+
+   t_begin = getnanotime();
+			refresh_index(_index, flags, NULL, NULL,
 				      _("Unstaged changes after reset:"));
+			t_delta_in_ms = (getnanotime() - t_begin) / 1000000;
+			if (t_delta_in_ms > REFRESH_INDEX_DELAY_WARNING_IN_MS) {
+				printf(_("\nIt took %.2f seconds to enumerate unstaged changes after reset.  You can\n"
+					"use '--quiet' to avoid this.  Set the config setting reset.quiet to true\n"
+					"to make this the default."), t_delta_in_ms / 1000.0);
+			}
+		}
        } else {
int err = reset_index(, reset_type, quiet);
if (reset_type == KEEP && !err)


### Patches

Ben Peart (3):
  reset: don't compute unstaged changes after reset when --quiet
  reset: add new reset.quiet config setting
  reset: warn when refresh_index() takes more than 2 seconds

 Documentation/config.txt |  3 +++
 builtin/reset.c  | 15 ++-
 2 files changed, 17 insertions(+), 1 deletion(-)


base-commit: ca63497355222acefcca02b9cbb540a4768f3286
-- 
2.18.0.windows.1




Re: [PATCH v1 1/2] reset: don't compute unstaged changes after reset when --quiet

2018-10-18 Thread Ben Peart




On 10/18/2018 2:26 PM, Duy Nguyen wrote:

On Thu, Oct 18, 2018 at 8:18 PM Ben Peart  wrote:

I actually started my effort to speed up reset by attempting to
multi-thread refresh_index().  You can see a work in progress at:

https://github.com/benpeart/git/pull/new/refresh-index-multithread-gvfs

The patch doesn't always work as it is still not thread safe.  When it
works, it's great but I ran into too many difficulties trying to debug 
the remaining threading issues (even adding print statements would
change the timing and the repro would disappear).  It will take a lot of
code review to discover and fix the remaining non-thread safe code paths.

In addition, the optimized code path that takes advantage of fsmonitor,
uses multiple threads, fscache, etc _already exists_ in preload_index().
   Trying to recreate all those optimizations in refresh_index() is (as I
discovered) a daunting task.


Why not make refresh_index() run preload_index() first (or the
parallel lstat part to be precise), and only do the heavy
content-based refresh in single thread mode?



Head smack! Why didn't I think of that?

That is a terrific suggestion.  Calling preload_index() right before the 
big for loop in refresh_index() is a trivial and effective way to do the 
bulk of the updating with the optimized code.  After doing that, most of 
the cache entries can bail out quickly down in refresh_cache_ent() when 
it tests ce_uptodate(ce).
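
A minimal sketch of the shape of that change (signatures approximated;
refresh_one_entry() stands in for the existing per-entry body of
refresh_index()):

	/* at the top of refresh_index(), before the per-entry loop */
	preload_index(istate, pathspec);	/* threaded, fscache/fsmonitor-aware lstat() pass */

	for (i = 0; i < istate->cache_nr; i++) {
		struct cache_entry *ce = istate->cache[i];

		/*
		 * Entries preload_index() already marked up to date bail
		 * out almost immediately inside refresh_cache_ent().
		 */
		refresh_one_entry(istate, ce);
	}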


Here are the numbers using that optimization (hot cache, averaged across 
3 runs):


0.32 git add asdf
1.67 git reset asdf
1.68 git status
3.67 Total

vs without it:

0.32 git add asdf
2.48 git reset asdf
1.50 git status
4.30 Total

For a savings in the reset command of 32% and 15% overall.

Clearly, making refresh_index() faster is not as much of a savings as not 
doing it at all.  Given how simple this patch is, I think it makes sense 
to do both so that we have optimized each path to its fullest.


Re: [PATCH v1 1/2] reset: don't compute unstaged changes after reset when --quiet

2018-10-18 Thread Ben Peart




On 10/18/2018 2:36 AM, Jeff King wrote:

On Thu, Oct 18, 2018 at 12:40:48PM +0900, Junio C Hamano wrote:


Jeff King  writes:


Whereas for the new config variable, you'd probably set it not because
you want it quiet all the time, but because you want to get some time
savings. So there it does make sense to me to explain.

Other than that, this seems like an obvious and easy win. It does feel a
little hacky (you're really losing something in the output, and ideally
we'd just be able to give that answer quickly), but this may be OK as a
hack in the interim.


After "git reset --quiet -- this/area/" with this change, any
operation you'd do next that needs to learn if working tree files
are different from what is recorded in the index outside that area
will have to spend more cycles, because the refresh done by "reset"
is now limited to the area.  So if your final goal is "make 'reset'
as fast as possible", this is an obvious and easy win.  For other
goals, i.e. "make the overall experience of using Git feel faster",
it is not so obvious to me, though.


The final goal is to make git faster (especially on larger repos) and 
this proposal accomplishes that.  Let's look at why that is.


By scoping down (or eliminating) what refresh_index() has to lstat() at 
the end of the reset command, clearly the reset command is faster.  Yes, 
the index isn't as "fresh" because not everything was updated but that 
doesn't typically impact the performance of subsequent commands.


On the next command, git still has to lstat() every file because it 
isn't sure what changes could have happened in the file system.  As a 
result, the overall impact is that we have had to lstat() every file one 
fewer time between the two commands.  A net win overall.


In addition, the preload_index() code that does the lstat() command is 
highly optimized across multiple threads (and on Windows takes advantage 
of the fscache).  This means that it can lstat() every file _much_ 
faster than the single threaded loop in refresh_index().  This also 
makes the overall performance of the pair of git commands faster as we 
got rid of the slow lstat() loop and kept the fast one.


Here are some numbers to demonstrate that.  These are hot cache numbers 
as they are easier to generate.  Cold cache numbers make the net perf 
win significantly better as the cost for the reset jumps from 2.43 
seconds to 7.16 seconds.


0.32 git add asdf
0.31 git -c reset.quiet=true reset asdf
1.34 git status
1.97 Total


0.32 git add asdf
2.43 git -c reset.quiet=false reset asdf
1.32 git status
4.07 Total

Note the status command after the reset doesn't really change as it 
still must lstat() every file (the 0.02 difference is well within the 
variability of run to run differences).


FWIW, none of these numbers are using fsmonitor.



There was additional discussion about whether this should be tied to the 
"--quiet" option and how it should be documented.


One option would be to change the default behavior of reset so that it 
doesn't do the refresh_index() call at all.  This speeds up reset by 
default so there are no user discoverability issues but changes the 
default behavior which is an issue.


Another option that was suggested was to add a separate flag that could 
be passed to reset so that the "quiet" and "fast" options don't get 
conflated.  I don't care for that option because the two options (at 
this point and for the foreseeable future) would be identical in 
behavior from the end users perspective.


It was also suggested that there be a single "fast and quiet" option for 
all of git instead of separate options for each command.  I worry about 
that because now we're forcing users to choose between the "fast and 
quiet" version of git and the "slow and noisy" version.  How do we help 
them decide which they want?  That seems difficult to explain so that 
they can make a rational choice and also hard to discover.  I also have 
to wonder who would say "give me the slow and noisy version please." :)


I'd prefer we systematically move towards a model where the default 
values that are chosen for various settings throughout the code are all 
configurable via settings.  All defaults by necessity make certain 
assumptions about user preference, data shape, machine performance, etc 
and if those assumptions don't match the user's environment then the 
hard coded defaults aren't appropriate.  We do our best but its going to 
be hit or miss.


A consistent way to be able to change those defaults would be very 
useful in those circumstances.  To be clear, I'm not proposing we do a 
wholesale update of our preferences model at this point in time - that 
seems like a significant undertaking and I don't want to tie this 
specific optimization to a potential change in how default settings work.



To move this forward, here is what I propose:

1) If the '--quiet' flag is passed, we silently take advantage of the 
fact we can avoid having to do an "extra" lstat() 

[PATCH v1 1/2] reset: don't compute unstaged changes after reset when --quiet

2018-10-17 Thread Ben Peart
From: Ben Peart 

When git reset is run with the --quiet flag, don't bother finding any
additional unstaged changes as they won't be output anyway.  This speeds up
the git reset command by avoiding having to lstat() every file looking for
changes that aren't going to be reported anyway.

The savings can be significant.  In a repo with 200K files "git reset"
drops from 7.16 seconds to 0.32 seconds for a savings of 96%.

Signed-off-by: Ben Peart 
---
 Documentation/git-reset.txt | 4 +++-
 builtin/reset.c | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt
index 1d697d9962..8610309b55 100644
--- a/Documentation/git-reset.txt
+++ b/Documentation/git-reset.txt
@@ -95,7 +95,9 @@ OPTIONS
 
 -q::
 --quiet::
-   Be quiet, only report errors.
+   Be quiet, only report errors.  Can optimize the performance of reset
+   by avoiding scanning all files in the repo looking for additional
+   unstaged changes.
 
 
 EXAMPLES
diff --git a/builtin/reset.c b/builtin/reset.c
index 11cd0dcb8c..0822798616 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -376,7 +376,7 @@ int cmd_reset(int argc, const char **argv, const char 
*prefix)
if (read_from_tree(, , intent_to_add))
return 1;
if (get_git_work_tree())
-   refresh_index(_index, flags, NULL, NULL,
+   refresh_index(_index, flags, quiet ? 
 : NULL, NULL,
  _("Unstaged changes after 
reset:"));
} else {
int err = reset_index(, reset_type, quiet);
-- 
2.18.0.windows.1



[PATCH v1 0/2] speed up git reset

2018-10-17 Thread Ben Peart
From: Ben Peart 

The reset (mixed) command unstages the specified file(s) and then shows you
the remaining unstaged changes.  This can make the command slow on larger
repos because at the end it calls refresh_index() which has a single thread
that loops through all the entries calling lstat() for every file.

If the user passes the --quiet switch, reset doesn't display the remaining
unstaged changes but it still does all the work to find them; it just
doesn't print them out, so passing "--quiet" doesn't help performance.

This patch series will:

1) change the behavior of "git reset --quiet" so that it no longer computes
   the remaining unstaged changes.
   
2) add a new config setting so that "--quiet" can be configured as the default
   so that the default performance of "git reset" is improved.
   
The performance benefit of this can be significant.  In a repo with 200K files
"git reset foo" performance drops from 7.16 seconds to 0.32 seconds for a
savings of 96%.  Even with the small git repo, reset times drop from 0.191
seconds to 0.043 seconds for a savings of 77%.
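
Roughly, after this series the fast path can be taken either per
invocation or by default (the path 'foo' is just a placeholder):

	git reset --quiet foo                 # skip the unstaged-changes scan for this run
	git config reset.quietDefault true    # make --quiet the default for reset
	git reset foo                         # now quiet (and fast) without extra flags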

Base Ref: master
Web-Diff: https://github.com/benpeart/git/commit/2295a310d0
Checkout: git fetch https://github.com/benpeart/git reset-refresh-index-v1 && 
git checkout 2295a310d0

Ben Peart (2):
  reset: don't compute unstaged changes after reset when --quiet
  reset: add new reset.quietDefault config setting

 Documentation/config.txt| 6 ++
 Documentation/git-reset.txt | 4 +++-
 builtin/reset.c | 3 ++-
 3 files changed, 11 insertions(+), 2 deletions(-)


base-commit: a4b8ab5363a32f283a61ef3a962853556d136c0e
-- 
2.18.0.windows.1




[PATCH v1 2/2] reset: add new reset.quietDefault config setting

2018-10-17 Thread Ben Peart
From: Ben Peart 

Add a reset.quietDefault config setting that sets the default value of the
--quiet flag when running the reset command.  This enables users to change
the default behavior to take advantage of the performance advantages of
avoiding the scan for unstaged changes after reset.  Defaults to false.

Signed-off-by: Ben Peart 
---
 Documentation/config.txt | 6 ++
 builtin/reset.c  | 1 +
 2 files changed, 7 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index f6f4c21a54..a5cf4c019b 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2728,6 +2728,12 @@ rerere.enabled::
`$GIT_DIR`, e.g. if "rerere" was previously used in the
repository.
 
+reset.quietDefault::
+   Sets the default value of the "quiet" option for the reset command.
+   Choosing "quiet" can optimize the performance of the reset command
+   by avoiding the scan of all files in the repo looking for additional
+   unstaged changes. Defaults to false.
+
 include::sendemail-config.txt[]
 
 sequence.editor::
diff --git a/builtin/reset.c b/builtin/reset.c
index 0822798616..7d151d48a0 100644
--- a/builtin/reset.c
+++ b/builtin/reset.c
@@ -306,6 +306,7 @@ int cmd_reset(int argc, const char **argv, const char 
*prefix)
};
 
git_config(git_reset_config, NULL);
+   git_config_get_bool("reset.quietDefault", );
 
argc = parse_options(argc, argv, prefix, options, git_reset_usage,
PARSE_OPT_KEEP_DASHDASH);
-- 
2.18.0.windows.1



Re: [PATCH v8 0/7] speed up index load through parallelization

2018-10-15 Thread Ben Peart

fixup! IEOT error messages

Enable localizing new error messages and improve the error message for
invalid IEOT extension sizes.

Signed-off-by: Ben Peart 
---
 read-cache.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 7acc2c86f4..f9fa6a7979 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -3480,7 +3480,7 @@ static struct index_entry_offset_table 
*read_ieot_extension(const char *mmap, si

/* validate the version is IEOT_VERSION */
ext_version = get_be32(index);
if (ext_version != IEOT_VERSION) {
-  error("invalid IEOT version %d", ext_version);
+  error(_("invalid IEOT version %d"), ext_version);
   return NULL;
}
index += sizeof(uint32_t);
@@ -3488,7 +3488,7 @@ static struct index_entry_offset_table 
*read_ieot_extension(const char *mmap, si

/* extension size - version bytes / bytes per entry */
nr = (extsize - sizeof(uint32_t)) / (sizeof(uint32_t) + 
sizeof(uint32_t));

if (!nr) {
-  error("invalid number of IEOT entries %d", nr);
+  error(_("invalid IEOT extension size %d"), extsize);
   return NULL;
}
ieot = xmalloc(sizeof(struct index_entry_offset_table)
--
2.18.0.windows.1



On 10/14/2018 8:28 AM, Duy Nguyen wrote:

On Wed, Oct 10, 2018 at 5:59 PM Ben Peart  wrote:

@@ -3460,14 +3479,18 @@ static struct index_entry_offset_table 
*read_ieot_extension(const char *mmap, si

 /* validate the version is IEOT_VERSION */
 ext_version = get_be32(index);
-   if (ext_version != IEOT_VERSION)
+   if (ext_version != IEOT_VERSION) {
+  error("invalid IEOT version %d", ext_version);


Please wrap this string in _() so that it can be translated.


return NULL;
+   }
 index += sizeof(uint32_t);

 /* extension size - version bytes / bytes per entry */
 nr = (extsize - sizeof(uint32_t)) / (sizeof(uint32_t) + 
sizeof(uint32_t));
-   if (!nr)
+   if (!nr) {
+  error("invalid number of IEOT entries %d", nr);


Ditto. And reporting extsize may be more useful than nr, which we know
is zero, but we don't know why it's calculated zero unless we know
extsize.



[PATCH v8 6/7] ieot: add Index Entry Offset Table (IEOT) extension

2018-10-10 Thread Ben Peart
From: Ben Peart 

This patch enables addressing the CPU cost of loading the index by adding
additional data to the index that will allow us to efficiently multi-
thread the loading and conversion of cache entries.

It accomplishes this by adding an (optional) index extension that is a
table of offsets to blocks of cache entries in the index file.  To make
this work for V4 indexes, when writing the cache entries, it periodically
"resets" the prefix-compression by encoding the current entry as if the
path name for the previous entry is completely different and saves the
offset of that entry in the IEOT.  Basically, with V4 indexes, it
generates offsets into blocks of prefix-compressed entries.
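
Conceptually the extension payload is just a version word followed by
(offset, count) pairs; a simplified reader, minus the error handling in
the real patch ('index' is a cursor into the extension data), looks
roughly like:

	uint32_t version = get_be32(index);	/* must be IEOT_VERSION */
	index += sizeof(uint32_t);
	nr = (extsize - sizeof(uint32_t)) / (2 * sizeof(uint32_t));
	for (i = 0; i < nr; i++) {
		ieot->entries[i].offset = get_be32(index);	/* byte offset of first entry in block */
		index += sizeof(uint32_t);
		ieot->entries[i].nr = get_be32(index);		/* cache entries in the block */
		index += sizeof(uint32_t);
	}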

Signed-off-by: Ben Peart 
---
 Documentation/technical/index-format.txt |  18 +++
 read-cache.c | 196 ++-
 2 files changed, 211 insertions(+), 3 deletions(-)

diff --git a/Documentation/technical/index-format.txt 
b/Documentation/technical/index-format.txt
index 6bc2d90f7f..7c4d67aa6a 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -337,3 +337,21 @@ The remaining data of each directory block is grouped by 
type:
 
SHA-1("TREE" +  +
"REUC" + )
+
+== Index Entry Offset Table
+
+  The Index Entry Offset Table (IEOT) is used to help address the CPU
+  cost of loading the index by enabling multi-threading the process of
+  converting cache entries from the on-disk format to the in-memory format.
+  The signature for this extension is { 'I', 'E', 'O', 'T' }.
+
+  The extension consists of:
+
+  - 32-bit version (currently 1)
+
+  - A number of index offset entries each consisting of:
+
+- 32-bit offset from the beginning of the file to the first cache entry
+   in this block of entries.
+
+- 32-bit count of cache entries in this block
diff --git a/read-cache.c b/read-cache.c
index 2214b3153d..3ace29d58f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -45,6 +45,7 @@
 #define CACHE_EXT_UNTRACKED 0x554E5452   /* "UNTR" */
 #define CACHE_EXT_FSMONITOR 0x46534D4E   /* "FSMN" */
 #define CACHE_EXT_ENDOFINDEXENTRIES 0x454F4945 /* "EOIE" */
+#define CACHE_EXT_INDEXENTRYOFFSETTABLE 0x49454F54 /* "IEOT" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
@@ -1696,6 +1697,7 @@ static int read_index_extension(struct index_state 
*istate,
read_fsmonitor_extension(istate, data, sz);
break;
case CACHE_EXT_ENDOFINDEXENTRIES:
+   case CACHE_EXT_INDEXENTRYOFFSETTABLE:
/* already handled in do_read_index() */
break;
default:
@@ -1888,6 +1890,23 @@ static size_t estimate_cache_size(size_t ondisk_size, 
unsigned int entries)
return ondisk_size + entries * per_entry;
 }
 
+struct index_entry_offset
+{
+   /* starting byte offset into index file, count of index entries in this 
block */
+   int offset, nr;
+};
+
+struct index_entry_offset_table
+{
+   int nr;
+   struct index_entry_offset entries[FLEX_ARRAY];
+};
+
+#ifndef NO_PTHREADS
+static struct index_entry_offset_table *read_ieot_extension(const char *mmap, 
size_t mmap_size, size_t offset);
+static void write_ieot_extension(struct strbuf *sb, struct 
index_entry_offset_table *ieot);
+#endif
+
 static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
 static void write_eoie_extension(struct strbuf *sb, git_hash_ctx 
*eoie_context, size_t offset);
 
@@ -1929,6 +1948,15 @@ static void *load_index_extensions(void *_data)
return NULL;
 }
 
+/*
+ * Mostly randomly chosen maximum thread counts: we
+ * cap the parallelism to online_cpus() threads, and we want
+ * to have at least 1 cache entries per thread for it to
+ * be worth starting a thread.
+ */
+
+#define THREAD_COST(1)
+
 /* remember to discard_cache() before reading a different cache! */
 int do_read_index(struct index_state *istate, const char *path, int must_exist)
 {
@@ -2521,6 +2549,9 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
int drop_cache_tree = istate->drop_cache_tree;
off_t offset;
+   int ieot_blocks = 1;
+   struct index_entry_offset_table *ieot = NULL;
+   int nr, nr_threads;
 
for (i = removed = extended = 0; i < entries; i++) {
if (cache[i]->ce_flags & CE_REMOVE)
@@ -2554,10 +2585,44 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
if (ce_write(, newfd, , sizeof(hdr)) < 0)
return -1;
 
+#ifndef NO_PTHREADS
+   nr_threads = git_config_get_index_threads();
+   if (nr_threads != 1) {
+   int ieot_blocks, cpus;
+

[PATCH v8 5/7] read-cache: load cache extensions on a worker thread

2018-10-10 Thread Ben Peart
From: Ben Peart 

This patch helps address the CPU cost of loading the index by loading
the cache extensions on a worker thread in parallel with loading the cache
entries.

In some cases, loading the extensions takes longer than loading the
cache entries so this patch utilizes the new EOIE to start the thread to
load the extensions before loading all the cache entries in parallel.

This is possible because the current extensions don't access the cache
entries in the index_state structure so are OK that they don't all exist
yet.

The CACHE_EXT_TREE, CACHE_EXT_RESOLVE_UNDO, and CACHE_EXT_UNTRACKED
extensions don't even get a pointer to the index so don't have access to the
cache entries.

CACHE_EXT_LINK only uses the index_state to initialize the split index.
CACHE_EXT_FSMONITOR only uses the index_state to save the fsmonitor last
update and dirty flags.

I used p0002-read-cache.sh to generate some performance data:

Test w/100,000 files reduced the time by 0.53%
Test w/1,000,000 files reduced the time by 27.78%

Signed-off-by: Ben Peart 
---
 read-cache.c | 95 +++-
 1 file changed, 79 insertions(+), 16 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 4781515252..2214b3153d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -23,6 +23,7 @@
 #include "split-index.h"
 #include "utf8.h"
 #include "fsmonitor.h"
+#include "thread-utils.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 
@@ -1890,6 +1891,44 @@ static size_t estimate_cache_size(size_t ondisk_size, 
unsigned int entries)
 static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
 static void write_eoie_extension(struct strbuf *sb, git_hash_ctx 
*eoie_context, size_t offset);
 
+struct load_index_extensions
+{
+#ifndef NO_PTHREADS
+   pthread_t pthread;
+#endif
+   struct index_state *istate;
+   const char *mmap;
+   size_t mmap_size;
+   unsigned long src_offset;
+};
+
+static void *load_index_extensions(void *_data)
+{
+   struct load_index_extensions *p = _data;
+   unsigned long src_offset = p->src_offset;
+
+   while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
+   /* After an array of active_nr index entries,
+* there can be arbitrary number of extended
+* sections, each of which is prefixed with
+* extension name (4-byte) and section length
+* in 4-byte network byte order.
+*/
+   uint32_t extsize = get_be32(p->mmap + src_offset + 4);
+   if (read_index_extension(p->istate,
+p->mmap + src_offset,
+p->mmap + src_offset + 8,
+extsize) < 0) {
+   munmap((void *)p->mmap, p->mmap_size);
+   die(_("index file corrupt"));
+   }
+   src_offset += 8;
+   src_offset += extsize;
+   }
+
+   return NULL;
+}
+
 /* remember to discard_cache() before reading a different cache! */
 int do_read_index(struct index_state *istate, const char *path, int must_exist)
 {
@@ -1900,6 +1939,11 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
const char *mmap;
size_t mmap_size;
const struct cache_entry *previous_ce = NULL;
+   struct load_index_extensions p;
+   size_t extension_offset = 0;
+#ifndef NO_PTHREADS
+   int nr_threads;
+#endif
 
if (istate->initialized)
return istate->cache_nr;
@@ -1936,6 +1980,30 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
istate->cache = xcalloc(istate->cache_alloc, sizeof(*istate->cache));
istate->initialized = 1;
 
+   p.istate = istate;
+   p.mmap = mmap;
+   p.mmap_size = mmap_size;
+
+#ifndef NO_PTHREADS
+   nr_threads = git_config_get_index_threads();
+   if (!nr_threads)
+   nr_threads = online_cpus();
+
+   if (nr_threads > 1) {
+   extension_offset = read_eoie_extension(mmap, mmap_size);
+   if (extension_offset) {
+   int err;
+
+   p.src_offset = extension_offset;
+   err = pthread_create(, NULL, 
load_index_extensions, );
+   if (err)
+   die(_("unable to create load_index_extensions 
thread: %s"), strerror(err));
+
+   nr_threads--;
+   }
+   }
+#endif
+
if (istate->version == 4) {
mem_pool_init(>ce_mem_pool,
  
estimate_cache_size_from_compressed(istate->cache_nr));
@@ -1960,22 +2028,17 @@ int do_read_index(struct index_state *istate, const 

[PATCH v8 7/7] read-cache: load cache entries on worker threads

2018-10-10 Thread Ben Peart
From: Ben Peart 

This patch helps address the CPU cost of loading the index by utilizing
the Index Entry Offset Table (IEOT) to divide loading and conversion of
the cache entries across multiple threads in parallel.

I used p0002-read-cache.sh to generate some performance data:

Test w/100,000 files reduced the time by 32.24%
Test w/1,000,000 files reduced the time by -4.77%

Note that on the 1,000,000 files case, multi-threading the cache entry parsing
does not yield a performance win.  This is because the cost to parse the
index extensions in this repo far outweighs the cost of loading the cache
entries.

The high cost of parsing the index extensions is driven by the cache tree
and the untracked cache extensions. As this is currently the longest pole,
any reduction in this time will reduce the overall index load times and is
worth further investigation in another patch series.
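
The per-thread loop is essentially the following sketch (parse_block()
stands in for load_cache_entry_block() in the patch below):

	/* one worker: parse a contiguous run of IEOT blocks */
	for (j = p->ieot_start; j < p->ieot_start + p->ieot_blocks; j++) {
		struct index_entry_offset *blk = &p->ieot->entries[j];

		/*
		 * Each block is self-contained: the writer restarted the V4
		 * prefix compression at block boundaries, so parsing starts
		 * at blk->offset with no state from any other thread.
		 */
		parse_block(p->istate, p->mmap + blk->offset, blk->nr);
	}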

Signed-off-by: Ben Peart 
---
 read-cache.c | 230 ++-
 1 file changed, 193 insertions(+), 37 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 3ace29d58f..7acc2c86f4 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1720,7 +1720,8 @@ int read_index(struct index_state *istate)
return read_index_from(istate, get_index_file(), get_git_dir());
 }
 
-static struct cache_entry *create_from_disk(struct index_state *istate,
+static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
+   unsigned int version,
struct ondisk_cache_entry *ondisk,
unsigned long *ent_size,
const struct cache_entry 
*previous_ce)
@@ -1737,7 +1738,7 @@ static struct cache_entry *create_from_disk(struct 
index_state *istate,
 * number of bytes to be stripped from the end of the previous name,
 * and the bytes to append to the result, to come up with its name.
 */
-   int expand_name_field = istate->version == 4;
+   int expand_name_field = version == 4;
 
/* On-disk flags are just 16 bits */
flags = get_be16(>flags);
@@ -1761,16 +1762,17 @@ static struct cache_entry *create_from_disk(struct 
index_state *istate,
const unsigned char *cp = (const unsigned char *)name;
size_t strip_len, previous_len;
 
-   previous_len = previous_ce ? previous_ce->ce_namelen : 0;
+		/* If we're at the beginning of a block, ignore the previous name */
strip_len = decode_varint();
-   if (previous_len < strip_len) {
-   if (previous_ce)
+   if (previous_ce) {
+   previous_len = previous_ce->ce_namelen;
+   if (previous_len < strip_len)
die(_("malformed name field in the index, near 
path '%s'"),
-   previous_ce->name);
-   else
-   die(_("malformed name field in the index in the 
first path"));
+   previous_ce->name);
+   copy_len = previous_len - strip_len;
+   } else {
+   copy_len = 0;
}
-   copy_len = previous_len - strip_len;
name = (const char *)cp;
}
 
@@ -1780,7 +1782,7 @@ static struct cache_entry *create_from_disk(struct 
index_state *istate,
len += copy_len;
}
 
-   ce = mem_pool__ce_alloc(istate->ce_mem_pool, len);
+   ce = mem_pool__ce_alloc(ce_mem_pool, len);
 
ce->ce_stat_data.sd_ctime.sec = get_be32(>ctime.sec);
ce->ce_stat_data.sd_mtime.sec = get_be32(>mtime.sec);
@@ -1948,6 +1950,52 @@ static void *load_index_extensions(void *_data)
return NULL;
 }
 
+/*
+ * A helper function that will load the specified range of cache entries
+ * from the memory mapped file and add them to the given index.
+ */
+static unsigned long load_cache_entry_block(struct index_state *istate,
+   struct mem_pool *ce_mem_pool, int offset, int nr, const 
char *mmap,
+   unsigned long start_offset, const struct cache_entry 
*previous_ce)
+{
+   int i;
+   unsigned long src_offset = start_offset;
+
+   for (i = offset; i < offset + nr; i++) {
+   struct ondisk_cache_entry *disk_ce;
+   struct cache_entry *ce;
+   unsigned long consumed;
+
+   disk_ce = (struct ondisk_cache_entry *)(mmap + src_offset);
+   ce = create_from_disk(ce_mem_pool, istate->version, disk_ce, 
, previous_ce);
+   set_index_entry(istate, i, ce);
+
+   src_offset += consumed;
+   previous_ce = ce;
+   }
+   return src_offset - start_offset;
+}

[PATCH v8 4/7] config: add new index.threads config setting

2018-10-10 Thread Ben Peart
From: Ben Peart 

Add support for a new index.threads config setting which will be used to
control the threading code in do_read_index().  A value of 0 will tell the
index code to automatically determine the correct number of threads to use.
A value of 1 will make the code single threaded.  A value greater than 1
will set the maximum number of threads to use.

For testing purposes, this setting can be overwritten by setting the
GIT_TEST_INDEX_THREADS= environment variable to a value greater than 0.
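
For example:

	git config index.threads 0	# or 'true': auto-detect, one thread per CPU
	git config index.threads 1	# or 'false': keep index loading single threaded
	git config index.threads 4	# cap the number of loader threads at four

	GIT_TEST_INDEX_THREADS=2 git status	# test-style override, takes precedence over the config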

Signed-off-by: Ben Peart 
---
 Documentation/config.txt |  7 +++
 config.c | 18 ++
 config.h |  1 +
 t/README |  5 +
 t/t1700-split-index.sh   |  5 +
 5 files changed, 36 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index ad0f4510c3..8fd973b76b 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2413,6 +2413,13 @@ imap::
The configuration variables in the 'imap' section are described
in linkgit:git-imap-send[1].
 
+index.threads::
+   Specifies the number of threads to spawn when loading the index.
+   This is meant to reduce index load time on multiprocessor machines.
+   Specifying 0 or 'true' will cause Git to auto-detect the number of
+   CPU's and set the number of threads accordingly. Specifying 1 or
+   'false' will disable multithreading. Defaults to 'true'.
+
 index.version::
Specify the version with which new index files should be
initialized.  This does not affect existing repositories.
diff --git a/config.c b/config.c
index 3461993f0a..2ee29f6f86 100644
--- a/config.c
+++ b/config.c
@@ -2289,6 +2289,24 @@ int git_config_get_fsmonitor(void)
return 0;
 }
 
+int git_config_get_index_threads(void)
+{
+   int is_bool, val = 0;
+
+   val = git_env_ulong("GIT_TEST_INDEX_THREADS", 0);
+   if (val)
+   return val;
+
+   if (!git_config_get_bool_or_int("index.threads", _bool, )) {
+   if (is_bool)
+   return val ? 0 : 1;
+   else
+   return val;
+   }
+
+   return 0; /* auto */
+}
+
 NORETURN
 void git_die_config_linenr(const char *key, const char *filename, int linenr)
 {
diff --git a/config.h b/config.h
index ab46e0165d..a06027e69b 100644
--- a/config.h
+++ b/config.h
@@ -250,6 +250,7 @@ extern int git_config_get_untracked_cache(void);
 extern int git_config_get_split_index(void);
 extern int git_config_get_max_percent_split_change(void);
 extern int git_config_get_fsmonitor(void);
+extern int git_config_get_index_threads(void);
 
 /* This dies if the configured or default date is in the future */
 extern int git_config_get_expiry(const char *key, const char **output);
diff --git a/t/README b/t/README
index 3ea6c85460..8f5c0620ea 100644
--- a/t/README
+++ b/t/README
@@ -327,6 +327,11 @@ GIT_TEST_COMMIT_GRAPH=, when true, forces the 
commit-graph to
 be written after every 'git commit' command, and overrides the
 'core.commitGraph' setting to true.
 
+GIT_TEST_INDEX_THREADS= enables exercising the multi-threaded loading
+of the index for the whole test suite by bypassing the default number of
+cache entries and thread minimums. Setting this to 1 will make the
+index loading single threaded.
+
 Naming Tests
 
 
diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index 8e17f8e7a0..ef9349bd70 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -6,7 +6,12 @@ test_description='split index mode tests'
 
 # We need total control of index splitting here
 sane_unset GIT_TEST_SPLIT_INDEX
+
+# Testing a hard coded SHA against an index with an extension
+# that can vary from run to run is problematic so we disable
+# those extensions.
 sane_unset GIT_FSMONITOR_TEST
+sane_unset GIT_TEST_INDEX_THREADS
 
 test_expect_success 'enable split index' '
git config splitIndex.maxPercentChange 100 &&
-- 
2.18.0.windows.1



[PATCH v8 2/7] read-cache: clean up casting and byte decoding

2018-10-10 Thread Ben Peart
From: Ben Peart 

This patch does a clean up pass to minimize the casting required to work
with the memory mapped index (mmap).

It also makes the decoding of network byte order more consistent by using
get_be32() where possible.

Signed-off-by: Ben Peart 
---
 read-cache.c | 23 +++
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 583a4fb1f8..6ba99e2c96 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1650,7 +1650,7 @@ int verify_index_checksum;
 /* Allow fsck to force verification of the cache entry order. */
 int verify_ce_order;
 
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+static int verify_hdr(const struct cache_header *hdr, unsigned long size)
 {
git_hash_ctx c;
unsigned char hash[GIT_MAX_RAWSZ];
@@ -1674,7 +1674,7 @@ static int verify_hdr(struct cache_header *hdr, unsigned 
long size)
 }
 
 static int read_index_extension(struct index_state *istate,
-   const char *ext, void *data, unsigned long sz)
+   const char *ext, const char *data, unsigned 
long sz)
 {
switch (CACHE_EXT(ext)) {
case CACHE_EXT_TREE:
@@ -1889,8 +1889,8 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
int fd, i;
struct stat st;
unsigned long src_offset;
-   struct cache_header *hdr;
-   void *mmap;
+   const struct cache_header *hdr;
+   const char *mmap;
size_t mmap_size;
const struct cache_entry *previous_ce = NULL;
 
@@ -1918,7 +1918,7 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
die_errno("unable to map index file");
close(fd);
 
-   hdr = mmap;
+   hdr = (const struct cache_header *)mmap;
if (verify_hdr(hdr, mmap_size) < 0)
goto unmap;
 
@@ -1943,7 +1943,7 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
struct cache_entry *ce;
unsigned long consumed;
 
-   disk_ce = (struct ondisk_cache_entry *)((char *)mmap + 
src_offset);
+   disk_ce = (struct ondisk_cache_entry *)(mmap + src_offset);
ce = create_from_disk(istate, disk_ce, , previous_ce);
set_index_entry(istate, i, ce);
 
@@ -1961,21 +1961,20 @@ int do_read_index(struct index_state *istate, const 
char *path, int must_exist)
 * in 4-byte network byte order.
 */
uint32_t extsize;
-   memcpy(, (char *)mmap + src_offset + 4, 4);
-   extsize = ntohl(extsize);
+   extsize = get_be32(mmap + src_offset + 4);
if (read_index_extension(istate,
-(const char *) mmap + src_offset,
-(char *) mmap + src_offset + 8,
+mmap + src_offset,
+mmap + src_offset + 8,
 extsize) < 0)
goto unmap;
src_offset += 8;
src_offset += extsize;
}
-   munmap(mmap, mmap_size);
+   munmap((void *)mmap, mmap_size);
return istate->cache_nr;
 
 unmap:
-   munmap(mmap, mmap_size);
+   munmap((void *)mmap, mmap_size);
die("index file corrupt");
 }
 
-- 
2.18.0.windows.1



[PATCH v8 1/7] read-cache.c: optimize reading index format v4

2018-10-10 Thread Ben Peart
From: Nguyễn Thái Ngọc Duy 

Index format v4 requires some more computation to assemble a path
based on a previous one. The current code is not very efficient
because

 - it doubles memory copy, we assemble the final path in a temporary
   first before putting it back to a cache_entry

 - strbuf_remove() in expand_name_field() is not exactly a good fit
   for stripping a part at the end, _setlen() would do the same job
   and is much cheaper.

 - the open-coded loop to find the end of the string in
   expand_name_field() can't beat an optimized strlen()

This patch avoids the temporary buffer and writes directly to the new
cache_entry, which addresses the first two points. The last point
could also be avoided if the total string length fits in the first 12
bits of ce_flags, if not we fall back to strlen().

Running "test-tool read-cache 100" on webkit.git (275k files), reading
v2 only takes 4.226 seconds, while v4 takes 5.711 seconds, 35% more
time. The patch reduces read time on v4 to 4.319 seconds.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 read-cache.c | 128 ---
 1 file changed, 60 insertions(+), 68 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 8d04d78a58..583a4fb1f8 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1713,63 +1713,24 @@ int read_index(struct index_state *istate)
return read_index_from(istate, get_index_file(), get_git_dir());
 }
 
-static struct cache_entry *cache_entry_from_ondisk(struct mem_pool *mem_pool,
-  struct ondisk_cache_entry 
*ondisk,
-  unsigned int flags,
-  const char *name,
-  size_t len)
-{
-   struct cache_entry *ce = mem_pool__ce_alloc(mem_pool, len);
-
-   ce->ce_stat_data.sd_ctime.sec = get_be32(>ctime.sec);
-   ce->ce_stat_data.sd_mtime.sec = get_be32(>mtime.sec);
-   ce->ce_stat_data.sd_ctime.nsec = get_be32(>ctime.nsec);
-   ce->ce_stat_data.sd_mtime.nsec = get_be32(>mtime.nsec);
-   ce->ce_stat_data.sd_dev   = get_be32(>dev);
-   ce->ce_stat_data.sd_ino   = get_be32(>ino);
-   ce->ce_mode  = get_be32(>mode);
-   ce->ce_stat_data.sd_uid   = get_be32(>uid);
-   ce->ce_stat_data.sd_gid   = get_be32(>gid);
-   ce->ce_stat_data.sd_size  = get_be32(>size);
-   ce->ce_flags = flags & ~CE_NAMEMASK;
-   ce->ce_namelen = len;
-   ce->index = 0;
-   hashcpy(ce->oid.hash, ondisk->sha1);
-   memcpy(ce->name, name, len);
-   ce->name[len] = '\0';
-   return ce;
-}
-
-/*
- * Adjacent cache entries tend to share the leading paths, so it makes
- * sense to only store the differences in later entries.  In the v4
- * on-disk format of the index, each on-disk cache entry stores the
- * number of bytes to be stripped from the end of the previous name,
- * and the bytes to append to the result, to come up with its name.
- */
-static unsigned long expand_name_field(struct strbuf *name, const char *cp_)
-{
-   const unsigned char *ep, *cp = (const unsigned char *)cp_;
-   size_t len = decode_varint();
-
-   if (name->len < len)
-   die("malformed name field in the index");
-   strbuf_remove(name, name->len - len, len);
-   for (ep = cp; *ep; ep++)
-   ; /* find the end */
-   strbuf_add(name, cp, ep - cp);
-   return (const char *)ep + 1 - cp_;
-}
-
-static struct cache_entry *create_from_disk(struct mem_pool *mem_pool,
+static struct cache_entry *create_from_disk(struct index_state *istate,
struct ondisk_cache_entry *ondisk,
unsigned long *ent_size,
-   struct strbuf *previous_name)
+   const struct cache_entry 
*previous_ce)
 {
struct cache_entry *ce;
size_t len;
const char *name;
unsigned int flags;
+   size_t copy_len;
+   /*
+* Adjacent cache entries tend to share the leading paths, so it makes
+* sense to only store the differences in later entries.  In the v4
+* on-disk format of the index, each on-disk cache entry stores the
+* number of bytes to be stripped from the end of the previous name,
+* and the bytes to append to the result, to come up with its name.
+*/
+   int expand_name_field = istate->version == 4;
 
/* On-disk flags are just 16 bits */
flags = get_be16(>flags);
@@ -1789,21 +1750,54 @@ static struct cache_entry *create_from_disk(struct 
mem_pool *mem_pool,
else
name = ondisk->name;
 
-   if (!previous_name) {
-   /* v3 and earlier */
-   if (len == CE_NAMEMASK)
-   len = strlen(name);
-   ce = cache_entry_from_ondisk(mem_pool, 

[PATCH v8 3/7] eoie: add End of Index Entry (EOIE) extension

2018-10-10 Thread Ben Peart
From: Ben Peart 

The End of Index Entry (EOIE) is used to locate the end of the variable
length index entries and the beginning of the extensions. Code can take
advantage of this to quickly locate the index extensions without having
to parse through all of the index entries.

The EOIE extension is always written out to the index file including to
the shared index when using the split index feature. Because it is always
written out, the SHA checksums in t/t1700-split-index.sh were updated
to reflect its inclusion.

It is written as an optional extension to ensure compatibility with other
git implementations that do not yet support it.  It is always written out
to ensure it is available as often as possible to speed up index operations.

Because it must be able to be loaded before the variable length cache
entries and other index extensions, this extension must be written last.
The signature for this extension is { 'E', 'O', 'I', 'E' }.

The extension consists of:

- 32-bit offset to the end of the index entries

- 160-bit SHA-1 over the extension types and their sizes (but not
their contents).  E.g. if we have "TREE" extension that is N-bytes
long, "REUC" extension that is M-bytes long, followed by "EOIE",
then the hash would be:

SHA-1("TREE" +  +
"REUC" + )

Signed-off-by: Ben Peart 
---
 Documentation/technical/index-format.txt |  23 
 read-cache.c | 158 +--
 t/t1700-split-index.sh   |   8 +-
 3 files changed, 177 insertions(+), 12 deletions(-)

diff --git a/Documentation/technical/index-format.txt 
b/Documentation/technical/index-format.txt
index db3572626b..6bc2d90f7f 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -314,3 +314,26 @@ The remaining data of each directory block is grouped by 
type:
 
   - An ewah bitmap, the n-th bit indicates whether the n-th index entry
 is not CE_FSMONITOR_VALID.
+
+== End of Index Entry
+
+  The End of Index Entry (EOIE) is used to locate the end of the variable
+  length index entries and the begining of the extensions. Code can take
+  advantage of this to quickly locate the index extensions without having
+  to parse through all of the index entries.
+
+  Because it must be able to be loaded before the variable length cache
+  entries and other index extensions, this extension must be written last.
+  The signature for this extension is { 'E', 'O', 'I', 'E' }.
+
+  The extension consists of:
+
+  - 32-bit offset to the end of the index entries
+
+  - 160-bit SHA-1 over the extension types and their sizes (but not
+   their contents).  E.g. if we have "TREE" extension that is N-bytes
+   long, "REUC" extension that is M-bytes long, followed by "EOIE",
+   then the hash would be:
+
+   SHA-1("TREE" +  +
+   "REUC" + )
diff --git a/read-cache.c b/read-cache.c
index 6ba99e2c96..4781515252 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -43,6 +43,7 @@
 #define CACHE_EXT_LINK 0x6c696e6b/* "link" */
 #define CACHE_EXT_UNTRACKED 0x554E5452   /* "UNTR" */
 #define CACHE_EXT_FSMONITOR 0x46534D4E   /* "FSMN" */
+#define CACHE_EXT_ENDOFINDEXENTRIES 0x454F4945 /* "EOIE" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
@@ -1693,6 +1694,9 @@ static int read_index_extension(struct index_state 
*istate,
case CACHE_EXT_FSMONITOR:
read_fsmonitor_extension(istate, data, sz);
break;
+   case CACHE_EXT_ENDOFINDEXENTRIES:
+   /* already handled in do_read_index() */
+   break;
default:
if (*ext < 'A' || 'Z' < *ext)
return error("index uses %.4s extension, which we do 
not understand",
@@ -1883,6 +1887,9 @@ static size_t estimate_cache_size(size_t ondisk_size, 
unsigned int entries)
return ondisk_size + entries * per_entry;
 }
 
+static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
+static void write_eoie_extension(struct strbuf *sb, git_hash_ctx 
*eoie_context, size_t offset);
+
 /* remember to discard_cache() before reading a different cache! */
 int do_read_index(struct index_state *istate, const char *path, int must_exist)
 {
@@ -2190,11 +2197,15 @@ static int ce_write(git_hash_ctx *context, int fd, void 
*data, unsigned int len)
return 0;
 }
 
-static int write_index_ext_header(git_hash_ctx *context, int fd,
- unsigned int ext, unsigned int sz)
+static int write_index_ext_header(git_hash_ctx *context, git_hash_ctx 
*eoie_context,
+ int fd, unsigned int ext, unsigned int sz)
 {
ext = htonl(ext);
sz = htonl(sz);
+   if (eoie_context) {
+

[PATCH v8 0/7] speed up index load through parallelization

2018-10-10 Thread Ben Peart
From: Ben Peart 

Fixed issues identified in review the most impactful probably being plugging
some leaks and improved error handling.  Also added better error messages
and some code cleanup to code I'd touched.

The biggest change in the interdiff is the impact of renaming ieot_offset to
ieot_start and ieot_work to ieot_blocks in hopes of making it easier to read
and understand the code.

Base Ref: master
Web-Diff: https://github.com/benpeart/git/commit/6caa0bac46
Checkout: git fetch https://github.com/benpeart/git read-index-multithread-v8 
&& git checkout 6caa0bac46


### Interdiff (v7..v8):

diff --git a/read-cache.c b/read-cache.c
index 14402a0738..7acc2c86f4 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1901,7 +1901,7 @@ struct index_entry_offset
 struct index_entry_offset_table
 {
int nr;
-   struct index_entry_offset entries[0];
+   struct index_entry_offset entries[FLEX_ARRAY];
 };
 
 #ifndef NO_PTHREADS
@@ -1935,9 +1935,7 @@ static void *load_index_extensions(void *_data)
 * extension name (4-byte) and section length
 * in 4-byte network byte order.
 */
-   uint32_t extsize;
-   memcpy(, p->mmap + src_offset + 4, 4);
-   extsize = ntohl(extsize);
+   uint32_t extsize = get_be32(p->mmap + src_offset + 4);
if (read_index_extension(p->istate,
 p->mmap + src_offset,
 p->mmap + src_offset + 8,
@@ -2015,8 +2013,8 @@ struct load_cache_entries_thread_data
int offset;
const char *mmap;
struct index_entry_offset_table *ieot;
-   int ieot_offset;/* starting index into the ieot array */
-   int ieot_work;  /* count of ieot entries to process */
+   int ieot_start; /* starting index into the ieot array */
+   int ieot_blocks;/* count of ieot entries to process */
unsigned long consumed; /* return # of bytes in index file processed */
 };
 
@@ -2030,8 +2028,9 @@ static void *load_cache_entries_thread(void *_data)
int i;
 
/* iterate across all ieot blocks assigned to this thread */
-   for (i = p->ieot_offset; i < p->ieot_offset + p->ieot_work; i++) {
-   p->consumed += load_cache_entry_block(p->istate, 
p->ce_mem_pool, p->offset, p->ieot->entries[i].nr, p->mmap, 
p->ieot->entries[i].offset, NULL);
+   for (i = p->ieot_start; i < p->ieot_start + p->ieot_blocks; i++) {
+   p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool,
+   p->offset, p->ieot->entries[i].nr, p->mmap, 
p->ieot->entries[i].offset, NULL);
p->offset += p->ieot->entries[i].nr;
}
return NULL;
@@ -2040,48 +2039,45 @@ static void *load_cache_entries_thread(void *_data)
 static unsigned long load_cache_entries_threaded(struct index_state *istate, 
const char *mmap, size_t mmap_size,
unsigned long src_offset, int nr_threads, struct 
index_entry_offset_table *ieot)
 {
-   int i, offset, ieot_work, ieot_offset, err;
+   int i, offset, ieot_blocks, ieot_start, err;
struct load_cache_entries_thread_data *data;
unsigned long consumed = 0;
-   int nr;
 
/* a little sanity checking */
if (istate->name_hash_initialized)
BUG("the name hash isn't thread safe");
 
mem_pool_init(>ce_mem_pool, 0);
-   data = xcalloc(nr_threads, sizeof(struct 
load_cache_entries_thread_data));
 
/* ensure we have no more threads than we have blocks to process */
if (nr_threads > ieot->nr)
nr_threads = ieot->nr;
-   data = xcalloc(nr_threads, sizeof(struct 
load_cache_entries_thread_data));
+   data = xcalloc(nr_threads, sizeof(*data));
 
-   offset = ieot_offset = 0;
-   ieot_work = DIV_ROUND_UP(ieot->nr, nr_threads);
+   offset = ieot_start = 0;
+   ieot_blocks = DIV_ROUND_UP(ieot->nr, nr_threads);
for (i = 0; i < nr_threads; i++) {
struct load_cache_entries_thread_data *p = [i];
-   int j;
+   int nr, j;
 
-   if (ieot_offset + ieot_work > ieot->nr)
-   ieot_work = ieot->nr - ieot_offset;
+   if (ieot_start + ieot_blocks > ieot->nr)
+   ieot_blocks = ieot->nr - ieot_start;
 
p->istate = istate;
p->offset = offset;
p->mmap = mmap;
p->ieot = ieot;
-   p->ieot_offset = ieot_offset;
-   p->ieot_work = ieot_work;
+   p->ieot_start = ieot_start;
+   p->ieot_blocks = ieot_blocks;
 
/* create a mem_pool for each thread

Re: [PATCH] unpack-trees: allow missing CE_SKIP_WORKTREE objs

2018-10-09 Thread Ben Peart




On 10/9/2018 5:30 AM, Junio C Hamano wrote:

Jonathan Tan  writes:


@@ -1635,6 +1635,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, 
struct unpack_trees_options
o->result.cache_tree = cache_tree();
if (!cache_tree_fully_valid(o->result.cache_tree))
cache_tree_update(>result,
+ 
WRITE_TREE_SKIP_WORKTREE_MISSING_OK |
  WRITE_TREE_SILENT |
  WRITE_TREE_REPAIR);
}


H.  Should this be passing the bit unconditionally?  Shouldn't
it be set only when we are doing lazy clone?  A non-lazy clone that
merely uses sparse checkout has nowhere else to turn to if it loses
a blob object that currently is not necessary to complete a checkout,
unlike a repository with promisor.



I agree.  I believe this logic should only be triggered when running in 
a partial clone repo. Otherwise, we're potentially changing the behavior 
of 'normal' repos unnecessarily.




Re: [PATCH] unpack-trees: allow missing CE_SKIP_WORKTREE objs

2018-10-09 Thread Ben Peart




On 10/8/2018 5:48 PM, Jonathan Tan wrote:

Whenever a sparse checkout occurs, the existence of all blobs in the
index is verified, whether or not they are included or excluded by the
.git/info/sparse-checkout specification. This degrades performance,
significantly in the case of a partial clone, because a lazy fetch
occurs whenever the existence of a missing blob is checked.

At the point of invoking cache_tree_update() in unpack_trees(),
CE_SKIP_WORKTREE is already set on all excluded blobs
(mark_new_skip_worktree() is called twice to set CE_NEW_SKIP_WORKTREE,
then apply_sparse_checkout() is called which copies over
CE_NEW_SKIP_WORKTREE to CE_SKIP_WORKTREE). cache_tree_update() can use
this information to know which blobs are excluded, and thus skip the
checking of these.

Because cache_tree_update() is used from multiple places, this new
behavior is guarded by a new flag WRITE_TREE_SKIP_WORKTREE_MISSING_OK.
Implement this new flag, and teach unpack_trees() to invoke
cache_tree_update() with this new flag.



I wonder if preventing the download of all missing blobs should be 
limited to only the checkout command.  When you looked at the other 
places that call cache_tree_update(), does it make sense that they 
trigger the download of all the missing blobs?  For example, I suspect 
you would not want them all downloaded just to do a merge-recursive.


In full disclosure, we implemented this a couple of years ago [1] for 
GVFS and opted to do the logic a little differently.  We found that we 
didn't want to trigger the download of all missing blobs in 
cache_tree_update() for _any_ command that was executing in a partially 
cloned repo.  This is safe because if any individual blob is actually 
needed, it will get downloaded "on demand" already.


[1] https://github.com/Microsoft/git/commit/ec865500d98



Signed-off-by: Jonathan Tan 
---
  cache-tree.c |  6 +-
  cache-tree.h |  4 
  t/t1090-sparse-checkout-scope.sh | 33 
  unpack-trees.c   |  1 +
  4 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/cache-tree.c b/cache-tree.c
index 5ce51468f0..340caf2d34 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -246,6 +246,8 @@ static int update_one(struct cache_tree *it,
int missing_ok = flags & WRITE_TREE_MISSING_OK;
int dryrun = flags & WRITE_TREE_DRY_RUN;
int repair = flags & WRITE_TREE_REPAIR;
+   int skip_worktree_missing_ok =
+   flags & WRITE_TREE_SKIP_WORKTREE_MISSING_OK;
int to_invalidate = 0;
int i;
  
@@ -356,7 +358,9 @@ static int update_one(struct cache_tree *it,

}
  
  		if (is_null_oid(oid) ||

-   (mode != S_IFGITLINK && !missing_ok && 
!has_object_file(oid))) {
+   (mode != S_IFGITLINK && !missing_ok &&
+!(skip_worktree_missing_ok && ce_skip_worktree(ce)) &&
+!has_object_file(oid))) {
strbuf_release();
if (expected_missing)
return -1;
diff --git a/cache-tree.h b/cache-tree.h
index 0ab6784ffe..655d487619 100644
--- a/cache-tree.h
+++ b/cache-tree.h
@@ -40,6 +40,10 @@ void cache_tree_verify(struct index_state *);
  #define WRITE_TREE_DRY_RUN 4
  #define WRITE_TREE_SILENT 8
  #define WRITE_TREE_REPAIR 16
+/*
+ * Do not check for the presence of cache entry objects with CE_SKIP_WORKTREE.
+ */
+#define WRITE_TREE_SKIP_WORKTREE_MISSING_OK 32
  
  /* error return codes */

  #define WRITE_TREE_UNREADABLE_INDEX (-1)
diff --git a/t/t1090-sparse-checkout-scope.sh b/t/t1090-sparse-checkout-scope.sh
index 25d7c700f6..090b7fc3d3 100755
--- a/t/t1090-sparse-checkout-scope.sh
+++ b/t/t1090-sparse-checkout-scope.sh
@@ -63,4 +63,37 @@ test_expect_success 'return to full checkout of master' '
test "$(cat b)" = "modified"
  '
  
+test_expect_success 'in partial clone, sparse checkout only fetches needed blobs' '

+   test_create_repo server &&
+   git clone "file://$(pwd)/server" client &&
+
+   test_config -C server uploadpack.allowfilter 1 &&
+   test_config -C server uploadpack.allowanysha1inwant 1 &&
+   echo a >server/a &&
+   echo bb >server/b &&
+   mkdir server/c &&
+   echo ccc >server/c/c &&
+   git -C server add a b c/c &&
+   git -C server commit -m message &&
+
+   test_config -C client core.sparsecheckout 1 &&
+   test_config -C client extensions.partialclone origin &&
+   echo "!/*" >client/.git/info/sparse-checkout &&
+   echo "/a" >>client/.git/info/sparse-checkout &&
+   git -C client fetch --filter=blob:none origin &&
+   git -C client checkout FETCH_HEAD &&
+
+   git -C client rev-list HEAD \
+   --quiet --objects --missing=print >unsorted_actual &&
+   (
+   printf "?" &&
+   git hash-object server/b &&
+   printf "?" &&
+   git 

Re: t3404.6 breaks on master under GIT_FSMONITOR_TEST

2018-10-08 Thread Ben Peart




On 10/8/2018 10:19 AM, Ævar Arnfjörð Bjarmason wrote:


On Thu, Sep 06 2018, Ævar Arnfjörð Bjarmason wrote:


On Thu, Feb 01 2018, Ævar Arnfjörð Bjarmason wrote:


The GIT_FSMONITOR_TEST variable allows you to roundtrip the fsmonitor
codpath in the whole test suite. On both Debian & CentOS this breaks for
me:

 (cd t && GIT_FSMONITOR_TEST=$PWD/t7519/fsmonitor-all 
./t3404-rebase-interactive.sh -i)

Whereas this works:

 (cd t && GIT_FSMONITOR_TEST=$PWD/t7519/fsmonitor-all 
GIT_SKIP_TESTS=t3404.6 ./t3404-rebase-interactive.sh -i)

The entirety of the rest of the test suite still passes with
GIT_FSMONITOR_TEST.

This has been failing ever since GIT_FSMONITOR_TEST was introduced in
883e248b8a ("fsmonitor: teach git to optionally utilize a file system
monitor to speed up detecting new or changed files.", 2017-09-22). Under
-v -x -i:

 + echo test_must_fail: command succeeded: env 
FAKE_LINES=exec_echo_foo_>file1 1 git rebase -i HEAD^
 test_must_fail: command succeeded: env FAKE_LINES=exec_echo_foo_>file1 1 
git rebase -i HEAD^
 + return 1
 error: last command exited with $?=1
 not ok 6 - rebase -i with the exec command checks tree cleanness
 #
 #   git checkout master &&
 #   set_fake_editor &&
 #   test_must_fail env FAKE_LINES="exec_echo_foo_>file1 1" git rebase 
-i HEAD^ &&

Maybe once this is fixed running the test suite under GIT_FSMONITOR_TEST
would be a useful Travis target, but I don't know the current status of
adding new options to Travis.


*Poke* at this again. Ben, or anyone else with knowledge of fsmonitor:
Can you reproduce this?

This failure along with the one I noted in
https://public-inbox.org/git/87tvn2remn@evledraar.gmail.com/ is
failing the tests on Linux when run with GIT_FSMONITOR_TEST.

I'm looking at this again because SZEDER's patches to the split index
reminded me again that we have these long-standing failures in rare test
modes (see
https://public-inbox.org/git/87va7ireuu@evledraar.gmail.com/ for the
split index discussion).


For what it's worth this is still broken, but more importantly (I'm not
just keeping bumping the same thing) the only thing that's now broken
under fsmonitor. I.e. my skip config is now GIT_SKIP_TESTS="t3404.7"
whereas before 43f1180814 ("git-mv: allow submodules and fsmonitor to
work together", 2018-09-10) I needed to add "t7411.3 t7411.4" to that.



I glanced at this for a few minutes but it wasn't obvious what was 
happening.  It will take some additional effort to dig into and figure 
out the underlying issue.  I haven't forgotten about this - it's still 
on my list, just below some other things I need to get finished up first.


Re: [PATCH v7 7/7] read-cache: load cache entries on worker threads

2018-10-02 Thread Ben Peart




On 10/1/2018 1:09 PM, Duy Nguyen wrote:

On Mon, Oct 1, 2018 at 3:46 PM Ben Peart  wrote:

+/*
+ * A helper function that will load the specified range of cache entries
+ * from the memory mapped file and add them to the given index.
+ */
+static unsigned long load_cache_entry_block(struct index_state *istate,
+   struct mem_pool *ce_mem_pool, int offset, int nr, const 
char *mmap,


Please use unsigned long for offset (here and in the thread_data
struct). We should use off_t instead, but that's out of scope. At
least keep offset type consistent in here.



Unfortunately, this code is littered with different types for size and 
offset.  "int" is the most common but there are also off_t, size_t and 
some unsigned long as well.  Currently all of them are at least 32 bits 
so until we need to have an index larger than 32 bits, we should be OK. 
I agree, fixing them all is outside the scope of this patch.



+   unsigned long start_offset, const struct cache_entry 
*previous_ce)


I don't think you want to pass previous_ce in. You always pass NULL
anyway. And if this function is about loading a block (i.e. at block
boundary) then initial previous_ce _must_ be NULL or things break
horribly.



The function as written can load any arbitrary subset of cache entries 
as long as previous_ce is set correctly.  I currently only use it on 
block boundaries but I don't see any good reason to limit its 
capabilities by moving what code passes the NULL in one function deeper.



@@ -1959,20 +2007,125 @@ static void *load_index_extensions(void *_data)

  #define THREAD_COST(1)

+struct load_cache_entries_thread_data
+{
+   pthread_t pthread;
+   struct index_state *istate;
+   struct mem_pool *ce_mem_pool;
+   int offset;
+   const char *mmap;
+   struct index_entry_offset_table *ieot;
+   int ieot_offset;/* starting index into the ieot array */


If it's an index, maybe just name it ieot_index and we can get rid of
the comment.


+   int ieot_work;  /* count of ieot entries to process */


Maybe instead of saving the whole "ieot" table here. Add

  struct index_entry_offset *blocks;

which points to the starting block for this thread and rename that
mysterious (to me) ieot_work to nr_blocks. The thread will have access
from blocks[0] to blocks[nr_blocks - 1]



Meh. Either way you have to figure out there are a block of entries and 
each thread is going to process some subset of those entries.  You can 
do the base + offset math here or down in the calling function but it 
has to happen (and be understood) either way.


I'll rename ieot_offset to ieot_start and ieot_work to ieot_blocks which 
should hopefully help make it more obvious what they do.



+   unsigned long consumed; /* return # of bytes in index file processed */
+};
+
+/*
+ * A thread proc to run the load_cache_entries() computation
+ * across multiple background threads.
+ */
+static void *load_cache_entries_thread(void *_data)
+{
+   struct load_cache_entries_thread_data *p = _data;
+   int i;
+
+   /* iterate across all ieot blocks assigned to this thread */
+   for (i = p->ieot_offset; i < p->ieot_offset + p->ieot_work; i++) {
+   p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool, p->offset, 
p->ieot->entries[i].nr, p->mmap, p->ieot->entries[i].offset, NULL);


Please wrap this long line.


+   p->offset += p->ieot->entries[i].nr;
+   }
+   return NULL;
+}
+
+static unsigned long load_cache_entries_threaded(struct index_state *istate, 
const char *mmap, size_t mmap_size,
+   unsigned long src_offset, int nr_threads, struct 
index_entry_offset_table *ieot)
+{
+   int i, offset, ieot_work, ieot_offset, err;
+   struct load_cache_entries_thread_data *data;
+   unsigned long consumed = 0;
+   int nr;
+
+   /* a little sanity checking */
+   if (istate->name_hash_initialized)
+   BUG("the name hash isn't thread safe");
+
+   mem_pool_init(>ce_mem_pool, 0);
+   data = xcalloc(nr_threads, sizeof(struct 
load_cache_entries_thread_data));


we normally use sizeof(*data) instead of sizeof(struct ...)


+
+   /* ensure we have no more threads than we have blocks to process */
+   if (nr_threads > ieot->nr)
+   nr_threads = ieot->nr;
+   data = xcalloc(nr_threads, sizeof(struct 
load_cache_entries_thread_data));


eh.. reallocate the same "data"?



Thanks, good catch - I hate leaky code.


+
+   offset = ieot_offset = 0;
+   ieot_work = DIV_ROUND_UP(ieot->nr, nr_threads);
+   for (i = 0; i < nr_threads; i++) {
+   struct load_cache_entries_thread_data *p = [i];
+   int j;
+
+   if (ieot_offset + ieot_work > ieot->nr)
+

Re: [PATCH v7 6/7] ieot: add Index Entry Offset Table (IEOT) extension

2018-10-02 Thread Ben Peart




On 10/1/2018 12:27 PM, Duy Nguyen wrote:

On Mon, Oct 1, 2018 at 3:46 PM Ben Peart  wrote:

@@ -1888,6 +1890,23 @@ static size_t estimate_cache_size(size_t ondisk_size, 
unsigned int entries)
 return ondisk_size + entries * per_entry;
  }

+struct index_entry_offset
+{
+   /* starting byte offset into index file, count of index entries in this 
block */
+   int offset, nr;


uint32_t?


+};
+
+struct index_entry_offset_table
+{
+   int nr;
+   struct index_entry_offset entries[0];


Use FLEX_ARRAY. Some compilers are not happy with an array of zero
items if I remember correctly.



Thanks for the warning, I'll update that.


@@ -2523,6 +2551,9 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
 struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
 int drop_cache_tree = istate->drop_cache_tree;
 off_t offset;
+   int ieot_work = 1;
+   struct index_entry_offset_table *ieot = NULL;
+   int nr;


There are a bunch of stuff going on in this function, maybe rename
this to nr_threads or nr_blocks to be less generic.



I can add a nr_threads variable to make this more obvious.



 for (i = removed = extended = 0; i < entries; i++) {
 if (cache[i]->ce_flags & CE_REMOVE)
@@ -2556,7 +2587,38 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
 if (ce_write(, newfd, , sizeof(hdr)) < 0)
 return -1;

+#ifndef NO_PTHREADS
+   if ((nr = git_config_get_index_threads()) != 1) {


Maybe keep this assignment out of "if".


+   int ieot_blocks, cpus;
+
+   /*
+* ensure default number of ieot blocks maps evenly to the
+* default number of threads that will process them
+*/
+   if (!nr) {
+   ieot_blocks = istate->cache_nr / THREAD_COST;
+   cpus = online_cpus();
+   if (ieot_blocks > cpus - 1)
+   ieot_blocks = cpus - 1;


The " - 1" here is for extension thread, yes? Probably worth a comment.


+   } else {
+   ieot_blocks = nr;
+   }
+
+   /*
+* no reason to write out the IEOT extension if we don't
+* have enough blocks to utilize multi-threading
+*/
+   if (ieot_blocks > 1) {
+   ieot = xcalloc(1, sizeof(struct 
index_entry_offset_table)
+   + (ieot_blocks * sizeof(struct 
index_entry_offset)));


Use FLEX_ALLOC_MEM() after you declare ..._table with FLEX_ARRAY.



FLEX_ALLOC_MEM() is focused on variable length "char" data.  All uses of 
FLEX_ARRAY with non char data did the allocation themselves to avoid the 
unnecessary memcpy() that comes with FLEX_ALLOC_MEM.



This ieot needs to be freed also and should be before any "return -1"
in this function.



Good catch. Will do.


+   ieot->nr = 0;
+   ieot_work = DIV_ROUND_UP(entries, ieot_blocks);


Perhaps a better name for ioet_work? This looks like the number of
cache entries per block.


+   }
+   }
+#endif
+
 offset = lseek(newfd, 0, SEEK_CUR) + write_buffer_len;
+   nr = 0;


Eh.. repurpose nr to count cache entries now? It's kinda hard to follow.


 previous_name = (hdr_version == 4) ? _name_buf : NULL;

 for (i = 0; i < entries; i++) {
@@ -2578,11 +2640,31 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,

 drop_cache_tree = 1;
 }
+   if (ieot && i && (i % ieot_work == 0)) {
+   ieot->entries[ieot->nr].nr = nr;
+   ieot->entries[ieot->nr].offset = offset;
+   ieot->nr++;
+   /*
+* If we have a V4 index, set the first byte to an 
invalid
+* character to ensure there is nothing common with the 
previous
+* entry
+*/
+   if (previous_name)
+   previous_name->buf[0] = 0;
+   nr = 0;
+   offset = lseek(newfd, 0, SEEK_CUR) + write_buffer_len;


This only works correctly if the ce_write_entry() from the last
iteration has flushed everything to out to newfd. Maybe it does, but
it's error prone to rely on that in my opinion. Maybe we need an
explicit ce_write_flush() here to make sure.



This logic already takes any unflushed data into account - the offset is 
what has been flushed to disk (lseek) plus the amount still in the 
buffer (write_buffer_len) waiting to be flushed.  I don

Re: [PATCH v7 3/7] eoie: add End of Index Entry (EOIE) extension

2018-10-02 Thread Ben Peart




On 10/1/2018 11:30 AM, Duy Nguyen wrote:

On Mon, Oct 1, 2018 at 3:46 PM Ben Peart  wrote:

@@ -2479,6 +2491,7 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
 if (ce_write(, newfd, , sizeof(hdr)) < 0)
 return -1;

+   offset = lseek(newfd, 0, SEEK_CUR) + write_buffer_len;


Note, lseek() could in theory return -1 on error. Looking at the error
code list in the man page it's pretty unlikely though, unless



Good catch. I'll add the logic to check for an error.


+static size_t read_eoie_extension(const char *mmap, size_t mmap_size)
+{
+   /*
+* The end of index entries (EOIE) extension is guaranteed to be last
+* so that it can be found by scanning backwards from the EOF.
+*
+* "EOIE"
+* <4-byte length>
+* <4-byte offset>
+* <20-byte hash>
+*/
+   const char *index, *eoie;
+   uint32_t extsize;
+   size_t offset, src_offset;
+   unsigned char hash[GIT_MAX_RAWSZ];
+   git_hash_ctx c;
+
+   /* ensure we have an index big enough to contain an EOIE extension */
+   if (mmap_size < sizeof(struct cache_header) + EOIE_SIZE_WITH_HEADER + 
the_hash_algo->rawsz)


Using sizeof() for on-disk structures could be dangerous because you
don't know how much padding there could be (I'm not sure if it's
actually specified in the C language spec). I've checked, on at least
x86 and amd64, sizeof(struct cache_header) is 12 bytes, but I don't
know if there are any crazy architectures out there that set higher
padding.



This must be safe as the same code has been in do_read_index() and 
verify_index_from() for a long time.


Re: [PATCH v7 5/7] read-cache: load cache extensions on a worker thread

2018-10-02 Thread Ben Peart




On 10/1/2018 11:50 AM, Duy Nguyen wrote:

On Mon, Oct 1, 2018 at 3:46 PM Ben Peart  wrote:

@@ -1890,6 +1891,46 @@ static size_t estimate_cache_size(size_t ondisk_size, 
unsigned int entries)
  static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
  static void write_eoie_extension(struct strbuf *sb, git_hash_ctx 
*eoie_context, size_t offset);

+struct load_index_extensions
+{
+#ifndef NO_PTHREADS
+   pthread_t pthread;
+#endif
+   struct index_state *istate;
+   const char *mmap;
+   size_t mmap_size;
+   unsigned long src_offset;
+};
+
+static void *load_index_extensions(void *_data)
+{
+   struct load_index_extensions *p = _data;
+   unsigned long src_offset = p->src_offset;
+
+   while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
+   /* After an array of active_nr index entries,
+* there can be arbitrary number of extended
+* sections, each of which is prefixed with
+* extension name (4-byte) and section length
+* in 4-byte network byte order.
+*/
+   uint32_t extsize;
+   memcpy(, p->mmap + src_offset + 4, 4);
+   extsize = ntohl(extsize);


This could be get_be32() so that the next person will not need to do
another cleanup patch.



Good point, it was existing code so I focused on doing the minimal 
change possible but I can clean it up since I'm touching it already.



+   if (read_index_extension(p->istate,
+   p->mmap + src_offset,
+   p->mmap + src_offset + 8,
+   extsize) < 0) {


This alignment is misleading because the conditions are aligned with
the code block below. If you can't align it with the '(', then just
add another tab.



Ditto. I'll make it:

uint32_t extsize = get_be32(p->mmap + src_offset + 4);
if (read_index_extension(p->istate,
 p->mmap + src_offset,
 p->mmap + src_offset + 8,
 extsize) < 0) {
munmap((void *)p->mmap, p->mmap_size);
die(_("index file corrupt"));
}



+   munmap((void *)p->mmap, p->mmap_size);


This made me pause for a bit since we should not need to cast back to
void *. It turns out you need this because mmap pointer is const. But
you don't even need to munmap here. We're dying, the OS will clean
everything up.



I had the same thought about "we're about to die so why bother calling 
munmap() here" but I decided rather than change it, I'd follow the 
existing pattern just in case there was some platform/bug that required 
it.  I apparently doesn't cause harm as it's been that way a long time.



+   die(_("index file corrupt"));
+   }
+   src_offset += 8;
+   src_offset += extsize;
+   }
+
+   return NULL;
+}


Re: [PATCH v7 3/7] eoie: add End of Index Entry (EOIE) extension

2018-10-02 Thread Ben Peart




On 10/1/2018 11:17 AM, SZEDER Gábor wrote:

On Mon, Oct 01, 2018 at 09:45:52AM -0400, Ben Peart wrote:

From: Ben Peart 

The End of Index Entry (EOIE) is used to locate the end of the variable
length index entries and the beginning of the extensions. Code can take
advantage of this to quickly locate the index extensions without having
to parse through all of the index entries.

Because it must be able to be loaded before the variable length cache
entries and other index extensions, this extension must be written last.
The signature for this extension is { 'E', 'O', 'I', 'E' }.

The extension consists of:

- 32-bit offset to the end of the index entries

- 160-bit SHA-1 over the extension types and their sizes (but not
their contents).  E.g. if we have "TREE" extension that is N-bytes
long, "REUC" extension that is M-bytes long, followed by "EOIE",
then the hash would be:

SHA-1("TREE" +  +
"REUC" + )

Signed-off-by: Ben Peart 


I think the commit message should explicitly mention that this this
extension

   - will always be written and why,
   - but is optional, so other Git implementations not supporting it will
 have no troubles reading the index,
   - and that it is written even to the shared index and why, and that
 because of this the index checksums in t1700 had to be updated.



Sure, I'll add that additional information to the commit message on the 
next spin.


[PATCH v7 6/7] ieot: add Index Entry Offset Table (IEOT) extension

2018-10-01 Thread Ben Peart
From: Ben Peart 

This patch enables addressing the CPU cost of loading the index by adding
additional data to the index that will allow us to efficiently multi-
thread the loading and conversion of cache entries.

It accomplishes this by adding an (optional) index extension that is a
table of offsets to blocks of cache entries in the index file.  To make
this work for V4 indexes, when writing the cache entries, it periodically
"resets" the prefix-compression by encoding the current entry as if the
path name for the previous entry is completely different and saves the
offset of that entry in the IEOT.  Basically, with V4 indexes, it
generates offsets into blocks of prefix-compressed entries.

Signed-off-by: Ben Peart 
---
 Documentation/technical/index-format.txt |  18 +++
 read-cache.c | 173 +++
 2 files changed, 191 insertions(+)

diff --git a/Documentation/technical/index-format.txt 
b/Documentation/technical/index-format.txt
index 6bc2d90f7f..7c4d67aa6a 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -337,3 +337,21 @@ The remaining data of each directory block is grouped by 
type:
 
SHA-1("TREE" +  +
"REUC" + )
+
+== Index Entry Offset Table
+
+  The Index Entry Offset Table (IEOT) is used to help address the CPU
+  cost of loading the index by enabling multi-threading the process of
+  converting cache entries from the on-disk format to the in-memory format.
+  The signature for this extension is { 'I', 'E', 'O', 'T' }.
+
+  The extension consists of:
+
+  - 32-bit version (currently 1)
+
+  - A number of index offset entries each consisting of:
+
+- 32-bit offset from the begining of the file to the first cache entry
+   in this block of entries.
+
+- 32-bit count of cache entries in this block
diff --git a/read-cache.c b/read-cache.c
index 77083ab8bb..9557376e78 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -45,6 +45,7 @@
 #define CACHE_EXT_UNTRACKED 0x554E5452   /* "UNTR" */
 #define CACHE_EXT_FSMONITOR 0x46534D4E   /* "FSMN" */
 #define CACHE_EXT_ENDOFINDEXENTRIES 0x454F4945 /* "EOIE" */
+#define CACHE_EXT_INDEXENTRYOFFSETTABLE 0x49454F54 /* "IEOT" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
@@ -1696,6 +1697,7 @@ static int read_index_extension(struct index_state 
*istate,
read_fsmonitor_extension(istate, data, sz);
break;
case CACHE_EXT_ENDOFINDEXENTRIES:
+   case CACHE_EXT_INDEXENTRYOFFSETTABLE:
/* already handled in do_read_index() */
break;
default:
@@ -1888,6 +1890,23 @@ static size_t estimate_cache_size(size_t ondisk_size, 
unsigned int entries)
return ondisk_size + entries * per_entry;
 }
 
+struct index_entry_offset
+{
+   /* starting byte offset into index file, count of index entries in this 
block */
+   int offset, nr;
+};
+
+struct index_entry_offset_table
+{
+   int nr;
+   struct index_entry_offset entries[0];
+};
+
+#ifndef NO_PTHREADS
+static struct index_entry_offset_table *read_ieot_extension(const char *mmap, 
size_t mmap_size, size_t offset);
+static void write_ieot_extension(struct strbuf *sb, struct 
index_entry_offset_table *ieot);
+#endif
+
 static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
 static void write_eoie_extension(struct strbuf *sb, git_hash_ctx 
*eoie_context, size_t offset);
 
@@ -1931,6 +1950,15 @@ static void *load_index_extensions(void *_data)
return NULL;
 }
 
+/*
+ * Mostly randomly chosen maximum thread counts: we
+ * cap the parallelism to online_cpus() threads, and we want
+ * to have at least 1 cache entries per thread for it to
+ * be worth starting a thread.
+ */
+
+#define THREAD_COST(1)
+
 /* remember to discard_cache() before reading a different cache! */
 int do_read_index(struct index_state *istate, const char *path, int must_exist)
 {
@@ -2523,6 +2551,9 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
int drop_cache_tree = istate->drop_cache_tree;
off_t offset;
+   int ieot_work = 1;
+   struct index_entry_offset_table *ieot = NULL;
+   int nr;
 
for (i = removed = extended = 0; i < entries; i++) {
if (cache[i]->ce_flags & CE_REMOVE)
@@ -2556,7 +2587,38 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
if (ce_write(, newfd, , sizeof(hdr)) < 0)
return -1;
 
+#ifndef NO_PTHREADS
+   if ((nr = git_config_get_index_threads()) != 1) {
+   int ieot_blocks, cpus;
+
+   /*
+* ensure default number of ieot blocks 

[PATCH v7 5/7] read-cache: load cache extensions on a worker thread

2018-10-01 Thread Ben Peart
From: Ben Peart 

This patch helps address the CPU cost of loading the index by loading
the cache extensions on a worker thread in parallel with loading the cache
entries.

In some cases, loading the extensions takes longer than loading the
cache entries so this patch utilizes the new EOIE to start the thread to
load the extensions before loading all the cache entries in parallel.

This is possible because the current extensions don't access the cache
entries in the index_state structure so are OK that they don't all exist
yet.

The CACHE_EXT_TREE, CACHE_EXT_RESOLVE_UNDO, and CACHE_EXT_UNTRACKED
extensions don't even get a pointer to the index so don't have access to the
cache entries.

CACHE_EXT_LINK only uses the index_state to initialize the split index.
CACHE_EXT_FSMONITOR only uses the index_state to save the fsmonitor last
update and dirty flags.

I used p0002-read-cache.sh to generate some performance data:

Test w/100,000 files reduced the time by 0.53%
Test w/1,000,000 files reduced the time by 27.78%

Signed-off-by: Ben Peart 
---
 read-cache.c | 97 +++-
 1 file changed, 81 insertions(+), 16 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index af2605a168..77083ab8bb 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -23,6 +23,7 @@
 #include "split-index.h"
 #include "utf8.h"
 #include "fsmonitor.h"
+#include "thread-utils.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 
@@ -1890,6 +1891,46 @@ static size_t estimate_cache_size(size_t ondisk_size, 
unsigned int entries)
 static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
 static void write_eoie_extension(struct strbuf *sb, git_hash_ctx 
*eoie_context, size_t offset);
 
+struct load_index_extensions
+{
+#ifndef NO_PTHREADS
+   pthread_t pthread;
+#endif
+   struct index_state *istate;
+   const char *mmap;
+   size_t mmap_size;
+   unsigned long src_offset;
+};
+
+static void *load_index_extensions(void *_data)
+{
+   struct load_index_extensions *p = _data;
+   unsigned long src_offset = p->src_offset;
+
+   while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) {
+   /* After an array of active_nr index entries,
+* there can be arbitrary number of extended
+* sections, each of which is prefixed with
+* extension name (4-byte) and section length
+* in 4-byte network byte order.
+*/
+   uint32_t extsize;
+   memcpy(, p->mmap + src_offset + 4, 4);
+   extsize = ntohl(extsize);
+   if (read_index_extension(p->istate,
+   p->mmap + src_offset,
+   p->mmap + src_offset + 8,
+   extsize) < 0) {
+   munmap((void *)p->mmap, p->mmap_size);
+   die(_("index file corrupt"));
+   }
+   src_offset += 8;
+   src_offset += extsize;
+   }
+
+   return NULL;
+}
+
 /* remember to discard_cache() before reading a different cache! */
 int do_read_index(struct index_state *istate, const char *path, int must_exist)
 {
@@ -1900,6 +1941,11 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
const char *mmap;
size_t mmap_size;
const struct cache_entry *previous_ce = NULL;
+   struct load_index_extensions p;
+   size_t extension_offset = 0;
+#ifndef NO_PTHREADS
+   int nr_threads;
+#endif
 
if (istate->initialized)
return istate->cache_nr;
@@ -1936,6 +1982,30 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
istate->cache = xcalloc(istate->cache_alloc, sizeof(*istate->cache));
istate->initialized = 1;
 
+   p.istate = istate;
+   p.mmap = mmap;
+   p.mmap_size = mmap_size;
+
+#ifndef NO_PTHREADS
+   nr_threads = git_config_get_index_threads();
+   if (!nr_threads)
+   nr_threads = online_cpus();
+
+   if (nr_threads > 1) {
+   extension_offset = read_eoie_extension(mmap, mmap_size);
+   if (extension_offset) {
+   int err;
+
+   p.src_offset = extension_offset;
+   err = pthread_create(, NULL, 
load_index_extensions, );
+   if (err)
+   die(_("unable to create load_index_extensions 
thread: %s"), strerror(err));
+
+   nr_threads--;
+   }
+   }
+#endif
+
if (istate->version == 4) {
mem_pool_init(>ce_mem_pool,
  
estimate_cache_size_from_compressed(istate->cache_nr));
@@ -1960,22 +2030,17 @@ int do_read_index(struct index_st

[PATCH v7 4/7] config: add new index.threads config setting

2018-10-01 Thread Ben Peart
From: Ben Peart 

Add support for a new index.threads config setting which will be used to
control the threading code in do_read_index().  A value of 0 will tell the
index code to automatically determine the correct number of threads to use.
A value of 1 will make the code single threaded.  A value greater than 1
will set the maximum number of threads to use.

For testing purposes, this setting can be overwritten by setting the
GIT_TEST_INDEX_THREADS= environment variable to a value greater than 0.

Signed-off-by: Ben Peart 
---
 Documentation/config.txt |  7 +++
 config.c | 18 ++
 config.h |  1 +
 t/README |  5 +
 t/t1700-split-index.sh   |  5 +
 5 files changed, 36 insertions(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index ad0f4510c3..8fd973b76b 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2413,6 +2413,13 @@ imap::
The configuration variables in the 'imap' section are described
in linkgit:git-imap-send[1].
 
+index.threads::
+   Specifies the number of threads to spawn when loading the index.
+   This is meant to reduce index load time on multiprocessor machines.
+   Specifying 0 or 'true' will cause Git to auto-detect the number of
+   CPU's and set the number of threads accordingly. Specifying 1 or
+   'false' will disable multithreading. Defaults to 'true'.
+
 index.version::
Specify the version with which new index files should be
initialized.  This does not affect existing repositories.
diff --git a/config.c b/config.c
index 3461993f0a..2ee29f6f86 100644
--- a/config.c
+++ b/config.c
@@ -2289,6 +2289,24 @@ int git_config_get_fsmonitor(void)
return 0;
 }
 
+int git_config_get_index_threads(void)
+{
+   int is_bool, val = 0;
+
+   val = git_env_ulong("GIT_TEST_INDEX_THREADS", 0);
+   if (val)
+   return val;
+
+   if (!git_config_get_bool_or_int("index.threads", _bool, )) {
+   if (is_bool)
+   return val ? 0 : 1;
+   else
+   return val;
+   }
+
+   return 0; /* auto */
+}
+
 NORETURN
 void git_die_config_linenr(const char *key, const char *filename, int linenr)
 {
diff --git a/config.h b/config.h
index ab46e0165d..a06027e69b 100644
--- a/config.h
+++ b/config.h
@@ -250,6 +250,7 @@ extern int git_config_get_untracked_cache(void);
 extern int git_config_get_split_index(void);
 extern int git_config_get_max_percent_split_change(void);
 extern int git_config_get_fsmonitor(void);
+extern int git_config_get_index_threads(void);
 
 /* This dies if the configured or default date is in the future */
 extern int git_config_get_expiry(const char *key, const char **output);
diff --git a/t/README b/t/README
index 3ea6c85460..8f5c0620ea 100644
--- a/t/README
+++ b/t/README
@@ -327,6 +327,11 @@ GIT_TEST_COMMIT_GRAPH=, when true, forces the 
commit-graph to
 be written after every 'git commit' command, and overrides the
 'core.commitGraph' setting to true.
 
+GIT_TEST_INDEX_THREADS= enables exercising the multi-threaded loading
+of the index for the whole test suite by bypassing the default number of
+cache entries and thread minimums. Setting this to 1 will make the
+index loading single threaded.
+
 Naming Tests
 
 
diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh
index 8e17f8e7a0..ef9349bd70 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -6,7 +6,12 @@ test_description='split index mode tests'
 
 # We need total control of index splitting here
 sane_unset GIT_TEST_SPLIT_INDEX
+
+# Testing a hard coded SHA against an index with an extension
+# that can vary from run to run is problematic so we disable
+# those extensions.
 sane_unset GIT_FSMONITOR_TEST
+sane_unset GIT_TEST_INDEX_THREADS
 
 test_expect_success 'enable split index' '
git config splitIndex.maxPercentChange 100 &&
-- 
2.18.0.windows.1



[PATCH v7 7/7] read-cache: load cache entries on worker threads

2018-10-01 Thread Ben Peart
From: Ben Peart 

This patch helps address the CPU cost of loading the index by utilizing
the Index Entry Offset Table (IEOT) to divide loading and conversion of
the cache entries across multiple threads in parallel.

I used p0002-read-cache.sh to generate some performance data:

Test w/100,000 files reduced the time by 32.24%
Test w/1,000,000 files reduced the time by -4.77%

Note that on the 1,000,000 files case, multi-threading the cache entry parsing
does not yield a performance win.  This is because the cost to parse the
index extensions in this repo, far outweigh the cost of loading the cache
entries.

The high cost of parsing the index extensions is driven by the cache tree
and the untracked cache extensions. As this is currently the longest pole,
any reduction in this time will reduce the overall index load times so is
worth further investigation in another patch series.

Signed-off-by: Ben Peart 
---
 read-cache.c | 224 +++
 1 file changed, 189 insertions(+), 35 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 9557376e78..14402a0738 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1720,7 +1720,8 @@ int read_index(struct index_state *istate)
return read_index_from(istate, get_index_file(), get_git_dir());
 }
 
-static struct cache_entry *create_from_disk(struct index_state *istate,
+static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
+   unsigned int version,
struct ondisk_cache_entry *ondisk,
unsigned long *ent_size,
const struct cache_entry 
*previous_ce)
@@ -1737,7 +1738,7 @@ static struct cache_entry *create_from_disk(struct 
index_state *istate,
 * number of bytes to be stripped from the end of the previous name,
 * and the bytes to append to the result, to come up with its name.
 */
-   int expand_name_field = istate->version == 4;
+   int expand_name_field = version == 4;
 
/* On-disk flags are just 16 bits */
flags = get_be16(>flags);
@@ -1761,16 +1762,17 @@ static struct cache_entry *create_from_disk(struct 
index_state *istate,
const unsigned char *cp = (const unsigned char *)name;
size_t strip_len, previous_len;
 
-   previous_len = previous_ce ? previous_ce->ce_namelen : 0;
+   /* If we're at the begining of a block, ignore the previous 
name */
strip_len = decode_varint();
-   if (previous_len < strip_len) {
-   if (previous_ce)
+   if (previous_ce) {
+   previous_len = previous_ce->ce_namelen;
+   if (previous_len < strip_len)
die(_("malformed name field in the index, near 
path '%s'"),
-   previous_ce->name);
-   else
-   die(_("malformed name field in the index in the 
first path"));
+   previous_ce->name);
+   copy_len = previous_len - strip_len;
+   } else {
+   copy_len = 0;
}
-   copy_len = previous_len - strip_len;
name = (const char *)cp;
}
 
@@ -1780,7 +1782,7 @@ static struct cache_entry *create_from_disk(struct 
index_state *istate,
len += copy_len;
}
 
-   ce = mem_pool__ce_alloc(istate->ce_mem_pool, len);
+   ce = mem_pool__ce_alloc(ce_mem_pool, len);
 
ce->ce_stat_data.sd_ctime.sec = get_be32(>ctime.sec);
ce->ce_stat_data.sd_mtime.sec = get_be32(>mtime.sec);
@@ -1950,6 +1952,52 @@ static void *load_index_extensions(void *_data)
return NULL;
 }
 
+/*
+ * A helper function that will load the specified range of cache entries
+ * from the memory mapped file and add them to the given index.
+ */
+static unsigned long load_cache_entry_block(struct index_state *istate,
+   struct mem_pool *ce_mem_pool, int offset, int nr, const 
char *mmap,
+   unsigned long start_offset, const struct cache_entry 
*previous_ce)
+{
+   int i;
+   unsigned long src_offset = start_offset;
+
+   for (i = offset; i < offset + nr; i++) {
+   struct ondisk_cache_entry *disk_ce;
+   struct cache_entry *ce;
+   unsigned long consumed;
+
+   disk_ce = (struct ondisk_cache_entry *)(mmap + src_offset);
+   ce = create_from_disk(ce_mem_pool, istate->version, disk_ce, &consumed, previous_ce);
+   set_index_entry(istate, i, ce);
+
+   src_offset += consumed;
+   previous_ce = ce;
+   }
+   return src_offset - start_offset;
+}

[PATCH v7 0/7] speed up index load through parallelization

2018-10-01 Thread Ben Peart
Thanks for all the feedback.

The biggest change since the last version is how this patch series interacts
with the split-index feature.  With a split index, most of the cache entries
are stored in the shared index so would benefit from multi-threaded parsing.
To enable that, the EOIE and IEOT extensions are now written into the shared
index (rather than being stripped out like the other extensions).

Because of this, I can now update the tests in t1700-split-index.sh to have
updated SHA values that include the EOIE extension instead of disabling the
extension.

Using p0002-read-cache.sh to generate some performance numbers shows how
each of the various patches contribute to the overall performance win on a
particularly large repo.

Repo w/3M files         Baseline     Optimize V4      Extensions       Entries
-------------------------------------------------------------------------------
0002.1: read_cache      693.29       655.65 -5.4%     470.71 -32.1%    399.62 -42.4%

Note how this cuts nearly 300ms off the index load time!

Base Ref: master
Web-Diff: https://github.com/benpeart/git/commit/c1125a5d9a
Checkout: git fetch https://github.com/benpeart/git read-index-multithread-v7 
&& git checkout c1125a5d9a


### Patches

Ben Peart (6):
  read-cache: clean up casting and byte decoding
  eoie: add End of Index Entry (EOIE) extension
  config: add new index.threads config setting
  read-cache: load cache extensions on a worker thread
  ieot: add Index Entry Offset Table (IEOT) extension
  read-cache: load cache entries on worker threads

Nguyễn Thái Ngọc Duy (1):
  read-cache.c: optimize reading index format v4

 Documentation/config.txt |   7 +
 Documentation/technical/index-format.txt |  41 ++
 config.c |  18 +
 config.h |   1 +
 read-cache.c | 749 +++
 t/README |   5 +
 t/t1700-split-index.sh   |  13 +-
 7 files changed, 715 insertions(+), 119 deletions(-)


base-commit: fe8321ec057f9231c26c29b364721568e58040f7
-- 
2.18.0.windows.1




[PATCH v7 2/7] read-cache: clean up casting and byte decoding

2018-10-01 Thread Ben Peart
From: Ben Peart 

This patch does a clean up pass to minimize the casting required to work
with the memory mapped index (mmap).

It also makes the decoding of network byte order more consistent by using
get_be32() where possible.

Signed-off-by: Ben Peart 
---
 read-cache.c | 23 +++
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 583a4fb1f8..6ba99e2c96 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1650,7 +1650,7 @@ int verify_index_checksum;
 /* Allow fsck to force verification of the cache entry order. */
 int verify_ce_order;
 
-static int verify_hdr(struct cache_header *hdr, unsigned long size)
+static int verify_hdr(const struct cache_header *hdr, unsigned long size)
 {
git_hash_ctx c;
unsigned char hash[GIT_MAX_RAWSZ];
@@ -1674,7 +1674,7 @@ static int verify_hdr(struct cache_header *hdr, unsigned 
long size)
 }
 
 static int read_index_extension(struct index_state *istate,
-   const char *ext, void *data, unsigned long sz)
+   const char *ext, const char *data, unsigned 
long sz)
 {
switch (CACHE_EXT(ext)) {
case CACHE_EXT_TREE:
@@ -1889,8 +1889,8 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
int fd, i;
struct stat st;
unsigned long src_offset;
-   struct cache_header *hdr;
-   void *mmap;
+   const struct cache_header *hdr;
+   const char *mmap;
size_t mmap_size;
const struct cache_entry *previous_ce = NULL;
 
@@ -1918,7 +1918,7 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
die_errno("unable to map index file");
close(fd);
 
-   hdr = mmap;
+   hdr = (const struct cache_header *)mmap;
if (verify_hdr(hdr, mmap_size) < 0)
goto unmap;
 
@@ -1943,7 +1943,7 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
struct cache_entry *ce;
unsigned long consumed;
 
-   disk_ce = (struct ondisk_cache_entry *)((char *)mmap + 
src_offset);
+   disk_ce = (struct ondisk_cache_entry *)(mmap + src_offset);
ce = create_from_disk(istate, disk_ce, &consumed, previous_ce);
set_index_entry(istate, i, ce);
 
@@ -1961,21 +1961,20 @@ int do_read_index(struct index_state *istate, const 
char *path, int must_exist)
 * in 4-byte network byte order.
 */
uint32_t extsize;
-   memcpy(&extsize, (char *)mmap + src_offset + 4, 4);
-   extsize = ntohl(extsize);
+   extsize = get_be32(mmap + src_offset + 4);
if (read_index_extension(istate,
-(const char *) mmap + src_offset,
-(char *) mmap + src_offset + 8,
+mmap + src_offset,
+mmap + src_offset + 8,
 extsize) < 0)
goto unmap;
src_offset += 8;
src_offset += extsize;
}
-   munmap(mmap, mmap_size);
+   munmap((void *)mmap, mmap_size);
return istate->cache_nr;
 
 unmap:
-   munmap(mmap, mmap_size);
+   munmap((void *)mmap, mmap_size);
die("index file corrupt");
 }
 
-- 
2.18.0.windows.1



[PATCH v7 3/7] eoie: add End of Index Entry (EOIE) extension

2018-10-01 Thread Ben Peart
From: Ben Peart 

The End of Index Entry (EOIE) is used to locate the end of the variable
length index entries and the beginning of the extensions. Code can take
advantage of this to quickly locate the index extensions without having
to parse through all of the index entries.

Because it must be able to be loaded before the variable length cache
entries and other index extensions, this extension must be written last.
The signature for this extension is { 'E', 'O', 'I', 'E' }.

The extension consists of:

- 32-bit offset to the end of the index entries

- 160-bit SHA-1 over the extension types and their sizes (but not
their contents).  E.g. if we have "TREE" extension that is N-bytes
long, "REUC" extension that is M-bytes long, followed by "EOIE",
then the hash would be:

SHA-1("TREE" +  +
"REUC" + )

Signed-off-by: Ben Peart 
---
 Documentation/technical/index-format.txt |  23 
 read-cache.c | 152 +--
 t/t1700-split-index.sh   |   8 +-
 3 files changed, 171 insertions(+), 12 deletions(-)

diff --git a/Documentation/technical/index-format.txt 
b/Documentation/technical/index-format.txt
index db3572626b..6bc2d90f7f 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -314,3 +314,26 @@ The remaining data of each directory block is grouped by 
type:
 
   - An ewah bitmap, the n-th bit indicates whether the n-th index entry
 is not CE_FSMONITOR_VALID.
+
+== End of Index Entry
+
+  The End of Index Entry (EOIE) is used to locate the end of the variable
+  length index entries and the beginning of the extensions. Code can take
+  advantage of this to quickly locate the index extensions without having
+  to parse through all of the index entries.
+
+  Because it must be able to be loaded before the variable length cache
+  entries and other index extensions, this extension must be written last.
+  The signature for this extension is { 'E', 'O', 'I', 'E' }.
+
+  The extension consists of:
+
+  - 32-bit offset to the end of the index entries
+
+  - 160-bit SHA-1 over the extension types and their sizes (but not
+   their contents).  E.g. if we have "TREE" extension that is N-bytes
+   long, "REUC" extension that is M-bytes long, followed by "EOIE",
+   then the hash would be:
+
+   SHA-1("TREE" +  +
+   "REUC" + )
diff --git a/read-cache.c b/read-cache.c
index 6ba99e2c96..af2605a168 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -43,6 +43,7 @@
 #define CACHE_EXT_LINK 0x6c696e6b/* "link" */
 #define CACHE_EXT_UNTRACKED 0x554E5452   /* "UNTR" */
 #define CACHE_EXT_FSMONITOR 0x46534D4E   /* "FSMN" */
+#define CACHE_EXT_ENDOFINDEXENTRIES 0x454F4945 /* "EOIE" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
@@ -1693,6 +1694,9 @@ static int read_index_extension(struct index_state 
*istate,
case CACHE_EXT_FSMONITOR:
read_fsmonitor_extension(istate, data, sz);
break;
+   case CACHE_EXT_ENDOFINDEXENTRIES:
+   /* already handled in do_read_index() */
+   break;
default:
if (*ext < 'A' || 'Z' < *ext)
return error("index uses %.4s extension, which we do 
not understand",
@@ -1883,6 +1887,9 @@ static size_t estimate_cache_size(size_t ondisk_size, 
unsigned int entries)
return ondisk_size + entries * per_entry;
 }
 
+static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
+static void write_eoie_extension(struct strbuf *sb, git_hash_ctx 
*eoie_context, size_t offset);
+
 /* remember to discard_cache() before reading a different cache! */
 int do_read_index(struct index_state *istate, const char *path, int must_exist)
 {
@@ -2190,11 +2197,15 @@ static int ce_write(git_hash_ctx *context, int fd, void 
*data, unsigned int len)
return 0;
 }
 
-static int write_index_ext_header(git_hash_ctx *context, int fd,
- unsigned int ext, unsigned int sz)
+static int write_index_ext_header(git_hash_ctx *context, git_hash_ctx 
*eoie_context,
+ int fd, unsigned int ext, unsigned int sz)
 {
ext = htonl(ext);
sz = htonl(sz);
+   if (eoie_context) {
+   the_hash_algo->update_fn(eoie_context, &ext, 4);
+   the_hash_algo->update_fn(eoie_context, &sz, 4);
+   }
	return ((ce_write(context, fd, &ext, 4) < 0) ||
	(ce_write(context, fd, &sz, 4) < 0)) ? -1 : 0;
 }
@@ -2437,7 +2448,7 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
 {
uint64_t start = getnanotime();
int newfd = tempfile->fd;
-   git_hash_ctx c;

[PATCH v7 1/7] read-cache.c: optimize reading index format v4

2018-10-01 Thread Ben Peart
From: Nguyễn Thái Ngọc Duy 

Index format v4 requires some more computation to assemble a path
based on a previous one. The current code is not very efficient
because

 - it doubles memory copy, we assemble the final path in a temporary
   first before putting it back to a cache_entry

 - strbuf_remove() in expand_name_field() is not exactly a good fit
   for stripping a part at the end, _setlen() would do the same job
   and is much cheaper.

 - the open-coded loop to find the end of the string in
   expand_name_field() can't beat an optimized strlen()

This patch avoids the temporary buffer and writes directly to the new
cache_entry, which addresses the first two points. The last point
could also be avoided if the total string length fits in the first 12
bits of ce_flags, if not we fall back to strlen().
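
In other words, the v4 name reconstruction this describes boils down to roughly
the following (illustrative sketch, not the verbatim patch code; cp points at
the on-disk varint plus suffix, ce and previous_ce are the new and previous
cache entries):

	size_t strip_len = decode_varint(&cp);	/* bytes to drop from the previous name */
	size_t copy_len = previous_ce ? previous_ce->ce_namelen - strip_len : 0;
	size_t suffix_len = strlen((const char *)cp);

	memcpy(ce->name, previous_ce->name, copy_len);		/* shared prefix */
	memcpy(ce->name + copy_len, cp, suffix_len + 1);	/* new suffix + NUL */
	ce->ce_namelen = copy_len + suffix_len;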

Running "test-tool read-cache 100" on webkit.git (275k files), reading
v2 only takes 4.226 seconds, while v4 takes 5.711 seconds, 35% more
time. The patch reduces read time on v4 to 4.319 seconds.

Signed-off-by: Nguyễn Thái Ngọc Duy 
---
 read-cache.c | 128 ---
 1 file changed, 60 insertions(+), 68 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 8d04d78a58..583a4fb1f8 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1713,63 +1713,24 @@ int read_index(struct index_state *istate)
return read_index_from(istate, get_index_file(), get_git_dir());
 }
 
-static struct cache_entry *cache_entry_from_ondisk(struct mem_pool *mem_pool,
-  struct ondisk_cache_entry 
*ondisk,
-  unsigned int flags,
-  const char *name,
-  size_t len)
-{
-   struct cache_entry *ce = mem_pool__ce_alloc(mem_pool, len);
-
-   ce->ce_stat_data.sd_ctime.sec = get_be32(&ondisk->ctime.sec);
-   ce->ce_stat_data.sd_mtime.sec = get_be32(&ondisk->mtime.sec);
-   ce->ce_stat_data.sd_ctime.nsec = get_be32(&ondisk->ctime.nsec);
-   ce->ce_stat_data.sd_mtime.nsec = get_be32(&ondisk->mtime.nsec);
-   ce->ce_stat_data.sd_dev   = get_be32(&ondisk->dev);
-   ce->ce_stat_data.sd_ino   = get_be32(&ondisk->ino);
-   ce->ce_mode  = get_be32(&ondisk->mode);
-   ce->ce_stat_data.sd_uid   = get_be32(&ondisk->uid);
-   ce->ce_stat_data.sd_gid   = get_be32(&ondisk->gid);
-   ce->ce_stat_data.sd_size  = get_be32(&ondisk->size);
-   ce->ce_flags = flags & ~CE_NAMEMASK;
-   ce->ce_namelen = len;
-   ce->index = 0;
-   hashcpy(ce->oid.hash, ondisk->sha1);
-   memcpy(ce->name, name, len);
-   ce->name[len] = '\0';
-   return ce;
-}
-
-/*
- * Adjacent cache entries tend to share the leading paths, so it makes
- * sense to only store the differences in later entries.  In the v4
- * on-disk format of the index, each on-disk cache entry stores the
- * number of bytes to be stripped from the end of the previous name,
- * and the bytes to append to the result, to come up with its name.
- */
-static unsigned long expand_name_field(struct strbuf *name, const char *cp_)
-{
-   const unsigned char *ep, *cp = (const unsigned char *)cp_;
-   size_t len = decode_varint(&cp);
-
-   if (name->len < len)
-   die("malformed name field in the index");
-   strbuf_remove(name, name->len - len, len);
-   for (ep = cp; *ep; ep++)
-   ; /* find the end */
-   strbuf_add(name, cp, ep - cp);
-   return (const char *)ep + 1 - cp_;
-}
-
-static struct cache_entry *create_from_disk(struct mem_pool *mem_pool,
+static struct cache_entry *create_from_disk(struct index_state *istate,
struct ondisk_cache_entry *ondisk,
unsigned long *ent_size,
-   struct strbuf *previous_name)
+   const struct cache_entry 
*previous_ce)
 {
struct cache_entry *ce;
size_t len;
const char *name;
unsigned int flags;
+   size_t copy_len;
+   /*
+* Adjacent cache entries tend to share the leading paths, so it makes
+* sense to only store the differences in later entries.  In the v4
+* on-disk format of the index, each on-disk cache entry stores the
+* number of bytes to be stripped from the end of the previous name,
+* and the bytes to append to the result, to come up with its name.
+*/
+   int expand_name_field = istate->version == 4;
 
/* On-disk flags are just 16 bits */
flags = get_be16(&ondisk->flags);
@@ -1789,21 +1750,54 @@ static struct cache_entry *create_from_disk(struct 
mem_pool *mem_pool,
else
name = ondisk->name;
 
-   if (!previous_name) {
-   /* v3 and earlier */
-   if (len == CE_NAMEMASK)
-   len = strlen(name);
-   ce = cache_entry_from_ondisk(mem_pool, 

Re: [PATCH v6 4/7] config: add new index.threads config setting

2018-10-01 Thread Ben Peart




On 9/28/2018 6:15 PM, Junio C Hamano wrote:

Ramsay Jones  writes:


     if (!nr) {
     ieot_blocks = istate->cache_nr / THREAD_COST;
-   if (ieot_blocks < 1)
-   ieot_blocks = 1;
     cpus = online_cpus();
     if (ieot_blocks > cpus - 1)
     ieot_blocks = cpus - 1;


So, am I reading this correctly - you need cpus > 2 before an
IEOT extension block is written out?

OK.


Why should we be even calling online_cpus() in this codepath to
write the index in a single thread to begin with?

The number of cpus that readers would use to read this index file
has nothing to do with the number of cpus available to this
particular writer process.



As I mentioned in my other reply, this is optimizing for the most common 
case where the index is read from the same machine that wrote it and the 
user is taking the default settings (ie index.threads=true).


Aligning the number of blocks to the number of threads that will be 
processing them avoids situations where one thread may have up to double 
the work to do as the other threads (for example, if there were 3 blocks 
to be processed by 2 threads).


Re: [PATCH v6 4/7] config: add new index.threads config setting

2018-09-28 Thread Ben Peart




On 9/28/2018 1:07 PM, Junio C Hamano wrote:

Ben Peart  writes:


Why does multithreading have to be disabled in this test?


If multi-threading is enabled, it will write out the IEOT extension
which changes the SHA and causes the test to fail.


I think it is a design mistake to let the writing process's
capability decide what is written in the file to be read later by a
different process, which possibly may have different capability.  If
you are not writing with multiple threads, it should not matter if
that writer process is capable of and configured to spawn 8 threads
if the process were reading the file---as it is not reading the file
it is writing right now.

I can understand if the design is to write IEOT only if the
resulting index is expected to become large enough (above an
arbitrary threshold like 100k entries) to matter.  I also can
understand if IEOT is omitted when the repository configuration says
that no process is allowed to read the index with multi-threaded
codepath in that repository.



There are two different paths which determine how many blocks are 
written to the IEOT.  The first is the default path.  On this path, the 
number of blocks is determined by the number of cache entries divided by 
the THREAD_COST.  If there are sufficient entries to make it faster to 
use threading, then it will automatically use enough blocks to optimize 
the performance of reading the entries across multiple threads.


I currently cap the maximum number of blocks to be the number of cores 
that would be available to process them on that same machine purely as 
an optimization.  The majority of the time, the index will be read from 
the same machine that it was written on so this works well.  Before I 
added that logic, you would usually end up with more blocks than 
available threads which meant some threads had more to do than the other 
threads and resulted in worse performance.  For example, 4 blocks across 
3 threads results in the 1st thread having twice as much work to do as 
the other threads.
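
Roughly, the default block-count calculation described above amounts to the
following (illustrative sketch; the names follow the patch but this is not the
exact code, and cpus stands for the value returned by online_cpus()):

	static int default_ieot_blocks(unsigned int cache_nr, int cpus)
	{
		int blocks = cache_nr / THREAD_COST;	/* one block per THREAD_COST entries */

		if (blocks > cpus - 1)
			blocks = cpus - 1;	/* one core stays free for the extension thread */
		return blocks;	/* fewer than 2 blocks: no IEOT is written at all */
	}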


If the index is copied to a machine with a different number of cores, it 
will still all work - it just may not be optimal for that machine.  This 
is self correcting because as soon as the index is written out, it will 
be optimized for that machine.


If the "automatically try to make it perform optimally" logic doesn't 
work for some reason, we have path #2.


The second path is when the user specifies a specific number of blocks 
via the GIT_TEST_INDEX_THREADS= environment variable or the 
index.threads= config setting.  If they ask for n blocks, they will 
get n blocks.  This is the "I know what I'm doing and want to control 
the behavior" path.


I just added one additional test (see patch below) to avoid a divide by 
zero bug and simplify things a bit.  With this change, if there are 
fewer than two blocks, the IEOT extension is not written out as it isn't 
needed.  The load would be single threaded anyway so there is no reason 
to write out a IEOT extensions that won't be used.




diff --git a/read-cache.c b/read-cache.c
index f5d766088d..a1006fa824 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2751,18 +2751,23 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
 */
if (!nr) {
ieot_blocks = istate->cache_nr / THREAD_COST;
-   if (ieot_blocks < 1)
-   ieot_blocks = 1;
cpus = online_cpus();
if (ieot_blocks > cpus - 1)
ieot_blocks = cpus - 1;
} else {
ieot_blocks = nr;
}
-   ieot = xcalloc(1, sizeof(struct index_entry_offset_table)
-   + (ieot_blocks * sizeof(struct 
index_entry_offset)));

-   ieot->nr = 0;
-   ieot_work = DIV_ROUND_UP(entries, ieot_blocks);
+
+   /*
+* no reason to write out the IEOT extension if we don't
+* have enough blocks to utilize multi-threading
+*/
+   if (ieot_blocks > 1) {
+   ieot = xcalloc(1, sizeof(struct 
index_entry_offset_table)
+   + (ieot_blocks * sizeof(struct 
index_entry_offset)));

+   ieot->nr = 0;
+   ieot_work = DIV_ROUND_UP(entries, ieot_blocks);
+   }
}
 #endif



Re: [PATCH v6 3/7] eoie: add End of Index Entry (EOIE) extension

2018-09-28 Thread Ben Peart




On 9/27/2018 8:19 PM, SZEDER Gábor wrote:

On Wed, Sep 26, 2018 at 03:54:38PM -0400, Ben Peart wrote:

The End of Index Entry (EOIE) is used to locate the end of the variable


Nit: perhaps start with:

   The End of Index Entry (EOIE) optional extension can be used to ...

to make it clearer for those who don't immediately realize the
significance of the upper case 'E' in the extension's signature.


length index entries and the beginning of the extensions. Code can take
advantage of this to quickly locate the index extensions without having
to parse through all of the index entries.

Because it must be able to be loaded before the variable length cache
entries and other index extensions, this extension must be written last.
The signature for this extension is { 'E', 'O', 'I', 'E' }.

The extension consists of:

- 32-bit offset to the end of the index entries

- 160-bit SHA-1 over the extension types and their sizes (but not
their contents).  E.g. if we have "TREE" extension that is N-bytes
long, "REUC" extension that is M-bytes long, followed by "EOIE",
then the hash would be:

SHA-1("TREE" +  +
"REUC" + )

Signed-off-by: Ben Peart 
---
  Documentation/technical/index-format.txt |  23 
  read-cache.c | 151 +--
  t/README |   5 +
  t/t1700-split-index.sh   |   1 +
  4 files changed, 172 insertions(+), 8 deletions(-)




diff --git a/t/README b/t/README
index 3ea6c85460..aa33ac4f26 100644
--- a/t/README
+++ b/t/README
@@ -327,6 +327,11 @@ GIT_TEST_COMMIT_GRAPH=<boolean>, when true, forces the 
commit-graph to
  be written after every 'git commit' command, and overrides the
  'core.commitGraph' setting to true.
  
+GIT_TEST_DISABLE_EOIE=<boolean> disables writing the EOIE extension.

+This is used to allow tests 1, 4-9 in t1700-split-index.sh to succeed
+as they currently hard code SHA values for the index which are no longer
+valid due to the addition of the EOIE extension.


Is this extension enabled by default?  The commit message doesn't
explicitly say so, but I don't see any way to turn it on or off, while
there is this new GIT_TEST environment variable to disable it for one
particular test, so it seems so.  If that's indeed the case, then
wouldn't it be better to update those hard-coded SHA1 values in t1700
instead?



Yes, it is enabled by default and the only way to disable it is the 
GIT_TEST_DISABLE_EOIE environment variable.


The tests in t1700-split-index.sh assume that there are no extensions in 
the index file so anything that adds an extension, will break one or 
more of the tests.


First in 'enable split index', they hard code SHA values assuming there 
are no extensions. If some option adds an extension, these hard coded 
values no longer match and the test fails.


Later in 'disable split index' they save off the SHA of the index with 
split-index turned off and then in later tests, compare it to the SHA of 
the shared index.  Because extensions are stripped when the shared index 
is written out this only works if there were not extensions in the 
original index.


I'll document this behavior and reasoning in the test directly.

This did cause me to reexamine how EOIE and IEOT behave when split index 
is turned on.  These two extensions help most with a large index.  When 
split index is turned on, the large index is actually the shared index 
as the index is now the smaller set of deltas.


Currently, the extensions are stripped out of the shared index which 
means they are not available when they are needed to quickly load the 
shared index.  I'll see if I can update the patch so that these 
extensions are still written out and available in the shared index to 
speed up when it is loaded.


Thanks!


  Naming Tests
  
  
diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh

index be22398a85..1f168378c8 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -7,6 +7,7 @@ test_description='split index mode tests'
  # We need total control of index splitting here
  sane_unset GIT_TEST_SPLIT_INDEX
  sane_unset GIT_FSMONITOR_TEST
+GIT_TEST_DISABLE_EOIE=true; export GIT_TEST_DISABLE_EOIE
  
  test_expect_success 'enable split index' '

git config splitIndex.maxPercentChange 100 &&
--
2.18.0.windows.1



Re: [PATCH v3 3/5] fsmonitor: update GIT_TEST_FSMONITOR support

2018-09-28 Thread Ben Peart




On 9/28/2018 10:21 AM, Ben Peart wrote:



On 9/28/2018 6:01 AM, SZEDER Gábor wrote:

On Tue, Sep 18, 2018 at 11:29:35PM +, Ben Peart wrote:

diff --git a/t/README b/t/README
index 56a417439c..47165f7eab 100644
--- a/t/README
+++ b/t/README
@@ -319,6 +319,10 @@ GIT_TEST_OE_DELTA_SIZE=<n> exercises the 
uncommon pack-objects code

  path where deltas larger than this limit require extra memory
  allocation for bookkeeping.
+GIT_TEST_FSMONITOR=$PWD/t7519/fsmonitor-all exercises the fsmonitor
+code path for utilizing a file system monitor to speed up detecting
+new or changed files.


Here you tell us to set GIT_TEST_FSMONITOR to an absolute path, and we
are good to go.


diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh
index 756beb0d8e..d77012ea6d 100755
--- a/t/t7519-status-fsmonitor.sh
+++ b/t/t7519-status-fsmonitor.sh
@@ -8,7 +8,7 @@ test_description='git status with file system watcher'
  # To run the entire git test suite using fsmonitor:
  #
  # copy t/t7519/fsmonitor-all to a location in your path and then set
-# GIT_FSMONITOR_TEST=fsmonitor-all and run your tests.
+# GIT_TEST_FSMONITOR=fsmonitor-all and run your tests.


But this old comment is different, suggesting copying that script to
our $PATH.

I prefer your instructions above, because it's only a single step,
and, more importantly, it won't pollute my $PATH.  I think this
comment should be updated to make the advice in both places
consistent.  Or perhaps even removed, now that all GIT_TEST variables
are documented in the same place?



I prefer the suggestion to simply remove this text from the test script 
now that there is documentation for it in the t/README file.


Junio, can you squash in the following patch or would you prefer I 
reroll the entire series?


Thanks,

Ben

From 393007340dc1baf3539ab727e0a8128e7c408a27 Mon Sep 17 00:00:00 2001
From: Ben Peart 
Date: Fri, 28 Sep 2018 10:23:18 -0400
Subject: fixup! fsmonitor: remove outdated instructions from test

Remove the outdated instructions on how to run the test suite utilizing
fsmonitor now that it is properly documented in t/README.

Signed-off-by: Ben Peart 
---

Notes:
Base Ref: git-test-cleanup-v3
Web-Diff: https://github.com/benpeart/git/commit/393007340d
Checkout: git fetch https://github.com/benpeart/git 
git-test-cleanup-v1 && git checkout 393007340d


 t/t7519-status-fsmonitor.sh | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh
index 8308d6d5b1..3f0dd98010 100755
--- a/t/t7519-status-fsmonitor.sh
+++ b/t/t7519-status-fsmonitor.sh
@@ -4,13 +4,6 @@ test_description='git status with file system watcher'

 . ./test-lib.sh

-#
-# To run the entire git test suite using fsmonitor:
-#
-# copy t/t7519/fsmonitor-all to a location in your path and then set
-# GIT_TEST_FSMONITOR=fsmonitor-all and run your tests.
-#
-
 # Note, after "git reset --hard HEAD" no extensions exist other than 
'TREE'

 # "git update-index --fsmonitor" can be used to get the extension written
 # before testing the results.

base-commit: 043246d9369fb851c5c2b922466f77fc7ef0327b
--
2.18.0.windows.1



Re: [PATCH v3 3/5] fsmonitor: update GIT_TEST_FSMONITOR support

2018-09-28 Thread Ben Peart




On 9/28/2018 6:01 AM, SZEDER Gábor wrote:

On Tue, Sep 18, 2018 at 11:29:35PM +, Ben Peart wrote:

diff --git a/t/README b/t/README
index 56a417439c..47165f7eab 100644
--- a/t/README
+++ b/t/README
@@ -319,6 +319,10 @@ GIT_TEST_OE_DELTA_SIZE=<n> exercises the uncommon 
pack-objects code
  path where deltas larger than this limit require extra memory
  allocation for bookkeeping.
  
+GIT_TEST_FSMONITOR=$PWD/t7519/fsmonitor-all exercises the fsmonitor

+code path for utilizing a file system monitor to speed up detecting
+new or changed files.


Here you tell us to set GIT_TEST_FSMONITOR to an absolute path, and we
are good to go.


diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh
index 756beb0d8e..d77012ea6d 100755
--- a/t/t7519-status-fsmonitor.sh
+++ b/t/t7519-status-fsmonitor.sh
@@ -8,7 +8,7 @@ test_description='git status with file system watcher'
  # To run the entire git test suite using fsmonitor:
  #
  # copy t/t7519/fsmonitor-all to a location in your path and then set
-# GIT_FSMONITOR_TEST=fsmonitor-all and run your tests.
+# GIT_TEST_FSMONITOR=fsmonitor-all and run your tests.


But this old comment is different, suggesting copying that script to
our $PATH.

I prefer your instructions above, because it's only a single step,
and, more importantly, it won't pollute my $PATH.  I think this
comment should be updated to make the advice in both places
consistent.  Or perhaps even removed, now that all GIT_TEST variables
are documented in the same place?



I prefer the suggestion to simply remove this text from the test script 
now that there is documentation for it in the t/README file.


Re: [PATCH v6 4/7] config: add new index.threads config setting

2018-09-28 Thread Ben Peart




On 9/27/2018 8:26 PM, SZEDER Gábor wrote:

On Wed, Sep 26, 2018 at 03:54:39PM -0400, Ben Peart wrote:

Add support for a new index.threads config setting which will be used to
control the threading code in do_read_index().  A value of 0 will tell the
index code to automatically determine the correct number of threads to use.
A value of 1 will make the code single threaded.  A value greater than 1
will set the maximum number of threads to use.

For testing purposes, this setting can be overwritten by setting the
GIT_TEST_INDEX_THREADS=<n> environment variable to a value greater than 0.

Signed-off-by: Ben Peart 
---



diff --git a/t/README b/t/README
index aa33ac4f26..0fcecf4500 100644
--- a/t/README
+++ b/t/README
@@ -332,6 +332,11 @@ This is used to allow tests 1, 4-9 in t1700-split-index.sh 
to succeed
  as they currently hard code SHA values for the index which are no longer
  valid due to the addition of the EOIE extension.
  
+GIT_TEST_INDEX_THREADS=<n> enables exercising the multi-threaded loading

+of the index for the whole test suite by bypassing the default number of
+cache entries and thread minimums. Settting this to 1 will make the


s/ttt/tt/


+index loading single threaded.
+
  Naming Tests
  
  
diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh

index 1f168378c8..ab205954cf 100755
--- a/t/t1700-split-index.sh
+++ b/t/t1700-split-index.sh
@@ -8,6 +8,7 @@ test_description='split index mode tests'
  sane_unset GIT_TEST_SPLIT_INDEX
  sane_unset GIT_FSMONITOR_TEST
  GIT_TEST_DISABLE_EOIE=true; export GIT_TEST_DISABLE_EOIE
+GIT_TEST_INDEX_THREADS=1; export GIT_TEST_INDEX_THREADS


Why does multithreading have to be disabled in this test?



If multi-threading is enabled, it will write out the IEOT extension 
which changes the SHA and causes the test to fail.  I will update the 
logic in this case to not write out the IEOT extension as it isn't needed.



  test_expect_success 'enable split index' '
git config splitIndex.maxPercentChange 100 &&
--
2.18.0.windows.1



Re: [PATCH] read-cache: fix division by zero core-dump

2018-09-27 Thread Ben Peart




On 9/27/2018 6:24 PM, Ramsay Jones wrote:


commit 225df8a468 ("ieot: add Index Entry Offset Table (IEOT)
extension", 2018-09-26) added a 'DIV_ROUND_UP(entries, ieot_blocks)
expression, where ieot_blocks was set to zero for a single cpu
platform. This caused an SIGFPE and a core dump in practically
every test in the test-suite, until test t4056-diff-order.sh, which
then went into an infinite loop!

Signed-off-by: Ramsay Jones 
---

Hi Ben,

Could you please squash this into the relevant commits on your
'bp/read-cache-parallel' branch. (The first hunk fixes a sparse
warning about using an integer as a NULL pointer).



Absolutely - thanks for the patch.

I don't know how long it's been since I've been on a single core CPU - 
I'm sad for you. ;-)



Thanks!

ATB,
Ramsay Jones

  read-cache.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 6755d58877..40f096f70a 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2141,7 +2141,7 @@ int do_read_index(struct index_state *istate, const char 
*path, int must_exist)
size_t extension_offset = 0;
  #ifndef NO_PTHREADS
int nr_threads, cpus;
-   struct index_entry_offset_table *ieot = 0;
+   struct index_entry_offset_table *ieot = NULL;
  #endif
  
  	if (istate->initialized)

@@ -2771,7 +2771,7 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
if (ieot_blocks < 1)
ieot_blocks = 1;
cpus = online_cpus();
-   if (ieot_blocks > cpus - 1)
+   if (cpus > 1 && ieot_blocks > cpus - 1)
ieot_blocks = cpus - 1;
} else {
ieot_blocks = nr;



Re: Git Test Coverage Report (Tuesday, Sept 25)

2018-09-27 Thread Ben Peart




On 9/26/2018 2:54 PM, Derrick Stolee wrote:

On 9/26/2018 2:43 PM, Thomas Gummerer wrote:

On 09/26, Derrick Stolee wrote:

This is a bit tricky to do, but I will investigate. For some things, the
values can conflict with each other (GIT_TEST_SPLIT_INDEX doesn't play
nicely with other index options, I think).

Just commenting on this point.  I think all the index options should
be playing nicely with eachother.  I occasionally run the test suite
with some of them turned on, and if something failed that was always
an actual bug.  The different modes can be used in different
combinations in the wild as well, so we should get them to interact
nicely in the test suite.


Thanks! I'm still working out details on this, since the test suite is 
broken with GIT_TEST_COMMIT_GRAPH due to a verbosity issue [1], which I 
forgot until I ran the tests with the variable on.


I'll re-run with these variables:

GIT_TEST_SPLIT_INDEX=1

GIT_TEST_FULL_IN_PACK_ARRAY=1

GIT_TEST_OE_SIZE=128

GIT_TEST_OE_DELTA_SIZE=128

GIT_TEST_VALIDATE_INDEX_CACHE_ENTRIES=1

GIT_TEST_FSMONITOR=$PWD/t/t7519/fsmonitor-all

GIT_TEST_INDEX_VERSION=4

GIT_TEST_PRELOAD_INDEX=1

GIT_TEST_DISABLE_EOIE=1

GIT_TEST_INDEX_THREADS=1



Because the test repos are so small (ie smaller than 10K files), the 
test suite already executes as if GIT_TEST_INDEX_THREADS=1 is set (ie 
single threaded).  I added the test variable so that the multi-threading 
code could be forced to execute in spite of the default minimum number 
of files logic.


I'd recommend you set GIT_TEST_INDEX_THREADS=3 instead. (3 because 1 
thread is used for loading the index extensions and we need 2 or more 
threads available to exercise multi-threaded loading of the index entries).



Thanks,

-Stolee

[1] 
https://public-inbox.org/git/60aae3d6-35b2-94fb-afd7-6978e935a...@gmail.com/ 





Re: Git Test Coverage Report (Tuesday, Sept 25)

2018-09-27 Thread Ben Peart




On 9/26/2018 2:44 PM, Derrick Stolee wrote:

On 9/26/2018 1:59 PM, Junio C Hamano wrote:

Derrick Stolee  writes:


In an effort to ensure new code is reasonably covered by the test
suite, we now have contrib/coverage-diff.sh to combine the gcov output
from 'make coverage-test ; make coverage-report' with the output from
'git diff A B' to discover _new_ lines of code that are not covered.

This report takes the output of these results after running on four
branches:

         pu: 80e728fa913dc3a1165b6dec9a7afa6052a86325

        jch: 0c10634844314ab89666ed0a1c7d36dde7ac9689

       next: 76f2f5c1e34c4dbef1029e2984c2892894c444ce

 master: fe8321ec057f9231c26c29b364721568e58040f7

master@{1}: 2d3b1c576c85b7f5db1f418907af00ab88e0c303

I ran the test suite on each of these branches on an Ubuntu Linux VM,
and I'm missing some dependencies (like apache, svn, and perforce) so
not all tests are run.

Thanks.


master@{1}..master:

builtin/remote.c
5025425dfff (   Shulhan 2018-09-13 20:18:33 +0700
864)    return error(_("No such remote: '%s'"), name);
commit-reach.c
b67f6b26e35 (Derrick Stolee 2018-09-21 08:05:26 -0700
559)    continue;
b67f6b26e35 (Derrick Stolee 2018-09-21 08:05:26 -0700
569)    from->objects[i].item->flags |= assign_flag;
b67f6b26e35 (Derrick Stolee 2018-09-21 08:05:26 -0700
570)    continue;
b67f6b26e35 (Derrick Stolee 2018-09-21 08:05:26 -0700
576)    result = 0;
b67f6b26e35 (Derrick Stolee 2018-09-21 08:05:26 -0700
577)    goto cleanup;
cat: compat#mingw.c.gcov: No such file or directory
ll-merge.c
d64324cb60e (Torsten Bögershausen   2018-09-12 21:32:02
+0200   379)    marker_size =
DEFAULT_CONFLICT_MARKER_SIZE;
remote-curl.c
c3b9bc94b9b (Elijah Newren  2018-09-05 10:03:07 -0700
181)    options.filter = xstrdup(value);

As I think the data presented here is very valuable, let me ignore
the findings of this specific run (which will be fixed by individual
authors as/if necessary), and focus on the way the data is presented
(which will affect the ease of consumption by authors of future
commits).

These wrapped blame output lines are harder to view.  Can we have
this in plain/text without format=flowed at least?


Perhaps removing the middle columns of data and just " ) 
" would be easier? We could also remove tabs to save space. For 
example:


builtin/remote.c
5025425dfff  864) return error(_("No such remote: '%s'"), name);

commit-reach.c
b67f6b26e35 559) continue;
b67f6b26e35 569) from->objects[i].item->flags |= assign_flag;
b67f6b26e35 570) continue;
b67f6b26e35 576) result = 0;
b67f6b26e35 577) goto cleanup;

ll-merge.c
d64324cb60e 379) marker_size = DEFAULT_CONFLICT_MARKER_SIZE;

remote-curl.c
c3b9bc94b9b  181) options.filter = xstrdup(value);

This does still pad the data by a bit, but should be more readable. Most 
"uncovered" code will be indented at least one level.


We do lose the author information, but keen readers could identify code 
they are interested in by filename and then look up the commit by OID 
later.




I personally find the author data very useful as it makes it trivial for 
me to scan for and find changes I'm responsible for.  Just scanning the 
output of the mail and looking for file names I may have changed lately 
is much more laborious - meaning I'm much less likely to actually do it :-).


Perhaps a reasonable compromise would be to put the author name once 
with the block of changes (like you are doing for the file name) rather 
than on every line that changed and wasn't executed.




I personally do not mind a monospaced and non-wrapping website, just
I do not mind visiting travis-ci.org for recent results from time to
time.  Others may differ.

There is an error message from "cat" in it, by the way.
Thanks! I'll add an 'if' statement when there is no gcov file. This 
happens for the compat layers that are not compiled in and for the 
t/helper directory, it seems.



preload-index.c
ae9af12287b (Nguyễn Thái Ngọc Duy   2018-09-15 19:56:04
+0200   73) struct progress_data *pd =
p->progress;
ae9af12287b (Nguyễn Thái Ngọc Duy   2018-09-15 19:56:04
+0200   75) pthread_mutex_lock(&pd->mutex);
ae9af12287b (Nguyễn Thái Ngọc Duy   2018-09-15 19:56:04
+0200   76) pd->n += last_nr - nr;
ae9af12287b (Nguyễn Thái Ngọc Duy   2018-09-15 19:56:04
+0200   77) display_progress(pd->progress, pd->n);
ae9af12287b (Nguyễn Thái Ngọc Duy   2018-09-15 19:56:04
+0200   78) pthread_mutex_unlock(&pd->mutex);
ae9af12287b (Nguyễn Thái Ngọc Duy   2018-09-15 19:56:04
+0200   79) last_nr = nr;

I wonder how much we can save the effort that is needed to scan the
output if we somehow coalesce these consecutive lines that are
attributed to the same commit.


It could be possible to group consecutive lines together, but hopefully 
reducing 

[PATCH v6 7/7] read-cache: load cache entries on worker threads

2018-09-26 Thread Ben Peart
This patch helps address the CPU cost of loading the index by utilizing
the Index Entry Offset Table (IEOT) to divide loading and conversion of
the cache entries across multiple threads in parallel.

I used p0002-read-cache.sh to generate some performance data:

Test w/100,000 files reduced the time by 32.24%
Test w/1,000,000 files reduced the time by -4.77%

Note that on the 1,000,000 files case, multi-threading the cache entry parsing
does not yield a performance win.  This is because the cost to parse the
index extensions in this repo far outweighs the cost of loading the cache
entries.

The high cost of parsing the index extensions is driven by the cache tree
and the untracked cache extensions. As this is currently the longest pole,
any reduction in this time will reduce the overall index load times so is
worth further investigation in another patch series.

Signed-off-by: Ben Peart 
---
 read-cache.c | 224 +++
 1 file changed, 189 insertions(+), 35 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 9b0554d4e6..f5d766088d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1720,7 +1720,8 @@ int read_index(struct index_state *istate)
return read_index_from(istate, get_index_file(), get_git_dir());
 }
 
-static struct cache_entry *create_from_disk(struct index_state *istate,
+static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool,
+   unsigned int version,
struct ondisk_cache_entry *ondisk,
unsigned long *ent_size,
const struct cache_entry 
*previous_ce)
@@ -1737,7 +1738,7 @@ static struct cache_entry *create_from_disk(struct 
index_state *istate,
 * number of bytes to be stripped from the end of the previous name,
 * and the bytes to append to the result, to come up with its name.
 */
-   int expand_name_field = istate->version == 4;
+   int expand_name_field = version == 4;
 
/* On-disk flags are just 16 bits */
flags = get_be16(&ondisk->flags);
@@ -1761,16 +1762,17 @@ static struct cache_entry *create_from_disk(struct 
index_state *istate,
const unsigned char *cp = (const unsigned char *)name;
size_t strip_len, previous_len;
 
-   previous_len = previous_ce ? previous_ce->ce_namelen : 0;
+   /* If we're at the beginning of a block, ignore the previous name */
strip_len = decode_varint(&cp);
-   if (previous_len < strip_len) {
-   if (previous_ce)
+   if (previous_ce) {
+   previous_len = previous_ce->ce_namelen;
+   if (previous_len < strip_len)
die(_("malformed name field in the index, near 
path '%s'"),
-   previous_ce->name);
-   else
-   die(_("malformed name field in the index in the 
first path"));
+   previous_ce->name);
+   copy_len = previous_len - strip_len;
+   } else {
+   copy_len = 0;
}
-   copy_len = previous_len - strip_len;
name = (const char *)cp;
}
 
@@ -1780,7 +1782,7 @@ static struct cache_entry *create_from_disk(struct 
index_state *istate,
len += copy_len;
}
 
-   ce = mem_pool__ce_alloc(istate->ce_mem_pool, len);
+   ce = mem_pool__ce_alloc(ce_mem_pool, len);
 
ce->ce_stat_data.sd_ctime.sec = get_be32(&ondisk->ctime.sec);
ce->ce_stat_data.sd_mtime.sec = get_be32(&ondisk->mtime.sec);
@@ -1950,6 +1952,52 @@ static void *load_index_extensions(void *_data)
return NULL;
 }
 
+/*
+ * A helper function that will load the specified range of cache entries
+ * from the memory mapped file and add them to the given index.
+ */
+static unsigned long load_cache_entry_block(struct index_state *istate,
+   struct mem_pool *ce_mem_pool, int offset, int nr, const 
char *mmap,
+   unsigned long start_offset, const struct cache_entry 
*previous_ce)
+{
+   int i;
+   unsigned long src_offset = start_offset;
+
+   for (i = offset; i < offset + nr; i++) {
+   struct ondisk_cache_entry *disk_ce;
+   struct cache_entry *ce;
+   unsigned long consumed;
+
+   disk_ce = (struct ondisk_cache_entry *)(mmap + src_offset);
+   ce = create_from_disk(ce_mem_pool, istate->version, disk_ce, &consumed, previous_ce);
+   set_index_entry(istate, i, ce);
+
+   src_offset += consumed;
+   previous_ce = ce;
+   }
+   return src_offset - start_offset;
+}
+
+stati

[PATCH v6 6/7] ieot: add Index Entry Offset Table (IEOT) extension

2018-09-26 Thread Ben Peart
This patch enables addressing the CPU cost of loading the index by adding
additional data to the index that will allow us to efficiently multi-
thread the loading and conversion of cache entries.

It accomplishes this by adding an (optional) index extension that is a
table of offsets to blocks of cache entries in the index file.  To make
this work for V4 indexes, when writing the cache entries, it periodically
"resets" the prefix-compression by encoding the current entry as if the
path name for the previous entry is completely different and saves the
offset of that entry in the IEOT.  Basically, with V4 indexes, it
generates offsets into blocks of prefix-compressed entries.
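
Concretely, the extension body is just a version word followed by
(offset, count) pairs; a reader-side sketch (illustrative only, using the
get_be32() helper the patch also uses; index_data, ext_offset and ext_size are
made-up names for the mapped index and the IEOT payload location/size):

	uint32_t version = get_be32(index_data + ext_offset);	/* currently 1 */
	int nr = (ext_size - 4) / 8;	/* each entry is two 32-bit fields */
	int i;

	for (i = 0; i < nr; i++) {
		uint32_t offset = get_be32(index_data + ext_offset + 4 + i * 8);
		uint32_t count  = get_be32(index_data + ext_offset + 4 + i * 8 + 4);
		/* hand the block starting at 'offset' with 'count' cache entries to a worker */
	}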

Signed-off-by: Ben Peart 
---
 Documentation/technical/index-format.txt |  18 +++
 read-cache.c | 166 +++
 2 files changed, 184 insertions(+)

diff --git a/Documentation/technical/index-format.txt 
b/Documentation/technical/index-format.txt
index 6bc2d90f7f..7c4d67aa6a 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -337,3 +337,21 @@ The remaining data of each directory block is grouped by 
type:
 
SHA-1("TREE" +  +
"REUC" + )
+
+== Index Entry Offset Table
+
+  The Index Entry Offset Table (IEOT) is used to help address the CPU
+  cost of loading the index by enabling multi-threading the process of
+  converting cache entries from the on-disk format to the in-memory format.
+  The signature for this extension is { 'I', 'E', 'O', 'T' }.
+
+  The extension consists of:
+
+  - 32-bit version (currently 1)
+
+  - A number of index offset entries each consisting of:
+
+- 32-bit offset from the beginning of the file to the first cache entry
+   in this block of entries.
+
+- 32-bit count of cache entries in this block
diff --git a/read-cache.c b/read-cache.c
index 8da21c9273..9b0554d4e6 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -45,6 +45,7 @@
 #define CACHE_EXT_UNTRACKED 0x554E5452   /* "UNTR" */
 #define CACHE_EXT_FSMONITOR 0x46534D4E   /* "FSMN" */
 #define CACHE_EXT_ENDOFINDEXENTRIES 0x454F4945 /* "EOIE" */
+#define CACHE_EXT_INDEXENTRYOFFSETTABLE 0x49454F54 /* "IEOT" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
@@ -1696,6 +1697,7 @@ static int read_index_extension(struct index_state 
*istate,
read_fsmonitor_extension(istate, data, sz);
break;
case CACHE_EXT_ENDOFINDEXENTRIES:
+   case CACHE_EXT_INDEXENTRYOFFSETTABLE:
/* already handled in do_read_index() */
break;
default:
@@ -1888,6 +1890,23 @@ static size_t estimate_cache_size(size_t ondisk_size, 
unsigned int entries)
return ondisk_size + entries * per_entry;
 }
 
+struct index_entry_offset
+{
+   /* starting byte offset into index file, count of index entries in this 
block */
+   int offset, nr;
+};
+
+struct index_entry_offset_table
+{
+   int nr;
+   struct index_entry_offset entries[0];
+};
+
+#ifndef NO_PTHREADS
+static struct index_entry_offset_table *read_ieot_extension(const char *mmap, 
size_t mmap_size, size_t offset);
+static void write_ieot_extension(struct strbuf *sb, struct 
index_entry_offset_table *ieot);
+#endif
+
 static size_t read_eoie_extension(const char *mmap, size_t mmap_size);
 static void write_eoie_extension(struct strbuf *sb, git_hash_ctx 
*eoie_context, size_t offset);
 
@@ -1931,6 +1950,15 @@ static void *load_index_extensions(void *_data)
return NULL;
 }
 
+/*
+ * Mostly randomly chosen maximum thread counts: we
+ * cap the parallelism to online_cpus() threads, and we want
+* to have at least 10000 cache entries per thread for it to
+ * be worth starting a thread.
+ */
+
+#define THREAD_COST	(10000)
+
 /* remember to discard_cache() before reading a different cache! */
 int do_read_index(struct index_state *istate, const char *path, int must_exist)
 {
@@ -2523,6 +2551,9 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
int drop_cache_tree = istate->drop_cache_tree;
off_t offset;
+   int ieot_work = 1;
+   struct index_entry_offset_table *ieot = NULL;
+   int nr;
 
for (i = removed = extended = 0; i < entries; i++) {
if (cache[i]->ce_flags & CE_REMOVE)
@@ -2556,7 +2587,33 @@ static int do_write_index(struct index_state *istate, 
struct tempfile *tempfile,
if (ce_write(&c, newfd, &hdr, sizeof(hdr)) < 0)
return -1;
 
+#ifndef NO_PTHREADS
+   if (!strip_extensions && (nr = git_config_get_index_threads()) != 1) {
+   int ieot_blocks, cpus;
+
+   /*
+* ensure default number of ieot
