Re: [PATCH v1] teach git to support a virtual (partially populated) work directory
On 11/28/2018 8:31 AM, SZEDER Gábor wrote: On Tue, Nov 27, 2018 at 02:50:57PM -0500, Ben Peart wrote: diff --git a/t/t1092-virtualworkdir.sh b/t/t1092-virtualworkdir.sh new file mode 100755 index 00..0cdfe9b362 --- /dev/null +++ b/t/t1092-virtualworkdir.sh @@ -0,0 +1,393 @@ +#!/bin/sh + +test_description='virtual work directory tests' + +. ./test-lib.sh + +# We need total control of the virtual work directory hook +sane_unset GIT_TEST_VIRTUALWORKDIR + +clean_repo () { + rm .git/index && + git -c core.virtualworkdir=false reset --hard HEAD && + git -c core.virtualworkdir=false clean -fd && + touch untracked.txt && We would usually run '>untracked.txt' instead, sparing the external process. A further nit is that a function called 'clean_repo' creates new untracked files... Thanks, all good suggestions I've incorporated for the next iteration. + touch dir1/untracked.txt && + touch dir2/untracked.txt +} + +test_expect_success 'setup' ' + mkdir -p .git/hooks/ && + cat > .gitignore <<-\EOF && CodingGuidelines suggest no space between redirection operator and filename. + .gitignore + expect* + actual* + EOF + touch file1.txt && + touch file2.txt && + mkdir -p dir1 && + touch dir1/file1.txt && + touch dir1/file2.txt && + mkdir -p dir2 && + touch dir2/file1.txt && + touch dir2/file2.txt && + git add . && + git commit -m "initial" && + git config --local core.virtualworkdir true +' +test_expect_success 'verify files not listed are ignored by git clean -f -x' ' + clean_repo && I find it odd to clean the repo right after setting it up; but then again, 'clean_repo' not only cleans, but also creates new files. Perhaps rename it to 'reset_repo'? Dunno. + write_script .git/hooks/virtual-work-dir <<-\EOF && + printf "untracked.txt\0" + printf "dir1/\0" + EOF + mkdir -p dir3 && + touch dir3/untracked.txt && + git clean -f -x && + test -f file1.txt && Please use the 'test_path_is_file', ... + test -f file2.txt && + test ! -f untracked.txt && ... 'test_path_is_missing', and ... + test -d dir1 && ... 'test_path_is_dir' helpers, respectively, because they print informative error messages on failure. + test -f dir1/file1.txt && + test -f dir1/file2.txt && + test ! -f dir1/untracked.txt && + test -f dir2/file1.txt && + test -f dir2/file2.txt && + test -f dir2/untracked.txt && + test -d dir3 && + test -f dir3/untracked.txt +'
Re: [PATCH v1] mem_pool: add GIT_TRACE_MEMPOOL support
On 11/28/2018 4:37 AM, Johannes Schindelin wrote: Hi Ben, On Tue, 27 Nov 2018, Ben Peart wrote: From: Ben Peart Add tracing around initializing and discarding mempools. In discard report on the amount of memory unused in the current block to help tune setting the initial_size. Signed-off-by: Ben Peart --- Looks good. My only question: should we also trace calls to _alloc(), _calloc() and _combine()? I was trying to tune the initial size in my use of the mem_pool and so found this tracing useful to see how much memory was actually being used. I'm inclined to only add tracing as it is needed rather that proactively because we think it _might_ be needed. I suspect _alloc() and _calloc() would get very noisy and not add much value. Ciao, Johannes Notes: Base Ref: * git-trace-mempool Web-Diff: https://github.com/benpeart/git/commit/9ac84bbca2 Checkout: git fetch https://github.com/benpeart/git git-trace-mempool-v1 && git checkout 9ac84bbca2 mem-pool.c | 5 + 1 file changed, 5 insertions(+) diff --git a/mem-pool.c b/mem-pool.c index a2841a4a9a..065389aaec 100644 --- a/mem-pool.c +++ b/mem-pool.c @@ -5,6 +5,7 @@ #include "cache.h" #include "mem-pool.h" +static struct trace_key trace_mem_pool = TRACE_KEY_INIT(MEMPOOL); #define BLOCK_GROWTH_SIZE 1024*1024 - sizeof(struct mp_block); /* @@ -48,12 +49,16 @@ void mem_pool_init(struct mem_pool **mem_pool, size_t initial_size) mem_pool_alloc_block(pool, initial_size, NULL); *mem_pool = pool; + trace_printf_key(_mem_pool, "mem_pool (%p): init (%"PRIuMAX") initial size\n", + pool, (uintmax_t)initial_size); } void mem_pool_discard(struct mem_pool *mem_pool, int invalidate_memory) { struct mp_block *block, *block_to_free; + trace_printf_key(_mem_pool, "mem_pool (%p): discard (%"PRIuMAX") unused\n", + mem_pool, (uintmax_t)(mem_pool->mp_block->end - mem_pool->mp_block->next_free)); block = mem_pool->mp_block; while (block) { base-commit: bb75be6cb916297f271c846f2f9caa3daaaec718 -- 2.18.0.windows.1
[PATCH v1] mem_pool: add GIT_TRACE_MEMPOOL support
From: Ben Peart Add tracing around initializing and discarding mempools. In discard report on the amount of memory unused in the current block to help tune setting the initial_size. Signed-off-by: Ben Peart --- Notes: Base Ref: * git-trace-mempool Web-Diff: https://github.com/benpeart/git/commit/9ac84bbca2 Checkout: git fetch https://github.com/benpeart/git git-trace-mempool-v1 && git checkout 9ac84bbca2 mem-pool.c | 5 + 1 file changed, 5 insertions(+) diff --git a/mem-pool.c b/mem-pool.c index a2841a4a9a..065389aaec 100644 --- a/mem-pool.c +++ b/mem-pool.c @@ -5,6 +5,7 @@ #include "cache.h" #include "mem-pool.h" +static struct trace_key trace_mem_pool = TRACE_KEY_INIT(MEMPOOL); #define BLOCK_GROWTH_SIZE 1024*1024 - sizeof(struct mp_block); /* @@ -48,12 +49,16 @@ void mem_pool_init(struct mem_pool **mem_pool, size_t initial_size) mem_pool_alloc_block(pool, initial_size, NULL); *mem_pool = pool; + trace_printf_key(_mem_pool, "mem_pool (%p): init (%"PRIuMAX") initial size\n", + pool, (uintmax_t)initial_size); } void mem_pool_discard(struct mem_pool *mem_pool, int invalidate_memory) { struct mp_block *block, *block_to_free; + trace_printf_key(_mem_pool, "mem_pool (%p): discard (%"PRIuMAX") unused\n", + mem_pool, (uintmax_t)(mem_pool->mp_block->end - mem_pool->mp_block->next_free)); block = mem_pool->mp_block; while (block) { base-commit: bb75be6cb916297f271c846f2f9caa3daaaec718 -- 2.18.0.windows.1
[PATCH v1] teach git to support a virtual (partially populated) work directory
From: Ben Peart To make git perform well on the very largest repos, we must make git operations O(modified) instead of O(size of repo). This takes advantage of the fact that the number of files a developer has modified (especially in very large repos) is typically a tiny fraction of the overall repo size. We accomplished this by utilizing the existing internal logic for the skip worktree bit and excludes to tell git to ignore all files and folders other than those that have been modified. This logic is driven by an external process that monitors writes to the repo and communicates the list of files and folders with changes to git via the virtual work directory hook in this patch. The external process maintains a list of files and folders that have been modified. When git runs, it requests the list of files and folders that have been modified via the virtual work directory hook. Git then sets/clears the skip-worktree bit on the cache entries and builds a hashmap of the modified files/folders that is used by the excludes logic to avoid scanning the entire repo looking for changes and untracked files. With this system, we have been able to make local git command performance on extremely large repos (millions of files, 1/2 million folders) entirely manageable (30 second checkout, 3.5 seconds status, 4 second add, 7 second commit, etc). On index load, clear/set the skip worktree bits based on the virtual work directory data. Use virtual work directory data to update skip-worktree bit in unpack-trees. Use virtual work directory data to exclude files and folders not explicitly requested. Signed-off-by: Ben Peart --- I believe I've incorporated all the feedback from the RFC. Renaming the feature, updating the setting to be a boolean with a hard coded hook name, labeling the feature "experimental," and only calling get_dtype() if the feature is turned on. If there are other suggestions on how to ensure this is a useful and general purpose feature please let me know. Notes: Base Ref: master Web-Diff: https://github.com/benpeart/git/commit/65c3ca2e5f Checkout: git fetch https://github.com/benpeart/git virtual-workdir-v1 && git checkout 65c3ca2e5f Documentation/config/core.txt | 9 + Documentation/githooks.txt| 23 ++ Makefile | 1 + cache.h | 1 + config.c | 32 ++- config.h | 1 + dir.c | 26 ++- environment.c | 1 + read-cache.c | 2 + t/t1092-virtualworkdir.sh | 393 ++ unpack-trees.c| 23 +- virtualworkdir.c | 314 +++ virtualworkdir.h | 25 +++ 13 files changed, 843 insertions(+), 8 deletions(-) create mode 100755 t/t1092-virtualworkdir.sh create mode 100644 virtualworkdir.c create mode 100644 virtualworkdir.h diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index d0e6635fe0..49b7699a4e 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -68,6 +68,15 @@ core.fsmonitor:: avoiding unnecessary processing of files that have not changed. See the "fsmonitor-watchman" section of linkgit:githooks[5]. +core.virtualWorkDir:: + Please regard this as an experimental feature. + If set to true, utilize the virtual-work-dir hook to identify all + files and directories that are present in the working directory. + Git will only track and update files listed in the virtual work + directory. Using the virtual work directory will supersede the + sparse-checkout settings which will be ignored. + See the "virtual-work-dir" section of linkgit:githooks[6]. + core.trustctime:: If false, the ctime differences between the index and the working tree are ignored; useful when the inode change time diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt index 959044347e..9888d504b4 100644 --- a/Documentation/githooks.txt +++ b/Documentation/githooks.txt @@ -485,6 +485,29 @@ The exit status determines whether git will use the data from the hook to limit its search. On error, it will fall back to verifying all files and folders. +virtual-work-dir + + +Please regard this as an experimental feature. + +The "Virtual Work Directory" hook allows populating the working directory +sparsely. The virtual work directory data is typically automatically +generated by an external process. Git will limit what files it checks for +changes as well as which directories are checked for untracked files based +on the path names given. Git will also only update those files listed in the +virtual work directory. + +The hook is invoked when the configuration option core.virtualWorkDir is +set to true. The hook takes one argument, a version (currently
Re: [PATCH 2/5] ieot: default to not writing IEOT section
On 11/26/2018 2:59 PM, Stefan Beller wrote: +static int record_ieot(void) +{ + int val; + Initialize stack val to zero to ensure proper default. I don't think that is needed here, as we only use `val` when we first write to it via git_config_get_bool. Did you spot this via code review and thought of defensive programming or is there a tool that has a false positive here? Code review and defensive programming. I had to review the code in git_config_get_bool() to see if it always initialized the val even if it didn't find the requested config variable (esp since we don't pass in a default value for this function like we do others). + if (!git_config_get_bool("index.recordoffsettable", )) + return val; + return 0; +}
Re: [PATCH 1/5] eoie: default to not writing EOIE section
On 11/20/2018 1:11 AM, Jonathan Nieder wrote: Since 3b1d9e04 (eoie: add End of Index Entry (EOIE) extension, 2018-10-10) Git defaults to writing the new EOIE section when writing out an index file. Usually that is a good thing because it improves threaded performance, but when a Git repository is shared with older versions of Git, it produces a confusing warning: $ git status ignoring EOIE extension HEAD detached at 371ed0defa nothing to commit, working tree clean Let's introduce the new index extension more gently. First we'll roll out the new version of Git that understands it, and then once sufficiently many users are using such a version, we can flip the default to writing it by default. Introduce a '[index] recordEndOfIndexEntries' configuration variable to allow interested users to benefit from this index extension early. Signed-off-by: Jonathan Nieder --- Rebased. No other change from v1. As Jonathan pointed out, it would be nice to have tests here. Ben, any advice for how I could write some in a followup change? E.g. does Derrick Stolee's tracing based testing trick apply here? I suppose a 'test-dump-eoie' could be written along the lines of test-dump-fsmonitor or test-dump-untracked-cache. Unlike those, there isn't much state to dump other than the existence of the extension and the offset. That could be used to test that the new settings are working properly.
Re: [PATCH 5/5] index: offer advice for unknown index extensions
On 11/20/2018 4:26 AM, Ævar Arnfjörð Bjarmason wrote: On Tue, Nov 20 2018, Jonathan Nieder wrote: Just commenting here on the end-state of this since it's easier than each patch at a time: First, do we still need to be doing %.4s instead of just %s? It would be easier for translators / to understand what's going on if it were just %s. I.e. "this is the extension name" v.s. "this is the first 4 bytes of whatever it is...". return error("index uses %.4s extension, which we do not understand", ext); Missing _(). Not the fault of this series, but something to fix while we're at it. Also not the fault of this series, the "is this upper case" test is unportable, but this is probably the tip of the iceberg for git not working on EBCDIC systems. This message should say something like "Index uses the mandatory %s extension" to clarify and distinguish it from the below. We don't understand the upper-case one either, but the important distinction is that one is mandatory, and the other can be dropped. The two messages should make this clear. Also, having advice() for that case is even more valuable since we have a hard error at this point. So something like: "This is likely due to the index having been written by a future version of Git. All-lowercase index extensions are mandatory, as opposed to optional all-uppercase ones which we'll drop with a warning if we see them". I agree that we should have different messages for mandatory and optional extensions. I don't think we should try and educate the end user on the implementation detail that git makes lower cases mandatory and upper case optional (ie drop the 'All-lowercase..." part). They will never see the lower vs upper case difference and can't do anything about it anyway. trace_printf("ignoring %.4s extension\n", ext); + if (advice_unknown_index_extension) { + warning(_("ignoring optional %.4s index extension"), ext); Should start with upper-case. Good that it says "optional". + advise(_("This is likely due to the file having been written by a newer\n" +"version of Git than is reading it. You can upgrade Git to\n" +"take advantage of performance improvements from the updated\n" +"file format.\n" Let's not promise performance improvements with this extension in a future version. We have no idea what the extension is, yeah right now it's going to be true for the extension that prompted this patch series, but may not be in the future. So just something like this for the last sentence: You can try upgrading Git to use this new index format. Agree - not all are guaranteed to be perf related. +"\n" +"Run \"%s\"\n" +"to suppress this message."), + "git config advice.unknownIndexExtension false"); Somewhat of an aside, but if I grep: git grep -C10 'git config advice\..*false' -- '*.[ch]' There's a few existing examples of this, but the majority of advice() messages don't say in the message how you can turn these off. Do we think this a message users would especially like to turn off? I have the opposite impression, it's a one-off in most cases, although not in the case where an editor has an embedded git. I think it would make sense to add this sort of thing to the advice() API, i.e.: advice_with_config_hint(_(""), "unknownIndexExtension"); Which would then know how to consistently print this advice about how to turn off the warning. I like this. I personally never knew you could turn of the "spent xxx seconds finding untracked files" advice until I worked on this patch series. This would help make that feature more discoverable.
Re: [PATCH 4/5] index: make index.threads=true enable ieot and eoie
On 11/20/2018 1:14 AM, Jonathan Nieder wrote: If a user explicitly sets [index] threads = true to read the index using multiple threads, ensure that index writes include the offset table by default to make that possible. This ensures that the user's intent of turning on threading is respected. In other words, permit the following configurations: - index.threads and index.recordOffsetTable unspecified: do not write the offset table yet (to avoid alarming the user with "ignoring IEOT extension" messages when an older version of Git accesses the repository) but do make use of multiple threads to read the index if the supporting offset table is present. This can also be requested explicitly by setting index.threads=true, 0, or >1 and index.recordOffsetTable=false. - index.threads=false or 1: do not write the offset table, and do not make use of the offset table. One can set index.recordOffsetTable=false as well, to be more explicit. - index.threads=true, 0, or >1 and index.recordOffsetTable unspecified: write the offset table and make use of threads at read time. This can also be requested by setting index.threads=true, 0, >1, or unspecified and index.recordOffsetTable=true. Fortunately the complication is temporary: once most Git installations have upgraded to a version with support for the IEOT and EOIE extensions, we can flip the defaults for index.recordEndOfIndexEntries and index.recordOffsetTable to true and eliminate the settings. This looks good. I think this provides good default behavior while enabling fine grained control to those who want/need it. I'm looking forward to the day when we can turn it back on by default so that people can take advantage of the speed improvements.
Re: [PATCH 2/5] ieot: default to not writing IEOT section
On 11/20/2018 1:12 AM, Jonathan Nieder wrote: As with EOIE, popular versions of Git do not support the new IEOT extension yet. When accessing a Git repository written by a more modern version of Git, they correctly ignore the unrecognized section, but in the process they loudly warn ignoring IEOT extension resulting in confusion for users. Introduce the index extension more gently by not writing it yet in this first version with support for it. Soon, once sufficiently many users are running a modern version of Git, we can flip the default so users benefit from this index extension by default. Introduce a '[index] recordOffsetTable' configuration variable to control whether the new index extension is written. Signed-off-by: Jonathan Nieder --- As with patch 1/5, no change from v1 other than rebasing. Documentation/config/index.txt | 7 +++ read-cache.c | 11 ++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Documentation/config/index.txt b/Documentation/config/index.txt index 8e138aba7a..de44183235 100644 --- a/Documentation/config/index.txt +++ b/Documentation/config/index.txt @@ -5,6 +5,13 @@ index.recordEndOfIndexEntries:: reading the index using Git versions before 2.20. Defaults to 'false'. +index.recordOffsetTable:: + Specifies whether the index file should include an "Index Entry + Offset Table" section. This reduces index load time on + multiprocessor machines but produces a message "ignoring IEOT + extension" when reading the index using Git versions before 2.20. + Defaults to 'false'. + index.threads:: Specifies the number of threads to spawn when loading the index. This is meant to reduce index load time on multiprocessor machines. diff --git a/read-cache.c b/read-cache.c index 1e9c772603..f3d5638d9e 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2698,6 +2698,15 @@ static int record_eoie(void) return 0; } +static int record_ieot(void) +{ + int val; + Initialize stack val to zero to ensure proper default. + if (!git_config_get_bool("index.recordoffsettable", )) + return val; + return 0; +} + /* * On success, `tempfile` is closed. If it is the temporary file * of a `struct lock_file`, we will therefore effectively perform @@ -2761,7 +2770,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, else nr_threads = 1; - if (nr_threads != 1) { + if (nr_threads != 1 && record_ieot()) { int ieot_blocks, cpus; /*
Re: [PATCH 1/5] eoie: default to not writing EOIE section
On 11/20/2018 1:11 AM, Jonathan Nieder wrote: Since 3b1d9e04 (eoie: add End of Index Entry (EOIE) extension, 2018-10-10) Git defaults to writing the new EOIE section when writing out an index file. Usually that is a good thing because it improves threaded performance, but when a Git repository is shared with older versions of Git, it produces a confusing warning: $ git status ignoring EOIE extension HEAD detached at 371ed0defa nothing to commit, working tree clean Let's introduce the new index extension more gently. First we'll roll out the new version of Git that understands it, and then once sufficiently many users are using such a version, we can flip the default to writing it by default. Introduce a '[index] recordEndOfIndexEntries' configuration variable to allow interested users to benefit from this index extension early. Signed-off-by: Jonathan Nieder --- Rebased. No other change from v1. As Jonathan pointed out, it would be nice to have tests here. Ben, any advice for how I could write some in a followup change? E.g. does Derrick Stolee's tracing based testing trick apply here? Documentation/config/index.txt | 7 +++ read-cache.c | 11 ++- t/t1700-split-index.sh | 11 +++ 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/Documentation/config/index.txt b/Documentation/config/index.txt index 4b94b6bedc..8e138aba7a 100644 --- a/Documentation/config/index.txt +++ b/Documentation/config/index.txt @@ -1,3 +1,10 @@ +index.recordEndOfIndexEntries:: + Specifies whether the index file should include an "End Of Index + Entry" section. This reduces index load time on multiprocessor + machines but produces a message "ignoring EOIE extension" when + reading the index using Git versions before 2.20. Defaults to + 'false'. + index.threads:: Specifies the number of threads to spawn when loading the index. This is meant to reduce index load time on multiprocessor machines. diff --git a/read-cache.c b/read-cache.c index 4ca81286c0..1e9c772603 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2689,6 +2689,15 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile rollback_lock_file(lockfile); } +static int record_eoie(void) +{ + int val; I believe you are going to want to initialize val to 0 here as it is on the stack so is not guaranteed to be zero. + + if (!git_config_get_bool("index.recordendofindexentries", )) + return val; + return 0; +} + /* * On success, `tempfile` is closed. If it is the temporary file * of a `struct lock_file`, we will therefore effectively perform @@ -2936,7 +2945,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, * read. Write it out regardless of the strip_extensions parameter as we need it * when loading the shared index. */ - if (offset) { + if (offset && record_eoie()) { struct strbuf sb = STRBUF_INIT; write_eoie_extension(, _c, offset); diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh index 2ac47aa0e4..0cbac64e28 100755 --- a/t/t1700-split-index.sh +++ b/t/t1700-split-index.sh @@ -25,14 +25,17 @@ test_expect_success 'enable split index' ' git update-index --split-index && test-tool dump-split-index .git/index >actual && indexversion=$(test-tool index-version <.git/index) && + + # NEEDSWORK: Stop hard-coding checksums. if test "$indexversion" = "4" then - own=3527df833c6c100d3d1d921a9a782d62a8be4b58 - base=746f7ab2ed44fb839efdfbffcf399d0b113fb4cb + own=432ef4b63f32193984f339431fd50ca796493569 + base=508851a7f0dfa8691e9f69c7f055865389012491 else - own=5e9b60117ece18da410ddecc8b8d43766a0e4204 - base=4370042739b31cd17a5c5cd6043a77c9a00df113 + own=8299b0bcd1ac364e5f1d7768efb62fa2da79a339 + base=39d890139ee5356c7ef572216cebcd27aa41f9df fi && + cat >expect <<-EOF && own $own base $base
Re: Git Test Coverage Report (v2.20.0-rc0)
On 11/19/2018 10:40 AM, Derrick Stolee wrote: The test coverage reports started mid-way through this release cycle, so I thought it would be good to do a full review of the new uncovered code since the last release. I eliminated most of the uncovered code due to the following cases: 1. Code was only moved or refactored. 2. Code was related to unusual error conditions (e.g. open_pack_index() fails) The comments below are intended only to point out potential directions to improve test coverage. Some of it is for me to do! Thanks, -Stolee On 11/18/2018 9:54 PM, Derrick Stolee wrote: There are a lot of lines introduced by the IEOT extension in these commits: > Ben Peart 3255089ad: ieot: add Index Entry Offset Table (IEOT) extension > Ben Peart 3b1d9e045: eoie: add End of Index Entry (EOIE) extension > Ben Peart 77ff1127a: read-cache: load cache entries on worker threads > Ben Peart abb4bb838: read-cache: load cache extensions on a worker thread > Ben Peart c780b9cfe: config: add new index.threads config setting > Ben Peart d1664e73a: add: speed up cmd_add() by utilizing read_cache_preload() > Ben Peart fa655d841: checkout: optimize "git checkout -b " These should be hit if you run the test suite with GIT_TEST_INDEX_THREADS=2. Without that, the indexes for the various tests are too small to trigger multi-threaded index reads/writes. From t/README: GIT_TEST_INDEX_THREADS= enables exercising the multi-threaded loading of the index for the whole test suite by bypassing the default number of cache entries and thread minimums. Setting this to 1 will make the index loading single threaded.
Re: [PATCH 3/3] index: do not warn about unrecognized extensions
On 11/13/2018 10:24 PM, Junio C Hamano wrote: Jonathan Nieder writes: We cannot change the past, but for index extensions of the future, there is a straightforward improvement: silence that message except when tracing. This way, the message is still available when debugging, but in everyday use it does not show up so (once most Git users have this patch) we can turn on new optional extensions right away without alarming people. That argument ignores the "let the users know they are using a stale version when they did use (either by accident or deliberately) a more recent one" value, though. Even if we consider that this is only for debugging, I am not sure if tracing is the right place to add. As long as the "optional extensions can be ignored without affecting the correctness" rule holds, there is nothing gained by letting these messages shown for debugging purposes Having recently written a couple of patches that utilize an optional extension - I actually found the warning to be a helpful debugging tool and would like to see them enabled via tracing. It would also be helpful to see the opposite - I'm looking for an optional extension but it is missing. The most common scenario was when I'd be testing my changes in different repos and forget that I needed to force an updated index to be written that contained the extension I was trying to test. The "silently ignore the optional extension" behavior is good for end users but as a developer, I'd like to be able to have it yell at me via tracing. :-) IMHO - if an end user has to turn on tracing, I view that as a failure on our part. No end user should have to understand the inner workings of git to be able to use it effectively. and if there is such a bug (e.g. we introduced an optional extension but the code that wrote an index with an optional extension wrote the non-optional part in such a way that it cannot be correctly handled without the extension that is supposed to be optional) we'd probably want to let users notice without having to explicitly go into a debugging session. If Googling for "ignoring FNCY ext" gives "discard your index with 'reset HEAD', because an index file with FNCY ext cannot be read without understanding it", that may prevent damages from happening in the first place. On the other hand, hiding it behind tracing would mean the user first need to exprience an unknown breakage first and then has to enable tracing among other 47 different things to diagnose and drill down to the root cause.
Re: [PATCH 2/3] ieot: default to not writing IEOT section
On 11/13/2018 4:08 PM, Jonathan Nieder wrote: Hi again, Ben Peart wrote: On 11/13/2018 1:18 PM, Jonathan Nieder wrote: Ben Peart wrote: Why introduce a new setting to disable writing the IEOT extension instead of just using the existing index.threads setting? If index.threads=1 then the IEOT extension isn't written which (I believe) will accomplish the same goal. Do you mean defaulting to index.threads=1? I don't think that would be a good default, but if you have a different change in mind then I'd be happy to hear it. Or do you mean that if the user has explicitly specified index.threads=true, then that should imply index.recordOffsetTable=true so users only have to set one setting to turn it on? I can imagine that working well. Reading the index with multiple threads requires the EOIE and IEOT extensions to exist in the index. If either extension doesn't exist, then the code falls back to the single threaded path. That means you can't have both 1) no warning for old versions of git and 2) multi-threaded reading for new versions of git. If you set index.threads=1, that will prevent the IEOT extension from being written and there will be no "ignoring IEOT extension" warning in older versions of git. With this patch 'as is' you would have to set both index.threads=true and index.recordOffsetTable=true to get multi-threaded index reads. If either is set to false, it will silently drop back to single threaded reads. Sorry, I'm still not understanding what you're proposing. What would be - the default behavior - the mechanism for changing that behavior under your proposal? I consider index.threads=1 to be a bad default. I would understand if you are saying that that should be the default, and I tried to propose a different way to achieve what you're looking for in the quoted reply above (but I'm not understanding whether you like that proposal or not). Today, both the write logic (ie should we write out the IEOT extension) and the read logic (should I use the IEOT, if available, and do multi-threaded reading) are controlled by the single "index.threads" setting. I would like to keep the settings as simple as possible to prevent user confusion. If we have two different settings (index.threads and index.recordoffsettable) then the only combination that will result in the user actually getting multi-threaded reads is when they are both set to true. Any other combination will silently fail. I think it would be confusing if you set index.threads=true and got no error message but didn't get multi-threaded reads either (or vice versa). If you want to prevent any of the scary "ignoring IEOT extension" from ever happening then your only option is to turn off the IEOT writing by default. The downside is that people have to discover and turn it on if they want the improvements. This can be achieved by changing the default for index.threads from "true" to "false." diff --git a/config.c b/config.c index 2ee29f6f86..86f5c14294 100644 --- a/config.c +++ b/config.c @@ -2291,7 +2291,7 @@ int git_config_get_fsmonitor(void) int git_config_get_index_threads(void) { - int is_bool, val = 0; + int is_bool, val = 1; val = git_env_ulong("GIT_TEST_INDEX_THREADS", 0); if (val) If you want to provide a way for a concerned user to disable the message after the first time they have seen it, then they can be instructed to run 'git config --global index.threads false' There is no way to get multi-threaded reads and NOT get the scary message with older versions of git. Multi-threaded reads require the IEOT extension to be written into the index and the existence of the IEOT extension in the index will always generate the scary warning. Jonathan
Re: [PATCH 2/3] ieot: default to not writing IEOT section
On 11/13/2018 1:18 PM, Jonathan Nieder wrote: Hi, Ben Peart wrote: On 11/12/2018 7:39 PM, Jonathan Nieder wrote: As with EOIE, popular versions of Git do not support the new IEOT extension yet. When accessing a Git repository written by a more modern version of Git, they correctly ignore the unrecognized section, but in the process they loudly warn ignoring IEOT extension resulting in confusion for users. Introduce the index extension more gently by not writing it yet in this first version with support for it. [...] Introduce a '[index] recordOffsetTable' configuration variable to control whether the new index extension is written. Why introduce a new setting to disable writing the IEOT extension instead of just using the existing index.threads setting? If index.threads=1 then the IEOT extension isn't written which (I believe) will accomplish the same goal. Do you mean defaulting to index.threads=1? I don't think that would be a good default, but if you have a different change in mind then I'd be happy to hear it. Or do you mean that if the user has explicitly specified index.threads=true, then that should imply index.recordOffsetTable=true so users only have to set one setting to turn it on? I can imagine that working well. Reading the index with multiple threads requires the EOIE and IEOT extensions to exist in the index. If either extension doesn't exist, then the code falls back to the single threaded path. That means you can't have both 1) no warning for old versions of git and 2) multi-threaded reading for new versions of git. If you set index.threads=1, that will prevent the IEOT extension from being written and there will be no "ignoring IEOT extension" warning in older versions of git. With this patch 'as is' you would have to set both index.threads=true and index.recordOffsetTable=true to get multi-threaded index reads. If either is set to false, it will silently drop back to single threaded reads. Thanks, Jonathan
Re: [PATCH 3/3] index: do not warn about unrecognized extensions
On 11/12/2018 7:40 PM, Jonathan Nieder wrote: Documentation/technical/index-format explains: 4-byte extension signature. If the first byte is 'A'..'Z' the extension is optional and can be ignored. This allows gracefully introducing a new index extension without having to rely on all readers having support for it. Mandatory extensions start with a lowercase letter and optional ones start with a capital. Thus the versions of Git acting on a shared local repository do not have to upgrade in lockstep. We almost obey that convention, but there is a problem: when encountering an unrecognized optional extension, we write ignoring FNCY extension to stderr, which alarms users. This means that in practice we have had to introduce index extensions in two steps: first add read support, and then a while later, start writing by default. This delays when users can benefit from improvements to the index format. We cannot change the past, but for index extensions of the future, there is a straightforward improvement: silence that message except when tracing. This way, the message is still available when debugging, but in everyday use it does not show up so (once most Git users have this patch) we can turn on new optional extensions right away without alarming people. The best patch of the bunch. Glad to see it. I'm fine with doing this via advise.unknownIndexExtension as well. Who knows, someone may actually want to see this and not have tracing turned on. I don't know who but it is possible :-) Signed-off-by: Jonathan Nieder --- Thanks for reading. Thoughts? Sincerely, Jonathan read-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/read-cache.c b/read-cache.c index 290bd54708..65530a68c2 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1720,7 +1720,7 @@ static int read_index_extension(struct index_state *istate, if (*ext < 'A' || 'Z' < *ext) return error("index uses %.4s extension, which we do not understand", ext); - fprintf(stderr, "ignoring %.4s extension\n", ext); + trace_printf("ignoring %.4s extension\n", ext); break; } return 0;
Re: [PATCH 2/3] ieot: default to not writing IEOT section
On 11/12/2018 7:39 PM, Jonathan Nieder wrote: As with EOIE, popular versions of Git do not support the new IEOT extension yet. When accessing a Git repository written by a more modern version of Git, they correctly ignore the unrecognized section, but in the process they loudly warn ignoring IEOT extension resulting in confusion for users. Introduce the index extension more gently by not writing it yet in this first version with support for it. Soon, once sufficiently many users are running a modern version of Git, we can flip the default so users benefit from this index extension by default. Introduce a '[index] recordOffsetTable' configuration variable to control whether the new index extension is written. Why introduce a new setting to disable writing the IEOT extension instead of just using the existing index.threads setting? If index.threads=1 then the IEOT extension isn't written which (I believe) will accomplish the same goal. Signed-off-by: Jonathan Nieder --- Documentation/config.txt | 7 +++ read-cache.c | 11 ++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index d702379db4..cc66fb7de3 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -2195,6 +2195,13 @@ index.recordEndOfIndexEntries:: reading the index using Git versions before 2.20. Defaults to 'false'. +index.recordOffsetTable:: + Specifies whether the index file should include an "Index Entry + Offset Table" section. This reduces index load time on + multiprocessor machines but produces a message "ignoring IEOT + extension" when reading the index using Git versions before 2.20. + Defaults to 'false'. + index.threads:: Specifies the number of threads to spawn when loading the index. This is meant to reduce index load time on multiprocessor machines. diff --git a/read-cache.c b/read-cache.c index 4bfe93c4c2..290bd54708 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2707,6 +2707,15 @@ static int record_eoie(void) return 0; } +static int record_ieot(void) +{ + int val; + + if (!git_config_get_bool("index.recordoffsettable", )) + return val; + return 0; +} + /* * On success, `tempfile` is closed. If it is the temporary file * of a `struct lock_file`, we will therefore effectively perform @@ -2767,7 +2776,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, #ifndef NO_PTHREADS nr_threads = git_config_get_index_threads(); - if (nr_threads != 1) { + if (nr_threads != 1 && record_ieot()) { int ieot_blocks, cpus; /*
Re: [PATCH 1/3] eoie: default to not writing EOIE section
On 11/12/2018 8:05 PM, Junio C Hamano wrote: Jonathan Nieder writes: Since 3b1d9e04 (eoie: add End of Index Entry (EOIE) extension, 2018-10-10) Git defaults to writing the new EOIE section when writing out an index file. Usually that is a good thing because it improves threaded performance, but when a Git repository is shared with older versions of Git, it produces a confusing warning: $ git status ignoring EOIE extension HEAD detached at 371ed0defa nothing to commit, working tree clean Let's introduce the new index extension more gently. First we'll roll out the new version of Git that understands it, and then once sufficiently many users are using such a version, we can flip the default to writing it by default. Introduce a '[index] recordEndOfIndexEntries' configuration variable to allow interested users to benefit from this index extension early. Thanks. I am in principle OK with this approach. In fact, I suspect that the default may want to be dynamically determined, and we give this knob to let the users further force their preference. When no extension that benefits from multi-threading is written, the default can stay "no" in future versions of Git, for example. While I can understand the user confusion the warning about ignoring an extension could cause I guess I'm a little surprised that people would see it that often. To see the warning means they are running a new version of git in the same repo as they are running an old version of git. I just haven't ever experienced that (I only ever have one copy of git installed) so am surprised it comes up often enough to warrant this change. That said, if it _is_ that much of an issue, this patch makes sense and provides a way to more gracefully transition into this feature. Even if we had some logic to dynamically determine whether to write it or not, we'd still want to avoid confusing users when it did get written out. diff --git a/Documentation/config.txt b/Documentation/config.txt index 41a9ff2b6a..d702379db4 100644 The timing is a bit unfortunate for any topic to touch this file, and contrib/cocci would not help us in this case X-<. diff --git a/read-cache.c b/read-cache.c index f3a848d61c..4bfe93c4c2 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2698,6 +2698,15 @@ void update_index_if_able(struct index_state *istate, struct lock_file *lockfile rollback_lock_file(lockfile); } +static int record_eoie(void) +{ + int val; + + if (!git_config_get_bool("index.recordendofindexentries", )) + return val; + return 0; +} Unconditionally defaulting to no in this round is perfectly fine. Let's make a mental note that this is the place to decide dynamic default in the future when we want to. It would probably have to ask around various "extension writing" helpers if they want to have a say in the outcome (e.g. if there are very many cache entries in the istate, the entry offset table may want to be written and otherwise not). @@ -2945,7 +2954,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, * read. Write it out regardless of the strip_extensions parameter as we need it * when loading the shared index. */ - if (offset) { + if (offset && record_eoie()) { struct strbuf sb = STRBUF_INIT; write_eoie_extension(, _c, offset); diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh index 2ac47aa0e4..0cbac64e28 100755 --- a/t/t1700-split-index.sh +++ b/t/t1700-split-index.sh @@ -25,14 +25,17 @@ test_expect_success 'enable split index' ' git update-index --split-index && test-tool dump-split-index .git/index >actual && indexversion=$(test-tool index-version <.git/index) && + + # NEEDSWORK: Stop hard-coding checksums. Also let's stop hard-coding the assumption that the new knob is off by default. Ideally, you'd want to test both cases, right? Perhaps you'd call "git update-index --split-index" we see in the precontext twice, with "-c VAR=false" and "-c VAR=true", to prepare "actual.without-eoie" and "actual.with-eoie", or something like that? Thanks. if test "$indexversion" = "4" then - own=3527df833c6c100d3d1d921a9a782d62a8be4b58 - base=746f7ab2ed44fb839efdfbffcf399d0b113fb4cb + own=432ef4b63f32193984f339431fd50ca796493569 + base=508851a7f0dfa8691e9f69c7f055865389012491 else - own=5e9b60117ece18da410ddecc8b8d43766a0e4204 - base=4370042739b31cd17a5c5cd6043a77c9a00df113 + own=8299b0bcd1ac364e5f1d7768efb62fa2da79a339 + base=39d890139ee5356c7ef572216cebcd27aa41f9df fi && + cat >expect <<-EOF && own $own base $base
Re: [RFC v1] Add virtual file system settings and hook proc
On 11/4/2018 4:01 PM, brian m. carlson wrote: On Sun, Nov 04, 2018 at 07:34:01AM +0100, Duy Nguyen wrote: On Wed, Oct 31, 2018 at 9:53 PM Ben Peart wrote: It's more than a dynamic sparse-checkout because the same list is also used to exclude any file/folder not listed. That means any file not listed won't ever be updated by git (like in 'checkout' for example) so 'stale' files could be left in the working directory. It also means git won't find new/untracked files unless they are specifically added to the list. OK. I'm not at all interested in carrying maintenance burden for some software behind closed doors. I could see values in having a more flexible sparse checkout but this now seems like very tightly designed for GVFS. So unless there's another use case (preferably open source) for this, I don't think this should be added in git.git. I should point out that VFS for Git is an open-source project and will likely have larger use than just at Microsoft. There are both Windows and Mac clients and there are plans for a Linux client as well. Ideally, it would work with an unmodified upstream Git, which is (I assume) why Ben is sending this series. Personally, I don't love the current name used in this series. I don't see this patch as introducing a virtual file system in the Unix sense of that word, and I think calling it that in Git core will be confusing to Unix users. I would prefer to see it as a hook (maybe called "sparse-checkout" or "sparse-exclude"; better names are okay), and simply turn it on based on whether or not there's an appropriate hook file there and whether core.sparseCheckout is on (or possibly with hook.sparseExclude or something). With a design more like that, I don't see a problem with it in principle. I'm really bad at naming so am happy to choose something else that will be more descriptive to the community at large. The name came from the fact that we started with the (equally awful) 'VFS for Git' and this was the big enabling feature in git so for better or worse it got saddled with the same 'VFS' name. In other feedback it was suggested to not add a core.vfs setting that was the path to the hook and I like that. I can change it to core.sparseExclude (unless someone has something better) and hard code the hook name for now. I do like the idea of having config based hooks so when I get some time I will put together a patch series to implement that.
Re: [RFC v1] Add virtual file system settings and hook proc
On 11/5/2018 10:22 AM, Duy Nguyen wrote: On Sun, Nov 4, 2018 at 10:01 PM brian m. carlson wrote: On Sun, Nov 04, 2018 at 07:34:01AM +0100, Duy Nguyen wrote: On Wed, Oct 31, 2018 at 9:53 PM Ben Peart wrote: It's more than a dynamic sparse-checkout because the same list is also used to exclude any file/folder not listed. That means any file not listed won't ever be updated by git (like in 'checkout' for example) so 'stale' files could be left in the working directory. It also means git won't find new/untracked files unless they are specifically added to the list. OK. I'm not at all interested in carrying maintenance burden for some software behind closed doors. I could see values in having a more flexible sparse checkout but this now seems like very tightly designed for GVFS. So unless there's another use case (preferably open source) for this, I don't think this should be added in git.git. I should point out that VFS for Git is an open-source project and will likely have larger use than just at Microsoft. There are both Windows and Mac clients and there are plans for a Linux client as well. Ideally, it would work with an unmodified upstream Git, which is (I assume) why Ben is sending this series. Ah I didn't know that. Thank you. I'll have to look at this GVFS some time then. If we're going to support GVFS though, I think there should be a big (RFC perhaps) series that includes everything to at least give an overview what the end game looks like. Then it could be split up into smaller series. We've always had the goal of not needing a fork at all and are continually working to bring the list of differences to zero but in the mean time, you can see the entire set of changes we've made here [1]. If you look, most of them are changes we are already in process of submitting to git (ie midx, tracing, etc) or patches we fast tracked from master to our branch (your unpack_trees() optimizations for example). Most of the others are small tweaks and features for performance or to smooth integration issues. This RFC contains the core changes that were required to enable VFS for Git. [1] https://github.com/git-for-windows/git/compare/master...Microsoft:gvfs-2.19.1
Re: [RFC v1] Add virtual file system settings and hook proc
On 11/5/2018 10:26 AM, Duy Nguyen wrote: On Mon, Nov 5, 2018 at 12:40 PM Ævar Arnfjörð Bjarmason wrote: On Sun, Nov 04 2018, Duy Nguyen wrote: On Wed, Oct 31, 2018 at 9:53 PM Ben Peart wrote: +core.virtualFilesystem:: + If set, the value of this variable is used as a command which + will identify all files and directories that are present in + the working directory. Git will only track and update files + listed in the virtual file system. Using the virtual file system + will supersede the sparse-checkout settings which will be ignored. + See the "virtual file system" section of linkgit:githooks[6]. It sounds like "virtual file system" is just one of the use cases for this feature, which is more about a dynamic source of sparse-checkout bits. Perhaps name the config key with something along sparse checkout instead of naming it after a use case. It's more than a dynamic sparse-checkout because the same list is also used to exclude any file/folder not listed. That means any file not listed won't ever be updated by git (like in 'checkout' for example) so 'stale' files could be left in the working directory. It also means git won't find new/untracked files unless they are specifically added to the list. OK. I'm not at all interested in carrying maintenance burden for some software behind closed doors. I could see values in having a more flexible sparse checkout but this now seems like very tightly designed for GVFS. So unless there's another use case (preferably open source) for this, I don't think this should be added in git.git. I haven't looked at the patch in any detail beyond skimming it, and you're more familiar with this area... But in principle I'm very interested in getting something like this in git.git, even if we were to assume GVFS was a 100% proprietary implementation. I have nothing against building a GVFS-like solution. If what's submitted can be the building blocks for that, great! But if it was just for GVFS (and it was not available to everybody) then no thank you. Not only is VFS for Git open source and is/will be supported on Windows, Mac and Linux, the interface being proposed is quite generic so should be usable for other implementations. To use it, you just need to provide a hook that will return a list of files git should pay attention to (using a subset of the existing sparse-checkout format). If you see anything that would make using it difficult for other solutions to use, let's fix it now!
Re: [RFC v1] Add virtual file system settings and hook proc
On 11/4/2018 7:02 PM, Junio C Hamano wrote: Ben Peart writes: + if (*dtype == DT_UNKNOWN) + *dtype = get_dtype(NULL, istate, pathname, pathlen); We try to defer paying cost to determine unknown *dtype as late as possible by having this call in last_exclude_matching_from_list(), and not here. If we are doing this, we probably should update the callpaths that call last_exclude_matching_from_list() to make the caller responsible for doing get_dtype() and drop the lazy finding of dtype from the callee. Alternatively, the new "is excluded from vfs" helper can learn to do the lazy get_dtype() just like the existing last_exclude_matching_from_list() does. I suspect the latter may be simpler. In is_excluded_from_virtualfilesystem() dtype can't be lazy because it is always needed (which is why I test and die if it isn't known). You make a call to that function even when virtual-file-system hook is not in use, i.e. instead of the caller saying if (is_vfs_in_use()) { *dtype = get_dtype(...); if (is_excluded_from_vfs(...) > 0) return 1; } your caller makes an unconditional call to is_excluded_from_vfs(). Isn't that the only reason why you break the laziness of determining dtype? You can keep the caller simple by making an unconditional call, but maintain the laziness by updating the callig convention to pass dtype (not *dtype) to the function, e.g.. if (is_excluded_from_vfs(pathname, pathlen, dtype) > 0) return 1; and then at the beginning of the helper if (is_vfs_in_use()) return -1; /* undetermined */ *dtype = get_dtype(...); ... whatever logic it has now ... no? Oops! You're right, I docall get_dtype() even if the vfs isn't in use. I'll add an additional test to avoid doing that. Thank you. I did look into moving the delay load logic into is_excluded_from_vfs() but get_dtype() is static to dir.c and I'd prefer to keep it that way if possible. Your comments are all feedback on the code - how it was implemented, style, etc. Any thoughts on whether this is something we could/should merge into master (after any necessary cleanup)? Would anyone else find this interesting/helpful? I am pretty much neutral. Not strongly opposed to it, but not all that interested until seeing its integration with the "userland" to see how the whole thing works ;-)
[PATCH v1] refresh_index: remove unnecessary calls to preload_index()
From: Ben Peart With refresh_index() learning to utilize preload_index() to speed up its operation there is no longer any benefit to having the caller preload the index first. Remove those unneeded calls by calling read_index() instead of the preload variant. There is no measurable performance impact of this patch - the 2nd call to preload_index() bails out quickly but there is no reason to call it twice. Signed-off-by: Ben Peart --- Notes: Base Ref: Web-Diff: https://github.com/benpeart/git/commit/384f7fed53 Checkout: git fetch https://github.com/benpeart/git no-index-preload-v1 && git checkout 384f7fed53 builtin/commit.c | 2 +- builtin/describe.c | 2 +- builtin/update-index.c | 2 +- sequencer.c| 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 074bd9a551..96d336ec3d 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1363,7 +1363,7 @@ int cmd_status(int argc, const char **argv, const char *prefix) if (status_format != STATUS_FORMAT_PORCELAIN && status_format != STATUS_FORMAT_PORCELAIN_V2) progress_flag = REFRESH_PROGRESS; - read_index_preload(_index, , progress_flag); + read_index(_index); refresh_index(_index, REFRESH_QUIET|REFRESH_UNMERGED|progress_flag, , NULL, NULL); diff --git a/builtin/describe.c b/builtin/describe.c index c48c34e866..cc118448ee 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -629,7 +629,7 @@ int cmd_describe(int argc, const char **argv, const char *prefix) struct argv_array args = ARGV_ARRAY_INIT; int fd, result; - read_cache_preload(NULL); + read_cache(); refresh_index(_index, REFRESH_QUIET|REFRESH_UNMERGED, NULL, NULL, NULL); fd = hold_locked_index(_lock, 0); diff --git a/builtin/update-index.c b/builtin/update-index.c index 07c10bcb7d..0e1dcf0438 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -782,7 +782,7 @@ struct refresh_params { static int refresh(struct refresh_params *o, unsigned int flag) { setup_work_tree(); - read_cache_preload(NULL); + read_cache(); *o->has_errors |= refresh_cache(o->flags | flag); return 0; } diff --git a/sequencer.c b/sequencer.c index 9e1ab3a2a7..ab2048ac3a 100644 --- a/sequencer.c +++ b/sequencer.c @@ -1919,7 +1919,7 @@ static int read_and_refresh_cache(struct replay_opts *opts) { struct lock_file index_lock = LOCK_INIT; int index_fd = hold_locked_index(_lock, 0); - if (read_index_preload(_index, NULL, 0) < 0) { + if (read_index(_index) < 0) { rollback_lock_file(_lock); return error(_("git %s: failed to read the index"), _(action_name(opts))); base-commit: 095c8dc8c2a9d61783dbae79a7f6e8d80092696f -- 2.18.0.windows.1
Re: [PATCH v1] add: speed up cmd_add() by utilizing read_cache_preload()
On 11/2/2018 11:23 AM, Junio C Hamano wrote: Ben Peart writes: From: Ben Peart During an "add", a call is made to run_diff_files() which calls check_remove() for each index-entry. The preload_index() code distributes some of the costs across multiple threads. Nice. I peeked around and noticed that we already do this in builtin_diff_index() before running run_diff_index() when !cached, and builtin_diff_files(), of course. Thanks for double checking! Because the files checked are restricted to pathspec, adding individual files makes no measurable impact but on a Windows repo with ~200K files, 'git add .' drops from 6.3 seconds to 3.3 seconds for a 47% savings. ;-) diff --git a/builtin/add.c b/builtin/add.c index ad49806ebf..f65c172299 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -445,11 +445,6 @@ int cmd_add(int argc, const char **argv, const char *prefix) return 0; } - if (read_cache() < 0) - die(_("index file corrupt")); - - die_in_unpopulated_submodule(_index, prefix); - /* * Check the "pathspec '%s' did not match any files" block * below before enabling new magic. It is not explained why this is not a mere s/read_cache/&_preload/ in the log message. I can see it is because you wanted to make the pathspec available to preload to further cut down the preloaded paths, and I do not think it has any unintended (negatie) side effect to parse the pathspec before populating the in-core index, but that would have been a good thing to mention in the proposed log message. That is correct. parse_pathspec() was after read_cache() because it _used_ to use the index to determine whether a pathspec is in a submodule. That was fixed by Brandon in Aug 2017 when he cleaned up all submodule config code so it is safe to move read_cache_preload() after the call to parse_pathspec(). How about this for a revised commit message? During an "add", a call is made to run_diff_files() which calls check_remove() for each index-entry. The preload_index() code distributes some of the costs across multiple threads. Limit read_cache_preload() to pathspec, so that it doesn't process more entries than are needed and end up slowing things down instead of speeding them up. Because the files checked are restricted to pathspec, adding individual files makes no measurable impact but on a Windows repo with ~200K files, 'git add .' drops from 6.3 seconds to 3.3 seconds for a 47% savings. @@ -459,6 +454,10 @@ int cmd_add(int argc, const char **argv, const char *prefix) PATHSPEC_SYMLINK_LEADING_PATH, prefix, argv); + if (read_cache_preload() < 0) + die(_("index file corrupt")); + + die_in_unpopulated_submodule(_index, prefix); die_path_inside_submodule(_index, ); if (add_new_files) { base-commit: 4ede3d42dfb57f9a41ac96a1f216c62eb7566cc2
[PATCH v1] add: speed up cmd_add() by utilizing read_cache_preload()
From: Ben Peart During an "add", a call is made to run_diff_files() which calls check_remove() for each index-entry. The preload_index() code distributes some of the costs across multiple threads. Because the files checked are restricted to pathspec, adding individual files makes no measurable impact but on a Windows repo with ~200K files, 'git add .' drops from 6.3 seconds to 3.3 seconds for a 47% savings. Signed-off-by: Ben Peart --- Notes: Base Ref: master Web-Diff: https://github.com/benpeart/git/commit/fc4830b545 Checkout: git fetch https://github.com/benpeart/git add-preload-index-v1 && git checkout fc4830b545 builtin/add.c | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/builtin/add.c b/builtin/add.c index ad49806ebf..f65c172299 100644 --- a/builtin/add.c +++ b/builtin/add.c @@ -445,11 +445,6 @@ int cmd_add(int argc, const char **argv, const char *prefix) return 0; } - if (read_cache() < 0) - die(_("index file corrupt")); - - die_in_unpopulated_submodule(_index, prefix); - /* * Check the "pathspec '%s' did not match any files" block * below before enabling new magic. @@ -459,6 +454,10 @@ int cmd_add(int argc, const char **argv, const char *prefix) PATHSPEC_SYMLINK_LEADING_PATH, prefix, argv); + if (read_cache_preload() < 0) + die(_("index file corrupt")); + + die_in_unpopulated_submodule(_index, prefix); die_path_inside_submodule(_index, ); if (add_new_files) { base-commit: 4ede3d42dfb57f9a41ac96a1f216c62eb7566cc2 -- 2.18.0.windows.1
Re: [RFC v1] Add virtual file system settings and hook proc
On 10/31/2018 3:11 PM, Duy Nguyen wrote: not really a review, just a couple quick notes.. Perfect! As an RFC, I'm more looking for high level thoughts/notes than a style/syntax code review. On Tue, Oct 30, 2018 at 9:40 PM Ben Peart wrote: From: Ben Peart On index load, clear/set the skip worktree bits based on the virtual file system data. Use virtual file system data to update skip-worktree bit in unpack-trees. Use virtual file system data to exclude files and folders not explicitly requested. Signed-off-by: Ben Peart --- We have taken several steps to make git perform well on very large repos. Some of those steps include: improving underlying algorithms, utilizing multi-threading where possible, and simplifying the behavior of some commands. These changes typically benefit all git repos to varying degrees. While these optimizations all help, they are insufficient to provide adequate performance on the very large repos we often work with. To make git perform well on the very largest repos, we had to make more significant changes. The biggest performance win by far is the work we have done to make git operations O(modified) instead of O(size of repo). This takes advantage of the fact that the number of files a developer has modified is a tiny fraction of the overall repo size. We accomplished this by utilizing the existing internal logic for the skip worktree bit and excludes to tell git to ignore all files and folders other than those that have been modified. This logic is driven by an external process that monitors writes to the repo and communicates the list of files and folders with changes to git via the virtual file system hook in this patch. The external process maintains a list of files and folders that have been modified. When git runs, it requests the list of files and folders that have been modified via the virtual file system hook. Git then sets/clears the skip-worktree bit on the cache entries and builds a hashmap of the modified files/folders that is used by the excludes logic to avoid scanning the entire repo looking for changes and untracked files. With this system, we have been able to make local git command performance on extremely large repos (millions of files, 1/2 million folders) entirely manageable (30 second checkout, 3.5 seconds status, 4 second add, 7 second commit, etc). Our desire is to eliminate all custom patches in our fork of git. To that end, I'm submitting this as an RFC to see how much interest there is and how much willingness to take this type of change into git. Most of these paragraphs (perhaps except the last one) should be part of the commit message. You describe briefly what the patch does but it's even more important to say why you want to do it. +core.virtualFilesystem:: + If set, the value of this variable is used as a command which + will identify all files and directories that are present in + the working directory. Git will only track and update files + listed in the virtual file system. Using the virtual file system + will supersede the sparse-checkout settings which will be ignored. + See the "virtual file system" section of linkgit:githooks[6]. It sounds like "virtual file system" is just one of the use cases for this feature, which is more about a dynamic source of sparse-checkout bits. Perhaps name the config key with something along sparse checkout instead of naming it after a use case. It's more than a dynamic sparse-checkout because the same list is also used to exclude any file/folder not listed. That means any file not listed won't ever be updated by git (like in 'checkout' for example) so 'stale' files could be left in the working directory. It also means git won't find new/untracked files unless they are specifically added to the list. This is a hook. I notice we start to avoid adding real hooks and just add config keys instead. Eventually we should have config-based hooks, but if we're going to add more like this, I think these should be in a separate section, hook.virtualFileSystem or something. That is a great idea. I don't personally like specifying the hook as the 'flag' for whether a feature should be used. I'd rather have it be a bool (enable the feature? true/false) and 1) either have the hook name hard coded (like most existing hooks) or 2) as you suggest add a consistent way to have config-based hooks. Config based hooks could also help provide a consistent way to configure them using GIT_TEST_* environment variables for testing. I don't think the superseding makes sense. There's no reason this could not be used in combination with $GIT_DIR/info/sparse-checkout. If you don't want both, disable the other. One last note. Since this is related to filesystem. Shouldn't it be part of fsmonitor (the protocol, not the implementation)? Then watchman user could use it to. To get this to work properly takes a lot mo
Re: [RFC v1] Add virtual file system settings and hook proc
On 10/30/2018 7:07 PM, Junio C Hamano wrote: Ben Peart writes: diff --git a/config.c b/config.c index 4051e38823..96e05ee0f1 100644 --- a/config.c +++ b/config.c ... @@ -2307,6 +2311,37 @@ int git_config_get_index_threads(void) return 0; /* auto */ } +int git_config_get_virtualfilesystem(void) +{ + if (git_config_get_pathname("core.virtualfilesystem", _virtualfilesystem)) + core_virtualfilesystem = getenv("GIT_TEST_VIRTUALFILESYSTEM"); + + if (core_virtualfilesystem && !*core_virtualfilesystem) + core_virtualfilesystem = NULL; + + if (core_virtualfilesystem) { + /* +* Some git commands spawn helpers and redirect the index to a different +* location. These include "difftool -d" and the sequencer +* (i.e. `git rebase -i`, `git cherry-pick` and `git revert`) and others. +* In those instances we don't want to update their temporary index with +* our virtualization data. +*/ + char *default_index_file = xstrfmt("%s/%s", the_repository->gitdir, "index"); + int should_run_hook = !strcmp(default_index_file, the_repository->index_file); + + free(default_index_file); + if (should_run_hook) { + /* virtual file system relies on the sparse checkout logic so force it on */ + core_apply_sparse_checkout = 1; + return 1; + } + core_virtualfilesystem = NULL; It would be a small leak if this came from config_get_pathname(), but if it came from $GIT_TEST_VFS env, we cannot free it X-<. A helper function called *_get_X() that does not return X as its return value or updating the location pointed by its *dst parameter, and instead only stores its finding in a global variable feels somewhat odd. It smells more like "find out", "probe", "check", etc. I agree. Frankly, I think it should just return whether it should be used or not (bool) and the hook name should be fixed. I got push back when I did that for fsmonitor so I made this the same in an effort to head off that same feedback. diff --git a/dir.c b/dir.c index 47c2fca8dc..3097b0e446 100644 --- a/dir.c +++ b/dir.c @@ -21,6 +21,7 @@ #include "ewah/ewok.h" #include "fsmonitor.h" #include "submodule-config.h" +#include "virtualfilesystem.h" /* * Tells read_directory_recursive how a file or directory should be treated. @@ -1109,6 +1110,18 @@ int is_excluded_from_list(const char *pathname, struct exclude_list *el, struct index_state *istate) { struct exclude *exclude; + + /* +* The virtual file system data is used to prevent git from traversing +* any part of the tree that is not in the virtual file system. Return +* 1 to exclude the entry if it is not found in the virtual file system, +* else fall through to the regular excludes logic as it may further exclude. +*/ This comment will sit better immediately in front of the call to "is excluded from vfs?" helper function. + if (*dtype == DT_UNKNOWN) + *dtype = get_dtype(NULL, istate, pathname, pathlen); We try to defer paying cost to determine unknown *dtype as late as possible by having this call in last_exclude_matching_from_list(), and not here. If we are doing this, we probably should update the callpaths that call last_exclude_matching_from_list() to make the caller responsible for doing get_dtype() and drop the lazy finding of dtype from the callee. Alternatively, the new "is excluded from vfs" helper can learn to do the lazy get_dtype() just like the existing last_exclude_matching_from_list() does. I suspect the latter may be simpler. In is_excluded_from_virtualfilesystem() dtype can't be lazy because it is always needed (which is why I test and die if it isn't known). I considered doing the test/call to get_dtype() within is_excluded_from_virtualfilesystem() but didn't like making it dependent on istate just so I could move the get_dtype() call in one level. It is functionally identical so I can easily move it in if that is preferred. + if (is_excluded_from_virtualfilesystem(pathname, pathlen, *dtype) > 0) + return 1; + exclude = last_exclude_matching_from_list(pathname, pathlen, basename, dtype, el, istate); if (exclude) @@ -1324,8 +1337,20 @@ struct exclude *last_exclude_matching(struct dir_struct *dir, int is_excluded(struct dir_struct *dir, struct index_state *istate, const char *pathname, int *dtype_p) { - struct exclude *exclude = - last_exclude_matchi
[RFC v1] Add virtual file system settings and hook proc
From: Ben Peart On index load, clear/set the skip worktree bits based on the virtual file system data. Use virtual file system data to update skip-worktree bit in unpack-trees. Use virtual file system data to exclude files and folders not explicitly requested. Signed-off-by: Ben Peart --- We have taken several steps to make git perform well on very large repos. Some of those steps include: improving underlying algorithms, utilizing multi-threading where possible, and simplifying the behavior of some commands. These changes typically benefit all git repos to varying degrees. While these optimizations all help, they are insufficient to provide adequate performance on the very large repos we often work with. To make git perform well on the very largest repos, we had to make more significant changes. The biggest performance win by far is the work we have done to make git operations O(modified) instead of O(size of repo). This takes advantage of the fact that the number of files a developer has modified is a tiny fraction of the overall repo size. We accomplished this by utilizing the existing internal logic for the skip worktree bit and excludes to tell git to ignore all files and folders other than those that have been modified. This logic is driven by an external process that monitors writes to the repo and communicates the list of files and folders with changes to git via the virtual file system hook in this patch. The external process maintains a list of files and folders that have been modified. When git runs, it requests the list of files and folders that have been modified via the virtual file system hook. Git then sets/clears the skip-worktree bit on the cache entries and builds a hashmap of the modified files/folders that is used by the excludes logic to avoid scanning the entire repo looking for changes and untracked files. With this system, we have been able to make local git command performance on extremely large repos (millions of files, 1/2 million folders) entirely manageable (30 second checkout, 3.5 seconds status, 4 second add, 7 second commit, etc). Our desire is to eliminate all custom patches in our fork of git. To that end, I'm submitting this as an RFC to see how much interest there is and how much willingness to take this type of change into git. Notes: Base Ref: master Web-Diff: https://github.com/benpeart/git/commit/c06b290d2f Checkout: git fetch https://github.com/benpeart/git virtual-filesystem-v1 && git checkout c06b290d2f Documentation/config.txt | 8 + Documentation/githooks.txt | 20 ++ Makefile | 1 + cache.h | 1 + config.c | 37 +++- config.h | 1 + dir.c| 34 +++- environment.c| 1 + read-cache.c | 2 + t/README | 3 + t/t1092-virtualfilesystem.sh | 354 +++ t/t1092/virtualfilesystem| 23 +++ unpack-trees.c | 23 ++- virtualfilesystem.c | 308 ++ virtualfilesystem.h | 25 +++ 15 files changed, 833 insertions(+), 8 deletions(-) create mode 100755 t/t1092-virtualfilesystem.sh create mode 100755 t/t1092/virtualfilesystem create mode 100644 virtualfilesystem.c create mode 100644 virtualfilesystem.h diff --git a/Documentation/config.txt b/Documentation/config.txt index 09e95e9e98..dd4b834375 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -441,6 +441,14 @@ core.fsmonitor:: avoiding unnecessary processing of files that have not changed. See the "fsmonitor-watchman" section of linkgit:githooks[5]. +core.virtualFilesystem:: + If set, the value of this variable is used as a command which + will identify all files and directories that are present in + the working directory. Git will only track and update files + listed in the virtual file system. Using the virtual file system + will supersede the sparse-checkout settings which will be ignored. + See the "virtual file system" section of linkgit:githooks[6]. + core.trustctime:: If false, the ctime differences between the index and the working tree are ignored; useful when the inode change time diff --git a/Documentation/githooks.txt b/Documentation/githooks.txt index 959044347e..87f9ad2a77 100644 --- a/Documentation/githooks.txt +++ b/Documentation/githooks.txt @@ -492,6 +492,26 @@ This hook is invoked by `git-p4 submit`. It takes no parameters and nothing from standard input. Exiting with non-zero status from this script prevent `git-p4 submit` from launching. Run `git-p4 submit --help` for details. +virtualFilesystem +~~ + +"Virtual File System" allows populating the working directory sparsely. +The projection data is typically automatically generated by an externa
[PATCH v1] speed up refresh_index() by utilizing preload_index()
From: Ben Peart Speed up refresh_index() by utilizing preload_index() to do most of the work spread across multiple threads. This works because most cache entries will get marked CE_UPTODATE so that refresh_cache_ent() can bail out early when called from within refresh_index(). On a Windows repo with ~200K files, this drops refresh times from 6.64 seconds to 2.87 seconds for a savings of 57%. Signed-off-by: Ben Peart --- Notes: Base Ref: master Web-Diff: https://github.com/benpeart/git/commit/feee1054c2 Checkout: git fetch https://github.com/benpeart/git refresh-index-multithread-preload-v1 && git checkout feee1054c2 cache.h | 3 +++ preload-index.c | 8 read-cache.c| 6 ++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/cache.h b/cache.h index f7fabdde8f..883099db08 100644 --- a/cache.h +++ b/cache.h @@ -659,6 +659,9 @@ extern int daemonize(void); /* Initialize and use the cache information */ struct lock_file; extern int read_index(struct index_state *); +extern void preload_index(struct index_state *index, + const struct pathspec *pathspec, + unsigned int refresh_flags); extern int read_index_preload(struct index_state *, const struct pathspec *pathspec, unsigned int refresh_flags); diff --git a/preload-index.c b/preload-index.c index 9e7152ab14..222792ccbc 100644 --- a/preload-index.c +++ b/preload-index.c @@ -9,7 +9,7 @@ #include "progress.h" #ifdef NO_PTHREADS -static void preload_index(struct index_state *index, +void preload_index(struct index_state *index, const struct pathspec *pathspec, unsigned int refresh_flags) { @@ -100,9 +100,9 @@ static void *preload_thread(void *_data) return NULL; } -static void preload_index(struct index_state *index, - const struct pathspec *pathspec, - unsigned int refresh_flags) +void preload_index(struct index_state *index, + const struct pathspec *pathspec, + unsigned int refresh_flags) { int threads, i, work, offset; struct thread_data data[MAX_PARALLEL]; diff --git a/read-cache.c b/read-cache.c index d57958233e..53733d651d 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1496,6 +1496,12 @@ int refresh_index(struct index_state *istate, unsigned int flags, typechange_fmt = (in_porcelain ? "T\t%s\n" : "%s needs update\n"); added_fmt = (in_porcelain ? "A\t%s\n" : "%s needs update\n"); unmerged_fmt = (in_porcelain ? "U\t%s\n" : "%s: needs merge\n"); + /* +* Use the multi-threaded preload_index() to refresh most of the +* cache entries quickly then in the single threaded loop below, +* we only have to do the special cases that are left. +*/ + preload_index(istate, pathspec, 0); for (i = 0; i < istate->cache_nr; i++) { struct cache_entry *ce, *new_entry; int cache_errno = 0; base-commit: c670b1f876521c9f7cd40184bf7ed05aad843433 -- 2.9.2.gvfs.2.27918.g0990287eef
Re: [PATCH v2 06/10] preload-index.c: remove #ifdef NO_PTHREADS
On 10/29/2018 1:26 PM, Duy Nguyen wrote: On Mon, Oct 29, 2018 at 6:21 PM Ben Peart wrote: @@ -114,6 +104,9 @@ static void preload_index(struct index_state *index, threads = index->cache_nr / THREAD_COST; if ((index->cache_nr > 1) && (threads < 2) && git_env_bool("GIT_TEST_PRELOAD_INDEX", 0)) threads = 2; + cpus = online_cpus(); + if (threads > cpus) + threads = cpus; if (threads < 2) return; trace_performance_enter(); Capping the number of threads to online_cpus() does not always make sense. In this case (or at least the original use case where we stat() over nfs) we want to issue as many requests to nfs server as possible to reduce latency and it has nothing to do with how many cores we have. Using more threads than cores might make sense since threads are blocked by nfs client, but we still want to send more to the server. That makes sense. Some test will be necessary then. I guess HAVE_THREADS is functionally the same as online_cpus() == 1. For some reason, I prefer the latter - probably because I'm typically already calling it and so it doesn't feel like a 'special' value/test I have to remember. I just know I need to make sure the logic works correctly with any number of cps greater than zero. :-)
Re: [PATCH 09/10] read-cache.c: remove #ifdef NO_PTHREADS
On 10/29/2018 1:21 PM, Duy Nguyen wrote: On Mon, Oct 29, 2018 at 6:05 PM Ben Peart wrote: @@ -2756,8 +2745,11 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, if (ce_write(, newfd, , sizeof(hdr)) < 0) return -1; -#ifndef NO_PTHREADS - nr_threads = git_config_get_index_threads(); + if (HAVE_THREADS) This shouldn't be needed either. My assumption was that if someone explicitly asked for multiple threads we're better off failing than silently ignoring their request. The default/automatic setting will detect the number of cpus and behave correctly. No. I can have ~/.gitconfig shared between different machines. One with multithread support and one without. I should not have to fall back to conditional includes in order to use the same config file on the machine without multithread. If you want to share the ~/.gitconfig across machines that have different numbers of CPUs, it makes more sense to me to leave the setting at the "auto" setting rather than specifically overriding it to a number that won't work on at least one of your machines. Then it all "just works" without the need to conditional includes.
Re: [PATCH v2 08/10] read-cache.c: remove #ifdef NO_PTHREADS
On 10/29/2018 10:30 AM, Jeff King wrote: On Sat, Oct 27, 2018 at 07:30:06PM +0200, Nguyễn Thái Ngọc Duy wrote: -#ifndef NO_PTHREADS - nr_threads = git_config_get_index_threads(); + if (HAVE_THREADS) { + nr_threads = git_config_get_index_threads(); - /* TODO: does creating more threads than cores help? */ - if (!nr_threads) { - nr_threads = istate->cache_nr / THREAD_COST; - cpus = online_cpus(); - if (nr_threads > cpus) - nr_threads = cpus; + /* TODO: does creating more threads than cores help? */ + if (!nr_threads) { + nr_threads = istate->cache_nr / THREAD_COST; + cpus = online_cpus(); + if (nr_threads > cpus) + nr_threads = cpus; + } + } else { + nr_threads = 1; } I'd have thought we could just rely on online_cpus() returning 1 here to avoid having to ask "do we even have thread support?". But I guess that TODO comment implies that we might one day two 2 threads on a single CPU. -Peff And here is the patch I created when testing out the idea of removing NO_PTHREADS. It doesn't require any 'HAVE_THREADS' tests. diff --git a/read-cache.c b/read-cache.c index 1df5c16dbc..1f088799fc 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1920,19 +1920,15 @@ struct index_entry_offset_table struct index_entry_offset entries[FLEX_ARRAY]; }; -#ifndef NO_PTHREADS static struct index_entry_offset_table *read_ieot_extension(const char *mmap, size_t mmap_size, size_t offset ); static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_table *ieot); -#endif static size_t read_eoie_extension(const char *mmap, size_t mmap_size); static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset); struct load_index_extensions { -#ifndef NO_PTHREADS pthread_t pthread; -#endif struct index_state *istate; const char *mmap; size_t mmap_size; @@ -2010,8 +2006,6 @@ static unsigned long load_all_cache_entries(struct index_state *istate, return consumed; } -#ifndef NO_PTHREADS - /* * Mostly randomly chosen maximum thread counts: we * cap the parallelism to online_cpus() threads, and we want @@ -2122,7 +2116,6 @@ static unsigned long load_cache_entries_threaded(struct index_state *istate, con return consumed; } -#endif /* remember to discard_cache() before reading a different cache! */ int do_read_index(struct index_state *istate, const char *path, int must_exist) @@ -2135,10 +2128,8 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) size_t mmap_size; struct load_index_extensions p; size_t extension_offset = 0; -#ifndef NO_PTHREADS int nr_threads, cpus; struct index_entry_offset_table *ieot = NULL; -#endif if (istate->initialized) return istate->cache_nr; @@ -2181,16 +2172,12 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist ) src_offset = sizeof(*hdr); -#ifndef NO_PTHREADS nr_threads = git_config_get_index_threads(); - - /* TODO: does creating more threads than cores help? */ - if (!nr_threads) { + if (!nr_threads) nr_threads = istate->cache_nr / THREAD_COST; - cpus = online_cpus(); - if (nr_threads > cpus) - nr_threads = cpus; - } + cpus = online_cpus(); + if (nr_threads > cpus) + nr_threads = cpus; if (nr_threads > 1) { extension_offset = read_eoie_extension(mmap, mmap_size); @@ -2219,22 +2206,16 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist ) } else { src_offset += load_all_cache_entries(istate, mmap, mmap_size, src_offset); } -#else - src_offset += load_all_cache_entries(istate, mmap, mmap_size, src_offset); -#endif istate->timestamp.sec = st.st_mtime; istate->timestamp.nsec = ST_MTIME_NSEC(st); /* if we created a thread, join it otherwise load the extensions on the primary thread */ -#ifndef NO_PTHREADS if (extension_offset) { int ret = pthread_join(p.pthread, NULL); if (ret) die(_("unable to join load_index_extensions thread: %s"), strerror(ret)); - } -#endif - if (!extension_offset) { + } else { p.src_offset = src_offset; load_index_extensions(); } @@ -2756,7 +2737,6 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, if (ce_write(, newfd, , sizeof(hdr)) < 0) return -1; -#ifndef NO_PTHREADS nr_threads =
Re: [PATCH v2 06/10] preload-index.c: remove #ifdef NO_PTHREADS
On 10/27/2018 1:30 PM, Nguyễn Thái Ngọc Duy wrote: Signed-off-by: Nguyễn Thái Ngọc Duy --- preload-index.c | 15 ++- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/preload-index.c b/preload-index.c index 9e7152ab14..0e24886aca 100644 --- a/preload-index.c +++ b/preload-index.c @@ -7,17 +7,7 @@ #include "fsmonitor.h" #include "config.h" #include "progress.h" - -#ifdef NO_PTHREADS -static void preload_index(struct index_state *index, - const struct pathspec *pathspec, - unsigned int refresh_flags) -{ - ; /* nothing */ -} -#else - -#include +#include "thread-utils.h" /* * Mostly randomly chosen maximum thread counts: we @@ -108,7 +98,7 @@ static void preload_index(struct index_state *index, struct thread_data data[MAX_PARALLEL]; struct progress_data pd; - if (!core_preload_index) + if (!HAVE_THREADS || !core_preload_index) return; threads = index->cache_nr / THREAD_COST; @@ -151,7 +141,6 @@ static void preload_index(struct index_state *index, trace_performance_leave("preload index"); } -#endif int read_index_preload(struct index_state *index, const struct pathspec *pathspec, In the theory that code speaks louder than comments, here is the patch I used when testing out the idea of getting rid of NO_PTHREADS: git diff head~1 preload-index.c diff --git a/preload-index.c b/preload-index.c index 9e7152ab14..266bc9d8d7 100644 --- a/preload-index.c +++ b/preload-index.c @@ -7,17 +7,7 @@ #include "fsmonitor.h" #include "config.h" #include "progress.h" - -#ifdef NO_PTHREADS -static void preload_index(struct index_state *index, - const struct pathspec *pathspec, - unsigned int refresh_flags) -{ - ; /* nothing */ -} -#else - -#include +#include "thread-utils.h" /* * Mostly randomly chosen maximum thread counts: we @@ -104,7 +94,7 @@ static void preload_index(struct index_state *index, const struct pathspec *pathspec, unsigned int refresh_flags) { - int threads, i, work, offset; + int cpus, threads, i, work, offset; struct thread_data data[MAX_PARALLEL]; struct progress_data pd; @@ -114,6 +104,9 @@ static void preload_index(struct index_state *index, threads = index->cache_nr / THREAD_COST; if ((index->cache_nr > 1) && (threads < 2) && git_env_bool("GIT_TEST_PRELOAD_INDEX", 0)) threads = 2; + cpus = online_cpus(); + if (threads > cpus) + threads = cpus; if (threads < 2) return; trace_performance_enter(); @@ -151,7 +144,6 @@ static void preload_index(struct index_state *index, trace_performance_leave("preload index"); } -#endif int read_index_preload(struct index_state *index, const struct pathspec *pathspec,
Re: [PATCH v2 08/10] read-cache.c: remove #ifdef NO_PTHREADS
On 10/29/2018 10:30 AM, Jeff King wrote: On Sat, Oct 27, 2018 at 07:30:06PM +0200, Nguyễn Thái Ngọc Duy wrote: -#ifndef NO_PTHREADS - nr_threads = git_config_get_index_threads(); + if (HAVE_THREADS) { + nr_threads = git_config_get_index_threads(); - /* TODO: does creating more threads than cores help? */ - if (!nr_threads) { - nr_threads = istate->cache_nr / THREAD_COST; - cpus = online_cpus(); - if (nr_threads > cpus) - nr_threads = cpus; + /* TODO: does creating more threads than cores help? */ + if (!nr_threads) { + nr_threads = istate->cache_nr / THREAD_COST; + cpus = online_cpus(); + if (nr_threads > cpus) + nr_threads = cpus; + } + } else { + nr_threads = 1; } I'd have thought we could just rely on online_cpus() returning 1 here to avoid having to ask "do we even have thread support?". But I guess that TODO comment implies that we might one day two 2 threads on a single CPU. -Peff See my earlier response - the HAVE_THREADS tests are not needed. We can remove the "TODO" comment - I tested it and it wasn't faster to have more threads than CPUs.
Re: [PATCH 09/10] read-cache.c: remove #ifdef NO_PTHREADS
On 10/27/2018 3:10 AM, Nguyễn Thái Ngọc Duy wrote: Signed-off-by: Nguyễn Thái Ngọc Duy --- read-cache.c | 49 ++--- 1 file changed, 18 insertions(+), 31 deletions(-) diff --git a/read-cache.c b/read-cache.c index d57958233e..ba870bc3fd 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1920,19 +1920,15 @@ struct index_entry_offset_table struct index_entry_offset entries[FLEX_ARRAY]; }; -#ifndef NO_PTHREADS static struct index_entry_offset_table *read_ieot_extension(const char *mmap, size_t mmap_size, size_t offset); static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_table *ieot); -#endif static size_t read_eoie_extension(const char *mmap, size_t mmap_size); static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset); struct load_index_extensions { -#ifndef NO_PTHREADS pthread_t pthread; -#endif struct index_state *istate; const char *mmap; size_t mmap_size; @@ -2010,8 +2006,6 @@ static unsigned long load_all_cache_entries(struct index_state *istate, return consumed; } -#ifndef NO_PTHREADS - /* * Mostly randomly chosen maximum thread counts: we * cap the parallelism to online_cpus() threads, and we want @@ -2122,7 +2116,6 @@ static unsigned long load_cache_entries_threaded(struct index_state *istate, con return consumed; } -#endif /* remember to discard_cache() before reading a different cache! */ int do_read_index(struct index_state *istate, const char *path, int must_exist) @@ -2135,10 +2128,8 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) size_t mmap_size; struct load_index_extensions p; size_t extension_offset = 0; -#ifndef NO_PTHREADS int nr_threads, cpus; struct index_entry_offset_table *ieot = NULL; -#endif if (istate->initialized) return istate->cache_nr; @@ -2181,15 +2172,18 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) src_offset = sizeof(*hdr); -#ifndef NO_PTHREADS - nr_threads = git_config_get_index_threads(); + if (HAVE_THREADS) { In this case, nr_threads is already capped to online_cpus() below so this HAVE_THREADS test isn't needed. + nr_threads = git_config_get_index_threads(); - /* TODO: does creating more threads than cores help? */ - if (!nr_threads) { - nr_threads = istate->cache_nr / THREAD_COST; - cpus = online_cpus(); - if (nr_threads > cpus) - nr_threads = cpus; + /* TODO: does creating more threads than cores help? */ + if (!nr_threads) { + nr_threads = istate->cache_nr / THREAD_COST; + cpus = online_cpus(); + if (nr_threads > cpus) + nr_threads = cpus; + } + } else { + nr_threads = 1; } if (nr_threads > 1) { @@ -2219,21 +2213,16 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) } else { src_offset += load_all_cache_entries(istate, mmap, mmap_size, src_offset); } -#else - src_offset += load_all_cache_entries(istate, mmap, mmap_size, src_offset); -#endif istate->timestamp.sec = st.st_mtime; istate->timestamp.nsec = ST_MTIME_NSEC(st); /* if we created a thread, join it otherwise load the extensions on the primary thread */ -#ifndef NO_PTHREADS - if (extension_offset) { + if (HAVE_THREADS && extension_offset) { Here extension_offset won't be set if there wasn't a thread created so the HAVE_THREADS test is not needed. int ret = pthread_join(p.pthread, NULL); if (ret) die(_("unable to join load_index_extensions thread: %s"), strerror(ret)); } -#endif if (!extension_offset) { p.src_offset = src_offset; load_index_extensions(); @@ -2756,8 +2745,11 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, if (ce_write(, newfd, , sizeof(hdr)) < 0) return -1; -#ifndef NO_PTHREADS - nr_threads = git_config_get_index_threads(); + if (HAVE_THREADS) This shouldn't be needed either. My assumption was that if someone explicitly asked for multiple threads we're better off failing than silently ignoring their request. The default/automatic setting will detect the number of cpus and behave correctly. + nr_threads = git_config_get_index_threads(); + else + nr_threads = 1; + if (nr_threads != 1) { int ieot_blocks, cpus; @@ -2787,7 +2779,6 @@ static int do_write_index(struct index_state *istate, struct tempfile
Re: [PATCH 07/10] preload-index.c: remove #ifdef NO_PTHREADS
On 10/27/2018 3:10 AM, Nguyễn Thái Ngọc Duy wrote: Signed-off-by: Nguyễn Thái Ngọc Duy --- preload-index.c | 15 ++- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/preload-index.c b/preload-index.c index 9e7152ab14..0e24886aca 100644 --- a/preload-index.c +++ b/preload-index.c @@ -7,17 +7,7 @@ #include "fsmonitor.h" #include "config.h" #include "progress.h" - -#ifdef NO_PTHREADS -static void preload_index(struct index_state *index, - const struct pathspec *pathspec, - unsigned int refresh_flags) -{ - ; /* nothing */ -} -#else - -#include +#include "thread-utils.h" /* * Mostly randomly chosen maximum thread counts: we @@ -108,7 +98,7 @@ static void preload_index(struct index_state *index, struct thread_data data[MAX_PARALLEL]; struct progress_data pd; - if (!core_preload_index) + if (!HAVE_THREADS || !core_preload_index) I also "hoped in general that the non-threaded code paths could mostly just collapse into the same as the "threads == 1" cases, and we wouldn't have to "are threads even supported" in a lot of places." In this case, if we cap the threads to online_cpus() the later test for 'if (threads < 2)' would do that. I haven't measured this specific case but in the other cases I have measured, having more threads than cpus did not result in a performance win. return; threads = index->cache_nr / THREAD_COST; @@ -151,7 +141,6 @@ static void preload_index(struct index_state *index, trace_performance_leave("preload index"); } -#endif int read_index_preload(struct index_state *index, const struct pathspec *pathspec,
Re: [PATCH/RFC] thread-utils: better wrapper to avoid #ifdef NO_PTHREADS
On 10/23/2018 4:28 PM, Jeff King wrote: On Thu, Oct 18, 2018 at 08:05:22PM +0200, Nguyễn Thái Ngọc Duy wrote: On Thu, Oct 18, 2018 at 7:09 PM Jeff King wrote: In this particular case though I think we should be able to avoid so much #if if we make a wrapper for pthread api that would return an error or something when pthread is not available. But similar situation may happen elsewhere too. Yeah, I think that is generally the preferred method anyway, just because of readability and simplicity. I've wanted to do this for a while, so let's test the water and see if it's well received. This patch is a proof of concept that adds just enough macros so that I can build index-pack.c on a single thread mode with zero #ifdef related to NO_PTHREADS. Besides readability and simplicity, it reduces the chances of breaking conditional builds (e.g. you rename a variable name but forgot that the variable is in #if block that is not used by your compiler/platform). Yes, I love this. We're already halfway there with things like read_lock() in index-pack and elsewhere, which are conditionally no-ops. The resulting code is much easier to read, I think. I am also very much in favor of this. I updated a couple of places threading is being used that I've been working in (preload-index and read-cache) and both are much simplified using your proof of concept patch. Performance-wise I don't think there is any loss for single thread mode. I rely on compilers recognizing HAVE_THREADS being a constant and remove dead code or at least optimize in favor of non-dead code. Memory-wise, yes we use some more memory in single thread mode. But we don't have zillions of mutexes or thread id, so a bit extra memory does not worry me so much. Yeah, I don't think carrying around a handful of ints is going to be a big deal. Just to be complete, there _is_ an additional cost. Today, code paths that are only executed when there are pthreads available are excluded from the binary (via #ifdef). With this change, those code paths would now be included causing some code bloat to NO_PTHREAD threaded images. One example of this is in read-cache.c where the ieot read/write functions aren't included for NO_PTHREAD but now would be. I also think we may want to make a fundamental shift in our view of thread support. In the early days, it was "well, this is a thing that modern systems can take advantage of for certain commands". But these days I suspect it is more like "there are a handful of legacy systems that do not even support threads". I don't think we should break the build on those legacy systems, but it's probably OK to stop thinking of it as "non-threaded platforms are the default and must pay zero cost" and more as "threaded platforms are the default, and non-threaded ones are OK to pay a small cost as long as they still work". I agree though I'm still curious if there are still no-threaded platforms taking new versions of git. Perhaps we should do the depreciation warning you suggested elsewhere and see how much push back we get. It's unlikely we'd get lucky and be able to stop supporting them completely but it's worth asking! @@ -74,4 +79,29 @@ int init_recursive_mutex(pthread_mutex_t *m) pthread_mutexattr_destroy(); } return ret; +#else + return ENOSYS; +#endif +} I suspect some of these ENOSYS could just become a silent success. ("yep, I initialized your dummy mutex"). But it probably doesn't matter much either way, as we would not generally even bother checking this return. +#ifdef NO_PTHREADS +int dummy_pthread_create(pthread_t *pthread, const void *attr, +void *(*fn)(void *), void *data) +{ + return ENOSYS; } Whereas for this one, ENOSYS makes a lot of sense (we should avoid the threaded code-path anyway when we see that online_cpus()==1, and this would let us know when we mess that up). This highlights something anyone writing multi-threaded code will need to pay attention to that wasn't an issue before. If you attempt to create more threads than online_cpus(), the pthread_create() call will fail and needs to be handled gracefully. One example of this is in preload-index.c where (up to) 20 threads are created irrespective of what online_cpus() returns and if pthread_create() fails, it just dies. The logic would need to be updated for this to work correctly. I still think this is a much simpler issue to deal with than what we have today with having to write/debug multiple code paths but I did want to point it out for completeness. +int dummy_pthread_init(void *data) +{ + /* +* Do nothing. +* +* The main purpose of this function is to break compiler's +* flow analysis or it may realize that functions like +* pthread_mutex_init() is no-op, which means the (static) +* variable is not used/initialized at all and trigger +*
Re: [PATCH v4 2/3] reset: add new reset.quiet config setting
On 10/25/2018 5:26 AM, Junio C Hamano wrote: Junio C Hamano writes: To be honest, I find the second sentence in your rewrite even more confusing. It reads as if `reset.quiet` configuration variable can be used to restore the "show what is yet to be added" behaviour, due to the parenthetical mention of the default behaviour without any configuration. The command reports what is yet to be added to the index after `reset` by default. It can be made to only report errors with the `--quiet` option, or setting `reset.quiet` configuration variable to `true` (the latter can be overriden with `--no-quiet`). That may not be much better, though X-<. In any case, the comments are getting closer to the bikeshedding territory, that can be easily addressed incrementally. I am getting the impression that everbody agrees that the change is desirable, sufficiently documented and properly implemented. Shall we mark it for "Will merge to 'next'" in the what's cooking report and leave further refinements to incremental updates as needed? While not great, I think it is good enough. I don't think either of the last couple of rewrite attempts were clearly better than what is in the latest patch. I'd agree we should merge to 'next' and if someone comes up with something great, we can update it then.
Re: [PATCH v1] load_cache_entries_threaded: remove unused src_offset parameter
On 10/22/2018 7:05 PM, Junio C Hamano wrote: Jeff King writes: If nobody uses it, should we drop the return value, too? Like: Yup. I'm good with that. At one point I also had the additional #ifndef NO_PTHREADS lines but it was starting to get messy with the threaded vs non-threaded code paths so I removed them. I'm fine with which ever people find more readable. It does make me wonder if there are still platforms taking new build of git that don't support threads. Do we still need to write/test/debug/read through the single threaded code paths? Is the diff Peff sent enough or do you want me to send another iteration on the patch? Thanks, Ben diff --git a/read-cache.c b/read-cache.c index 78c9516eb7..4b44a2eae5 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2052,12 +2052,11 @@ static void *load_cache_entries_thread(void *_data) return NULL; } -static unsigned long load_cache_entries_threaded(struct index_state *istate, const char *mmap, size_t mmap_size, -int nr_threads, struct index_entry_offset_table *ieot) +static void load_cache_entries_threaded(struct index_state *istate, const char *mmap, size_t mmap_size, + int nr_threads, struct index_entry_offset_table *ieot) { int i, offset, ieot_blocks, ieot_start, err; struct load_cache_entries_thread_data *data; - unsigned long consumed = 0; /* a little sanity checking */ if (istate->name_hash_initialized) @@ -2115,12 +2114,9 @@ static unsigned long load_cache_entries_threaded(struct index_state *istate, con if (err) die(_("unable to join load_cache_entries thread: %s"), strerror(err)); mem_pool_combine(istate->ce_mem_pool, p->ce_mem_pool); - consumed += p->consumed; } free(data); - - return consumed; } #endif -Peff
[PATCH v4 0/3] speed up git reset
From: Ben Peart Updated the wording in the documentation and commit messages to (hopefully) make it clearer. Added the warning about 'reset --quiet' to the advice system so that it can be turned off. Base Ref: Web-Diff: https://github.com/benpeart/git/commit/8a2fef45d4 Checkout: git fetch https://github.com/benpeart/git reset-refresh-index-v4 && git checkout 8a2fef45d4 ### Patches Ben Peart (3): reset: don't compute unstaged changes after reset when --quiet reset: add new reset.quiet config setting reset: warn when refresh_index() takes more than 2 seconds Documentation/config.txt| 7 +++ Documentation/git-reset.txt | 5 - advice.c| 2 ++ advice.h| 1 + builtin/reset.c | 15 ++- 5 files changed, 28 insertions(+), 2 deletions(-) base-commit: ca63497355222acefcca02b9cbb540a4768f3286 -- 2.18.0.windows.1
[PATCH v4 1/3] reset: don't compute unstaged changes after reset when --quiet
From: Ben Peart When git reset is run with the --quiet flag, don't bother finding any additional unstaged changes as they won't be output anyway. This speeds up the git reset command by avoiding having to lstat() every file looking for changes that aren't going to be reported anyway. The savings can be significant. In a repo on Windows with 200K files "git reset" drops from 7.16 seconds to 0.32 seconds for a savings of 96%. Signed-off-by: Ben Peart --- builtin/reset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/reset.c b/builtin/reset.c index 11cd0dcb8c..04f0d9b4f5 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -375,7 +375,7 @@ int cmd_reset(int argc, const char **argv, const char *prefix) int flags = quiet ? REFRESH_QUIET : REFRESH_IN_PORCELAIN; if (read_from_tree(, , intent_to_add)) return 1; - if (get_git_work_tree()) + if (!quiet && get_git_work_tree()) refresh_index(_index, flags, NULL, NULL, _("Unstaged changes after reset:")); } else { -- 2.18.0.windows.1
[PATCH v4 3/3] reset: warn when refresh_index() takes more than 2 seconds
From: Ben Peart refresh_index() is done after a reset command as an optimization. Because it can be an expensive call, warn the user if it takes more than 2 seconds and tell them how to avoid it using the --quiet command line option or reset.quiet config setting. Signed-off-by: Ben Peart --- Documentation/config.txt | 4 advice.c | 2 ++ advice.h | 1 + builtin/reset.c | 14 +- 4 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index a2d1b8b116..415db31def 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -333,6 +333,10 @@ advice.*:: commitBeforeMerge:: Advice shown when linkgit:git-merge[1] refuses to merge to avoid overwriting local changes. + resetQuiet:: + Advice to consider using the `--quiet` option to linkgit:git-reset[1] + when the command takes more than 2 seconds to enumerate unstaged + changes after reset. resolveConflict:: Advice shown by various commands when conflicts prevent the operation from being performed. diff --git a/advice.c b/advice.c index 3561cd64e9..5f35656409 100644 --- a/advice.c +++ b/advice.c @@ -12,6 +12,7 @@ int advice_push_needs_force = 1; int advice_status_hints = 1; int advice_status_u_option = 1; int advice_commit_before_merge = 1; +int advice_reset_quiet_warning = 1; int advice_resolve_conflict = 1; int advice_implicit_identity = 1; int advice_detached_head = 1; @@ -65,6 +66,7 @@ static struct { { "statusHints", _status_hints }, { "statusUoption", _status_u_option }, { "commitBeforeMerge", _commit_before_merge }, + { "resetQuiet", _reset_quiet_warning }, { "resolveConflict", _resolve_conflict }, { "implicitIdentity", _implicit_identity }, { "detachedHead", _detached_head }, diff --git a/advice.h b/advice.h index ab24df0fd0..696bf0e7d2 100644 --- a/advice.h +++ b/advice.h @@ -12,6 +12,7 @@ extern int advice_push_needs_force; extern int advice_status_hints; extern int advice_status_u_option; extern int advice_commit_before_merge; +extern int advice_reset_quiet_warning; extern int advice_resolve_conflict; extern int advice_implicit_identity; extern int advice_detached_head; diff --git a/builtin/reset.c b/builtin/reset.c index 3b43aee544..b31a0eae8a 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -25,6 +25,8 @@ #include "submodule.h" #include "submodule-config.h" +#define REFRESH_INDEX_DELAY_WARNING_IN_MS (2 * 1000) + static const char * const git_reset_usage[] = { N_("git reset [--mixed | --soft | --hard | --merge | --keep] [-q] []"), N_("git reset [-q] [] [--] ..."), @@ -376,9 +378,19 @@ int cmd_reset(int argc, const char **argv, const char *prefix) int flags = quiet ? REFRESH_QUIET : REFRESH_IN_PORCELAIN; if (read_from_tree(, , intent_to_add)) return 1; - if (!quiet && get_git_work_tree()) + if (!quiet && get_git_work_tree()) { + uint64_t t_begin, t_delta_in_ms; + + t_begin = getnanotime(); refresh_index(_index, flags, NULL, NULL, _("Unstaged changes after reset:")); + t_delta_in_ms = (getnanotime() - t_begin) / 100; + if (advice_reset_quiet_warning && t_delta_in_ms > REFRESH_INDEX_DELAY_WARNING_IN_MS) { + printf(_("\nIt took %.2f seconds to enumerate unstaged changes after reset. You can\n" + "use '--quiet' to avoid this. Set the config setting reset.quiet to true\n" + "to make this the default.\n"), t_delta_in_ms / 1000.0); + } + } } else { int err = reset_index(, reset_type, quiet); if (reset_type == KEEP && !err) -- 2.18.0.windows.1
[PATCH v4 2/3] reset: add new reset.quiet config setting
From: Ben Peart Add a reset.quiet config setting that sets the default value of the --quiet flag when running the reset command. This enables users to change the default behavior to take advantage of the performance advantages of avoiding the scan for unstaged changes after reset. Defaults to false. Signed-off-by: Ben Peart --- Documentation/config.txt| 3 +++ Documentation/git-reset.txt | 5 - builtin/reset.c | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index f6f4c21a54..a2d1b8b116 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -2728,6 +2728,9 @@ rerere.enabled:: `$GIT_DIR`, e.g. if "rerere" was previously used in the repository. +reset.quiet:: + When set to true, 'git reset' will default to the '--quiet' option. + include::sendemail-config.txt[] sequence.editor:: diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt index 1d697d9962..2dac95c71a 100644 --- a/Documentation/git-reset.txt +++ b/Documentation/git-reset.txt @@ -95,7 +95,10 @@ OPTIONS -q:: --quiet:: - Be quiet, only report errors. +--no-quiet:: + Be quiet, only report errors. The default behavior is set by the + `reset.quiet` config option. `--quiet` and `--no-quiet` will + override the default behavior. EXAMPLES diff --git a/builtin/reset.c b/builtin/reset.c index 04f0d9b4f5..3b43aee544 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -306,6 +306,7 @@ int cmd_reset(int argc, const char **argv, const char *prefix) }; git_config(git_reset_config, NULL); + git_config_get_bool("reset.quiet", ); argc = parse_options(argc, argv, prefix, options, git_reset_usage, PARSE_OPT_KEEP_DASHDASH); -- 2.18.0.windows.1
Re: [PATCH v3 2/3] reset: add new reset.quiet config setting
On 10/22/2018 10:45 AM, Duy Nguyen wrote: On Mon, Oct 22, 2018 at 3:38 PM Ben Peart wrote: From: Ben Peart Add a reset.quiet config setting that sets the default value of the --quiet flag when running the reset command. This enables users to change the default behavior to take advantage of the performance advantages of avoiding the scan for unstaged changes after reset. Defaults to false. Signed-off-by: Ben Peart --- Documentation/config.txt| 3 +++ Documentation/git-reset.txt | 4 +++- builtin/reset.c | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index f6f4c21a54..a2d1b8b116 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -2728,6 +2728,9 @@ rerere.enabled:: `$GIT_DIR`, e.g. if "rerere" was previously used in the repository. +reset.quiet:: + When set to true, 'git reset' will default to the '--quiet' option. + With 'nd/config-split' topic moving pretty much all config keys out of config.txt, you probably want to do the same for this series: add this in a new file called Documentation/reset-config.txt then include the file here like the sendemail one below. Seems a bit overkill to pull a line of documentation into a separate file and replace it with a line of 'import' logic. Perhaps if/when there is more documentation to pull out that would make more sense. include::sendemail-config.txt[] sequence.editor::
Re: [PATCH v1 2/2] reset: add new reset.quietDefault config setting
On 10/23/2018 5:13 AM, Ævar Arnfjörð Bjarmason wrote: On Wed, Oct 17 2018, Jeff King wrote: On Wed, Oct 17, 2018 at 02:19:59PM -0400, Eric Sunshine wrote: On Wed, Oct 17, 2018 at 12:40 PM Ben Peart wrote: Add a reset.quietDefault config setting that sets the default value of the --quiet flag when running the reset command. This enables users to change the default behavior to take advantage of the performance advantages of avoiding the scan for unstaged changes after reset. Defaults to false. As with the previous patch, my knee-jerk reaction is that this really feels wrong being tied to --quiet. It's particularly unintuitive. What I _could_ see, and what would feel more natural is if you add a new option (say, --optimize) which is more general, incorporating whatever optimizations become available in the future, not just this one special-case. A side-effect of --optimize is that it implies --quiet, and that is something which can and should be documented. Heh, I just wrote something very similar elsewhere in the thread. I'm still not sure if it's a dumb idea, but at least we can be dumb together. Same here. I'm in general if favor of having the ability to configure porcelain command-line options, but in this case it seems like it would be more logical to head for something like: core.uiMessaging=[default,exhaustive,lossyButFaster,quiet] Where default would be our current "exhaustive", and this --quiet case would be covered by lossyButFaster, but also things like the "--no-ahead-behind" flag for git-status. This sounds like an easy way to choose a set of default values that we think make sense to get bundled together. That could be a way for users to quickly choose a set of good defaults but I still think you would want find grained control over the individual settings. Coming up with the set of values to bundle together, figuring out the hierarchy of precedence for this new global config->individual config->individual command line, updating the code to make it all work is outside the scope of this particular patch series. Just on this implementation: The usual idiom for flags as config is command.flag=xyz, not command.flagDefault=xyz, so this should be reset.quiet. Thanks, I agree and fixed that in later iterations.
Re: [PATCH v3 2/3] reset: add new reset.quiet config setting
On 10/22/2018 4:06 PM, Jeff King wrote: On Mon, Oct 22, 2018 at 08:13:32PM +0100, Ramsay Jones wrote: -q:: --quiet:: - Be quiet, only report errors. +--no-quiet:: + Be quiet, only report errors. The default behavior respects the + `reset.quiet` config option, or `--no-quiet` if that is not set. Sorry, I can't quite parse this; -q,--quiet and --no-quiet on the command line (should) trump whatever rest.quiet is set to in the configuration. Is that not the case? That is the case, and what was meant by "the default behavior" (i.e., the behavior when none of these is used). Maybe there's a more clear way of saying that. -Peff Is this more clear? -q:: --quiet:: --no-quiet:: Be quiet, only report errors. The default behavior is set by the `reset.quiet` config option. `--quiet` and `--no-quiet` will overwrite the default behavior.
Re: [PATCH v3 3/3] reset: warn when refresh_index() takes more than 2 seconds
On 10/22/2018 8:23 PM, Junio C Hamano wrote: Ben Peart writes: From: Ben Peart refresh_index() is done after a reset command as an optimization. Because it can be an expensive call, warn the user if it takes more than 2 seconds and tell them how to avoid it using the --quiet command line option or reset.quiet config setting. I am moderately negative on this step. It will irritate users who know about and still choose not to use the "--quiet" option, because they want to gain performance in later real work and/or they want to know what paths are now dirty. A working tree that needs long time to refresh will take long time to instead do "cached stat info says it may be modified so let's run 'diff' for real---we may discover that there wasn't any change after all" when a "git diff" is run after a "reset --quiet" that does not refresh; i.e. there would be valid reasons to run "reset" without "--quiet". It feels a bit irresponsible to throw an ad without informing pros-and-cons and to pretend that we are advising on BCP. In general, we do *not* advertise new features randomly like this. Thanks. The previous two steps looks quite sensible. The challenge I'm trying to address is the discoverability of this significant performance win. In earlier review feedback, all mention of this option speeding up reset was removed. I added this patch to enable users to find out it even exists as an option. While I modeled this on the untracked files/--uno and ahead/behind logic, I missed adding this to the 'advice' logic so that it can be turned off and avoid irritating users. I'll send an updated patch that corrects that.
RE: [PATCH v3 1/3] reset: don't compute unstaged changes after reset when --quiet
> -Original Message- > From: Johannes Schindelin > Sent: Monday, October 22, 2018 4:45 PM > To: Ben Peart > Cc: git@vger.kernel.org; gits...@pobox.com; Ben Peart > ; p...@peff.net; sunsh...@sunshineco.com > Subject: Re: [PATCH v3 1/3] reset: don't compute unstaged changes after > reset when --quiet > > Hi Ben, > > On Mon, 22 Oct 2018, Ben Peart wrote: > > > From: Ben Peart > > > > When git reset is run with the --quiet flag, don't bother finding any > > additional unstaged changes as they won't be output anyway. This speeds > up > > the git reset command by avoiding having to lstat() every file looking for > > changes that aren't going to be reported anyway. > > > > The savings can be significant. In a repo with 200K files "git reset" > > drops from 7.16 seconds to 0.32 seconds for a savings of 96%. > > That's very nice! > > Those numbers, just out of curiosity, are they on Windows? Or on Linux? > It's safe to assume all my numbers are on Windows. :-) > Ciao, > Dscho
[PATCH v1] load_cache_entries_threaded: remove unused src_offset parameter
From: Ben Peart Remove the src_offset parameter which is unused as a result of switching to the IEOT table of offsets. Also stop incrementing src_offset in the multi-threaded codepath as it is no longer used and could cause confusion. Signed-off-by: Ben Peart --- Notes: Base Ref: Web-Diff: https://github.com/benpeart/git/commit/7360721408 Checkout: git fetch https://github.com/benpeart/git read-index-multithread-no-src-offset-v1 && git checkout 7360721408 read-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/read-cache.c b/read-cache.c index f9fa6a7979..6db6f0f220 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2037,7 +2037,7 @@ static void *load_cache_entries_thread(void *_data) } static unsigned long load_cache_entries_threaded(struct index_state *istate, const char *mmap, size_t mmap_size, - unsigned long src_offset, int nr_threads, struct index_entry_offset_table *ieot) + int nr_threads, struct index_entry_offset_table *ieot) { int i, offset, ieot_blocks, ieot_start, err; struct load_cache_entries_thread_data *data; @@ -2198,7 +2198,7 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) ieot = read_ieot_extension(mmap, mmap_size, extension_offset); if (ieot) { - src_offset += load_cache_entries_threaded(istate, mmap, mmap_size, src_offset, nr_threads, ieot); + load_cache_entries_threaded(istate, mmap, mmap_size, nr_threads, ieot); free(ieot); } else { src_offset += load_all_cache_entries(istate, mmap, mmap_size, src_offset); base-commit: f58b85df6937e3f3d9ef26bb52a513c8ada17ffc -- 2.18.0.windows.1
Re: [PATCH v8 7/7] read-cache: load cache entries on worker threads
On 10/21/2018 10:14 PM, Junio C Hamano wrote: Jeff King writes: On Wed, Oct 10, 2018 at 11:59:38AM -0400, Ben Peart wrote: +static unsigned long load_cache_entries_threaded(struct index_state *istate, const char *mmap, size_t mmap_size, + unsigned long src_offset, int nr_threads, struct index_entry_offset_table *ieot) The src_offset parameter isn't used in this function. In early versions of the series, it was used to feed the p->start_offset field of each load_cache_entries_thread_data. But after the switch to ieot, we don't, and instead feed p->ieot_start. But we always begin that at 0. Is that right (and we can drop the parameter), or should this logic: + offset = ieot_start = 0; + ieot_blocks = DIV_ROUND_UP(ieot->nr, nr_threads); + for (i = 0; i < nr_threads; i++) { [...] be starting at src_offset instead of 0? I think "offset" has nothing to do with the offset into the mmapped region of memory. It is an integer index into a (virtual) array that is a concatenation of ieot->entries[].entries[], and it is correct to count from zero. The value taken from that array using the index is used to compute the offset into the mmapped region. Unlike load_all_cache_entries() called from the other side of the same if() statement in the same caller, this does not depend on the fact that the first index entry in the mmapped region appears immediately after the index-file header. It goes from the offsets into the file that are recorded in the entry offset table that is an index extension, so the sizeof(*hdr) that initializes src_offset is not used by the codepath. The number of bytes consumed, i.e. its return value from the function, is not really used, either, as the caller does not use src_offset for anything other than updating it with "+=" and passing it to this function (which does not use it) when it calls this function (i.e. when ieot extension exists--and by definition when that extension exists extension_offset is not 0, so we do not make the final load_index_extensions() call in the caller that uses src_offset). Thanks for discovering/analyzing this. You're right, I missed removing this when we switched from a single offset to an array of offsets via the IEOT. I'll send a patch to fix both issues shortly.
[PATCH v3 2/3] reset: add new reset.quiet config setting
From: Ben Peart Add a reset.quiet config setting that sets the default value of the --quiet flag when running the reset command. This enables users to change the default behavior to take advantage of the performance advantages of avoiding the scan for unstaged changes after reset. Defaults to false. Signed-off-by: Ben Peart --- Documentation/config.txt| 3 +++ Documentation/git-reset.txt | 4 +++- builtin/reset.c | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index f6f4c21a54..a2d1b8b116 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -2728,6 +2728,9 @@ rerere.enabled:: `$GIT_DIR`, e.g. if "rerere" was previously used in the repository. +reset.quiet:: + When set to true, 'git reset' will default to the '--quiet' option. + include::sendemail-config.txt[] sequence.editor:: diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt index 1d697d9962..51a427a34a 100644 --- a/Documentation/git-reset.txt +++ b/Documentation/git-reset.txt @@ -95,7 +95,9 @@ OPTIONS -q:: --quiet:: - Be quiet, only report errors. +--no-quiet:: + Be quiet, only report errors. The default behavior respects the + `reset.quiet` config option, or `--no-quiet` if that is not set. EXAMPLES diff --git a/builtin/reset.c b/builtin/reset.c index 04f0d9b4f5..3b43aee544 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -306,6 +306,7 @@ int cmd_reset(int argc, const char **argv, const char *prefix) }; git_config(git_reset_config, NULL); + git_config_get_bool("reset.quiet", ); argc = parse_options(argc, argv, prefix, options, git_reset_usage, PARSE_OPT_KEEP_DASHDASH); -- 2.18.0.windows.1
[PATCH v3 1/3] reset: don't compute unstaged changes after reset when --quiet
From: Ben Peart When git reset is run with the --quiet flag, don't bother finding any additional unstaged changes as they won't be output anyway. This speeds up the git reset command by avoiding having to lstat() every file looking for changes that aren't going to be reported anyway. The savings can be significant. In a repo with 200K files "git reset" drops from 7.16 seconds to 0.32 seconds for a savings of 96%. Signed-off-by: Ben Peart --- builtin/reset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/reset.c b/builtin/reset.c index 11cd0dcb8c..04f0d9b4f5 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -375,7 +375,7 @@ int cmd_reset(int argc, const char **argv, const char *prefix) int flags = quiet ? REFRESH_QUIET : REFRESH_IN_PORCELAIN; if (read_from_tree(, , intent_to_add)) return 1; - if (get_git_work_tree()) + if (!quiet && get_git_work_tree()) refresh_index(_index, flags, NULL, NULL, _("Unstaged changes after reset:")); } else { -- 2.18.0.windows.1
[PATCH v3 3/3] reset: warn when refresh_index() takes more than 2 seconds
From: Ben Peart refresh_index() is done after a reset command as an optimization. Because it can be an expensive call, warn the user if it takes more than 2 seconds and tell them how to avoid it using the --quiet command line option or reset.quiet config setting. Signed-off-by: Ben Peart --- builtin/reset.c | 14 +- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/builtin/reset.c b/builtin/reset.c index 3b43aee544..d95a27d52e 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -25,6 +25,8 @@ #include "submodule.h" #include "submodule-config.h" +#define REFRESH_INDEX_DELAY_WARNING_IN_MS (2 * 1000) + static const char * const git_reset_usage[] = { N_("git reset [--mixed | --soft | --hard | --merge | --keep] [-q] []"), N_("git reset [-q] [] [--] ..."), @@ -376,9 +378,19 @@ int cmd_reset(int argc, const char **argv, const char *prefix) int flags = quiet ? REFRESH_QUIET : REFRESH_IN_PORCELAIN; if (read_from_tree(, , intent_to_add)) return 1; - if (!quiet && get_git_work_tree()) + if (!quiet && get_git_work_tree()) { + uint64_t t_begin, t_delta_in_ms; + + t_begin = getnanotime(); refresh_index(_index, flags, NULL, NULL, _("Unstaged changes after reset:")); + t_delta_in_ms = (getnanotime() - t_begin) / 100; + if (t_delta_in_ms > REFRESH_INDEX_DELAY_WARNING_IN_MS) { + printf(_("\nIt took %.2f seconds to enumerate unstaged changes after reset. You can\n" + "use '--quiet' to avoid this. Set the config setting reset.quiet to true\n" + "to make this the default."), t_delta_in_ms / 1000.0); + } + } } else { int err = reset_index(, reset_type, quiet); if (reset_type == KEEP && !err) -- 2.18.0.windows.1
[PATCH v3 0/3] speed up git reset
From: Ben Peart Reworded the documentation for git-reset per review feedback. Base Ref: Web-Diff: https://github.com/benpeart/git/commit/1228898917 Checkout: git fetch https://github.com/benpeart/git reset-refresh-index-v3 && git checkout 1228898917 ### Interdiff (v2..v3): diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt index 1d697d9962..51a427a34a 100644 --- a/Documentation/git-reset.txt +++ b/Documentation/git-reset.txt @@ -95,7 +95,9 @@ OPTIONS -q:: --quiet:: - Be quiet, only report errors. +--no-quiet:: + Be quiet, only report errors. The default behavior respects the + `reset.quiet` config option, or `--no-quiet` if that is not set. EXAMPLES ### Patches Ben Peart (3): reset: don't compute unstaged changes after reset when --quiet reset: add new reset.quiet config setting reset: warn when refresh_index() takes more than 2 seconds Documentation/config.txt| 3 +++ Documentation/git-reset.txt | 4 +++- builtin/reset.c | 15 ++- 3 files changed, 20 insertions(+), 2 deletions(-) base-commit: ca63497355222acefcca02b9cbb540a4768f3286 -- 2.18.0.windows.1
Re: [PATCH v2 2/3] reset: add new reset.quiet config setting
On 10/19/2018 1:11 PM, Jeff King wrote: On Fri, Oct 19, 2018 at 01:10:34PM -0400, Eric Sunshine wrote: On Fri, Oct 19, 2018 at 12:46 PM Jeff King wrote: On Fri, Oct 19, 2018 at 12:36:44PM -0400, Eric Sunshine wrote: How does the user reverse this for a particular git-reset invocation? There is no --no-quiet or --verbose option. Perhaps you want to use OPT__VERBOSITY() instead of OPT__QUIET() in builtin/reset.c and document that --verbose overrides --quiet and reset.quiet (or something like that). I think OPT__QUIET() provides --no-quiet, since it's really an OPT_COUNTUP() under the hood. Saying "--no-quiet" should reset it back to 0. Okay. In any case, --no-quiet probably ought to be mentioned alongside the "reset.quiet" option (and perhaps in git-reset.txt to as a way to reverse "reset.quiet"). Yes, I'd agree with that. -Peff Makes sense. I'll update the docs to say: -q:: --quiet:: --no-quiet:: Be quiet, only report errors. + With --no-quiet report errors and unstaged changes after reset.
Re: [PATCH v2 2/3] reset: add new reset.quiet config setting
On 10/19/2018 12:46 PM, Jeff King wrote: On Fri, Oct 19, 2018 at 12:36:44PM -0400, Eric Sunshine wrote: On Fri, Oct 19, 2018 at 12:12 PM Ben Peart wrote: Add a reset.quiet config setting that sets the default value of the --quiet flag when running the reset command. This enables users to change the default behavior to take advantage of the performance advantages of avoiding the scan for unstaged changes after reset. Defaults to false. Signed-off-by: Ben Peart --- diff --git a/Documentation/config.txt b/Documentation/config.txt @@ -2728,6 +2728,9 @@ rerere.enabled:: +reset.quiet:: + When set to true, 'git reset' will default to the '--quiet' option. How does the user reverse this for a particular git-reset invocation? There is no --no-quiet or --verbose option. Perhaps you want to use OPT__VERBOSITY() instead of OPT__QUIET() in builtin/reset.c and document that --verbose overrides --quiet and reset.quiet (or something like that). I think OPT__QUIET() provides --no-quiet, since it's really an OPT_COUNTUP() under the hood. Saying "--no-quiet" should reset it back to 0. Thanks Peff. That is correct as confirmed by: C:\Repos\VSO\src>git reset --no-quiet Unstaged changes after reset: M init.ps1 It took 6.74 seconds to enumerate unstaged changes after reset. You can use '--quiet' to avoid this. Set the config setting reset.quiet to true to make this the default. -Peff
[PATCH v2 1/3] reset: don't compute unstaged changes after reset when --quiet
From: Ben Peart When git reset is run with the --quiet flag, don't bother finding any additional unstaged changes as they won't be output anyway. This speeds up the git reset command by avoiding having to lstat() every file looking for changes that aren't going to be reported anyway. The savings can be significant. In a repo with 200K files "git reset" drops from 7.16 seconds to 0.32 seconds for a savings of 96%. Signed-off-by: Ben Peart --- builtin/reset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/reset.c b/builtin/reset.c index 11cd0dcb8c..04f0d9b4f5 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -375,7 +375,7 @@ int cmd_reset(int argc, const char **argv, const char *prefix) int flags = quiet ? REFRESH_QUIET : REFRESH_IN_PORCELAIN; if (read_from_tree(, , intent_to_add)) return 1; - if (get_git_work_tree()) + if (!quiet && get_git_work_tree()) refresh_index(_index, flags, NULL, NULL, _("Unstaged changes after reset:")); } else { -- 2.18.0.windows.1
[PATCH v2 3/3] reset: warn when refresh_index() takes more than 2 seconds
From: Ben Peart refresh_index() is done after a reset command as an optimization. Because it can be an expensive call, warn the user if it takes more than 2 seconds and tell them how to avoid it using the --quiet command line option or reset.quiet config setting. Signed-off-by: Ben Peart --- builtin/reset.c | 14 +- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/builtin/reset.c b/builtin/reset.c index 3b43aee544..d95a27d52e 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -25,6 +25,8 @@ #include "submodule.h" #include "submodule-config.h" +#define REFRESH_INDEX_DELAY_WARNING_IN_MS (2 * 1000) + static const char * const git_reset_usage[] = { N_("git reset [--mixed | --soft | --hard | --merge | --keep] [-q] []"), N_("git reset [-q] [] [--] ..."), @@ -376,9 +378,19 @@ int cmd_reset(int argc, const char **argv, const char *prefix) int flags = quiet ? REFRESH_QUIET : REFRESH_IN_PORCELAIN; if (read_from_tree(, , intent_to_add)) return 1; - if (!quiet && get_git_work_tree()) + if (!quiet && get_git_work_tree()) { + uint64_t t_begin, t_delta_in_ms; + + t_begin = getnanotime(); refresh_index(_index, flags, NULL, NULL, _("Unstaged changes after reset:")); + t_delta_in_ms = (getnanotime() - t_begin) / 100; + if (t_delta_in_ms > REFRESH_INDEX_DELAY_WARNING_IN_MS) { + printf(_("\nIt took %.2f seconds to enumerate unstaged changes after reset. You can\n" + "use '--quiet' to avoid this. Set the config setting reset.quiet to true\n" + "to make this the default."), t_delta_in_ms / 1000.0); + } + } } else { int err = reset_index(, reset_type, quiet); if (reset_type == KEEP && !err) -- 2.18.0.windows.1
[PATCH v2 2/3] reset: add new reset.quiet config setting
From: Ben Peart Add a reset.quiet config setting that sets the default value of the --quiet flag when running the reset command. This enables users to change the default behavior to take advantage of the performance advantages of avoiding the scan for unstaged changes after reset. Defaults to false. Signed-off-by: Ben Peart --- Documentation/config.txt | 3 +++ builtin/reset.c | 1 + 2 files changed, 4 insertions(+) diff --git a/Documentation/config.txt b/Documentation/config.txt index f6f4c21a54..a2d1b8b116 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -2728,6 +2728,9 @@ rerere.enabled:: `$GIT_DIR`, e.g. if "rerere" was previously used in the repository. +reset.quiet:: + When set to true, 'git reset' will default to the '--quiet' option. + include::sendemail-config.txt[] sequence.editor:: diff --git a/builtin/reset.c b/builtin/reset.c index 04f0d9b4f5..3b43aee544 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -306,6 +306,7 @@ int cmd_reset(int argc, const char **argv, const char *prefix) }; git_config(git_reset_config, NULL); + git_config_get_bool("reset.quiet", ); argc = parse_options(argc, argv, prefix, options, git_reset_usage, PARSE_OPT_KEEP_DASHDASH); -- 2.18.0.windows.1
[PATCH v2 0/3] speed up git reset
From: Ben Peart This itteration avoids the refresh_index() call completely if 'quiet'. The advantage of this is that "git refresh" without any pathspec is also significantly sped up. Also added a notification if finding unstaged changes after reset takes longer than 2 seconds to make users aware of the option to speed it up if they don't need the unstaged changes after reset to be output. It also renames the new config setting reset.quietDefault to reset.quiet. Base Ref: Web-Diff: https://github.com/benpeart/git/commit/50d3415ef1 Checkout: git fetch https://github.com/benpeart/git reset-refresh-index-v2 && git checkout 50d3415ef1 ### Interdiff (v1..v2): diff --git a/Documentation/config.txt b/Documentation/config.txt index a5cf4c019b..a2d1b8b116 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -2728,11 +2728,8 @@ rerere.enabled:: `$GIT_DIR`, e.g. if "rerere" was previously used in the repository. -reset.quietDefault:: - Sets the default value of the "quiet" option for the reset command. - Choosing "quiet" can optimize the performance of the reset command - by avoiding the scan of all files in the repo looking for additional - unstaged changes. Defaults to false. +reset.quiet:: + When set to true, 'git reset' will default to the '--quiet' option. include::sendemail-config.txt[] diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt index 8610309b55..1d697d9962 100644 --- a/Documentation/git-reset.txt +++ b/Documentation/git-reset.txt @@ -95,9 +95,7 @@ OPTIONS -q:: --quiet:: - Be quiet, only report errors. Can optimize the performance of reset - by avoiding scaning all files in the repo looking for additional - unstaged changes. + Be quiet, only report errors. EXAMPLES diff --git a/builtin/reset.c b/builtin/reset.c index 7d151d48a0..d95a27d52e 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -25,6 +25,8 @@ #include "submodule.h" #include "submodule-config.h" +#define REFRESH_INDEX_DELAY_WARNING_IN_MS (2 * 1000) + static const char * const git_reset_usage[] = { N_("git reset [--mixed | --soft | --hard | --merge | --keep] [-q] []"), N_("git reset [-q] [] [--] ..."), @@ -306,7 +308,7 @@ int cmd_reset(int argc, const char **argv, const char *prefix) }; git_config(git_reset_config, NULL); - git_config_get_bool("reset.quietDefault", ); + git_config_get_bool("reset.quiet", ); argc = parse_options(argc, argv, prefix, options, git_reset_usage, PARSE_OPT_KEEP_DASHDASH); @@ -376,9 +378,19 @@ int cmd_reset(int argc, const char **argv, const char *prefix) int flags = quiet ? REFRESH_QUIET : REFRESH_IN_PORCELAIN; if (read_from_tree(, , intent_to_add)) return 1; - if (get_git_work_tree()) - refresh_index(_index, flags, quiet ? : NULL, NULL, + if (!quiet && get_git_work_tree()) { + uint64_t t_begin, t_delta_in_ms; + + t_begin = getnanotime(); + refresh_index(_index, flags, NULL, NULL, _("Unstaged changes after reset:")); + t_delta_in_ms = (getnanotime() - t_begin) / 100; + if (t_delta_in_ms > REFRESH_INDEX_DELAY_WARNING_IN_MS) { + printf(_("\nIt took %.2f seconds to enumerate unstaged changes after reset. You can\n" + "use '--quiet' to avoid this. Set the config setting reset.quiet to true\n" + "to make this the default."), t_delta_in_ms / 1000.0); + } + } } else { int err = reset_index(, reset_type, quiet); if (reset_type == KEEP && !err) ### Patches Ben Peart (3): reset: don't compute unstaged changes after reset when --quiet reset: add new reset.quiet config setting reset: warn when refresh_index() takes more than 2 seconds Documentation/config.txt | 3 +++ builtin/reset.c | 15 ++- 2 files changed, 17 insertions(+), 1 deletion(-) base-commit: ca63497355222acefcca02b9cbb540a4768f3286 -- 2.18.0.windows.1
Re: [PATCH v1 1/2] reset: don't compute unstaged changes after reset when --quiet
On 10/18/2018 2:26 PM, Duy Nguyen wrote: On Thu, Oct 18, 2018 at 8:18 PM Ben Peart wrote: I actually started my effort to speed up reset by attempting to multi-thread refresh_index(). You can see a work in progress at: https://github.com/benpeart/git/pull/new/refresh-index-multithread-gvfs The patch doesn't always work as it is still not thread safe. When it works, it's great but I ran into to many difficulties trying to debug the remaining threading issues (even adding print statements would change the timing and the repro would disappear). It will take a lot of code review to discover and fix the remaining non-thread safe code paths. In addition, the optimized code path that takes advantage of fsmonitor, uses multiple threads, fscache, etc _already exists_ in preload_index(). Trying to recreate all those optimizations in refresh_index() is (as I discovered) a daunting task. Why not make refresh_index() run preload_index() first (or the parallel lstat part to be precise), and only do the heavy content-based refresh in single thread mode? Head smack! Why didn't I think of that? That is a terrific suggestion. Calling preload_index() right before the big for loop in refresh_index() is a trivial and effective way to do the bulk of the updating with the optimized code. After doing that, most of the cache entries can bail out quickly down in refresh_cache_ent() when it tests ce_uptodate(ce). Here are the numbers using that optimization (hot cache, averaged across 3 runs): 0.32 git add asdf 1.67 git reset asdf 1.68 git status 3.67 Total vs without it: 0.32 git add asdf 2.48 git reset asdf 1.50 git status 4.30 Total For a savings in the reset command of 32% and 15% overall. Clearly doing the refresh_index() faster is not as much savings as not doing it at all. Given how simple this patch is, I think it makes sense to do both so that we have optimized each path to is fullest.
Re: [PATCH v1 1/2] reset: don't compute unstaged changes after reset when --quiet
On 10/18/2018 2:36 AM, Jeff King wrote: On Thu, Oct 18, 2018 at 12:40:48PM +0900, Junio C Hamano wrote: Jeff King writes: Whereas for the new config variable, you'd probably set it not because you want it quiet all the time, but because you want to get some time savings. So there it does make sense to me to explain. Other than that, this seems like an obvious and easy win. It does feel a little hacky (you're really losing something in the output, and ideally we'd just be able to give that answer quickly), but this may be OK as a hack in the interim. After "git reset --quiet -- this/area/" with this change, any operation you'd do next that needs to learn if working tree files are different from what is recorded in the index outside that area will have to spend more cycles, because the refresh done by "reset" is now limited to the area. So if your final goal is "make 'reset' as fast as possible", this is an obvious and easy win. For other goals, i.e. "make the overall experience of using Git feel faster", it is not so obvious to me, though. The final goal is to make git faster (especially on larger repos) and this proposal accomplishes that. Let's look at why that is. By scoping down (or eliminating) what refresh_index() has to lstat() at the end of the reset command, clearly the reset command is faster. Yes, the index isn't as "fresh" because not everything was updated but that doesn't typically impact the performance of subsequent commands. On the next command, git still has to lstat() every file because it isn't sure what changes could have happened in the file system. As a result, the overall impact is that we have had to lstat() every file one fewer times between the two commands. A net win overall. In addition, the preload_index() code that does the lstat() command is highly optimized across multiple threads (and on Windows takes advantage of the fscache). This means that it can lstat() every file _much_ faster than the single threaded loop in refresh_index(). This also makes the overall performance of the pair of git commands faster as we got rid of the slow lstat() loop and kept the fast one. Here are some numbers to demonstrate that. These are hot cache numbers as they are easier to generate. Cold cache numbers make the net perf win significantly better as the cost for the reset jumps from 2.43 seconds to 7.16 seconds. 0.32 git add asdf 0.31 git -c reset.quiet=true reset asdf 1.34 git status 1.97 Total 0.32 git add asdf 2.43 git -c reset.quiet=false reset asdf 1.32 git status 4.07 Total Note the status command after the reset doesn't really change as it still must lstat() every file (the 0.02 difference is well within the variability of run to run differences). FWIW, none of these numbers are using fsmonitor. There was additional discussion about whether this should be tied to the "--quiet" option and how it should be documented. One option would be to change the default behavior of reset so that it doesn't do the refresh_index() call at all. This speeds up reset by default so there are no user discoverability issues but changes the default behavior which is an issue. Another option that was suggested was to add a separate flag that could be passed to reset so that the "quiet" and "fast" options don't get conflated. I don't care for that option because the two options (at this point and for the foreseeable future) would be identical in behavior from the end users perspective. It was also suggested that there be a single "fast and quiet" option for all of git instead of separate options for each command. I worry about that because now we're forcing users to choose between the "fast and quiet" version of git and the "slow and noisy" version. How do we help them decide which they want? That seems difficult to explain so that they can make a rational choice and also hard to discover. I also have to wonder who would say "give me the slow and noisy version please." :) I'd prefer we systematically move towards a model where the default values that are chosen for various settings throughout the code are all configurable via settings. All defaults by necessity make certain assumptions about user preference, data shape, machine performance, etc and if those assumptions don't match the user's environment then the hard coded defaults aren't appropriate. We do our best but its going to be hit or miss. A consistent way to be able to change those defaults would be very useful in those circumstances. To be clear, I'm not proposing we do a wholesale update of our preferences model at this point in time - that seems like a significant undertaking and I don't want to tie this specific optimization to a potential change in how default settings work. To move this forward, here is what I propose: 1) If the '--quiet' flag is passed, we silently take advantage of the fact we can avoid having to do an "extra" lstat()
[PATCH v1 1/2] reset: don't compute unstaged changes after reset when --quiet
From: Ben Peart When git reset is run with the --quiet flag, don't bother finding any additional unstaged changes as they won't be output anyway. This speeds up the git reset command by avoiding having to lstat() every file looking for changes that aren't going to be reported anyway. The savings can be significant. In a repo with 200K files "git reset" drops from 7.16 seconds to 0.32 seconds for a savings of 96%. Signed-off-by: Ben Peart --- Documentation/git-reset.txt | 4 +++- builtin/reset.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/git-reset.txt b/Documentation/git-reset.txt index 1d697d9962..8610309b55 100644 --- a/Documentation/git-reset.txt +++ b/Documentation/git-reset.txt @@ -95,7 +95,9 @@ OPTIONS -q:: --quiet:: - Be quiet, only report errors. + Be quiet, only report errors. Can optimize the performance of reset + by avoiding scaning all files in the repo looking for additional + unstaged changes. EXAMPLES diff --git a/builtin/reset.c b/builtin/reset.c index 11cd0dcb8c..0822798616 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -376,7 +376,7 @@ int cmd_reset(int argc, const char **argv, const char *prefix) if (read_from_tree(, , intent_to_add)) return 1; if (get_git_work_tree()) - refresh_index(_index, flags, NULL, NULL, + refresh_index(_index, flags, quiet ? : NULL, NULL, _("Unstaged changes after reset:")); } else { int err = reset_index(, reset_type, quiet); -- 2.18.0.windows.1
[PATCH v1 0/2] speed up git reset
From: Ben Peart The reset (mixed) command unstages the specified file(s) and then shows you the remaining unstaged changes. This can make the command slow on larger repos because at the end it calls refresh_index() which has a single thread that loops through all the entries calling lstat() for every file. If the user passes the --quiet switch, reset doesn�t display the remaining unstaged changes but it still does all the work to find them, it just doesn�t print them out so passing "--quiet" doesn�t help performance. This patch series will: 1) change the behavior of "git reset --quiet" so that it no longer computes the remaining unstaged changes. 2) add a new config setting so that "--quiet" can be configured as the default so that the default performance of "git reset" is improved. The performance benefit of this can be significant. In a repo with 200K files "git reset foo" performance drops from 7.16 seconds to 0.32 seconds for a savings of 96%. Even with the small git repo, reset times drop from 0.191 seconds to 0.043 seconds for a savings of 77%. Base Ref: master Web-Diff: https://github.com/benpeart/git/commit/2295a310d0 Checkout: git fetch https://github.com/benpeart/git reset-refresh-index-v1 && git checkout 2295a310d0 Ben Peart (2): reset: don't compute unstaged changes after reset when --quiet reset: add new reset.quietDefault config setting Documentation/config.txt| 6 ++ Documentation/git-reset.txt | 4 +++- builtin/reset.c | 3 ++- 3 files changed, 11 insertions(+), 2 deletions(-) base-commit: a4b8ab5363a32f283a61ef3a962853556d136c0e -- 2.18.0.windows.1
[PATCH v1 2/2] reset: add new reset.quietDefault config setting
From: Ben Peart Add a reset.quietDefault config setting that sets the default value of the --quiet flag when running the reset command. This enables users to change the default behavior to take advantage of the performance advantages of avoiding the scan for unstaged changes after reset. Defaults to false. Signed-off-by: Ben Peart --- Documentation/config.txt | 6 ++ builtin/reset.c | 1 + 2 files changed, 7 insertions(+) diff --git a/Documentation/config.txt b/Documentation/config.txt index f6f4c21a54..a5cf4c019b 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -2728,6 +2728,12 @@ rerere.enabled:: `$GIT_DIR`, e.g. if "rerere" was previously used in the repository. +reset.quietDefault:: + Sets the default value of the "quiet" option for the reset command. + Choosing "quiet" can optimize the performance of the reset command + by avoiding the scan of all files in the repo looking for additional + unstaged changes. Defaults to false. + include::sendemail-config.txt[] sequence.editor:: diff --git a/builtin/reset.c b/builtin/reset.c index 0822798616..7d151d48a0 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -306,6 +306,7 @@ int cmd_reset(int argc, const char **argv, const char *prefix) }; git_config(git_reset_config, NULL); + git_config_get_bool("reset.quietDefault", ); argc = parse_options(argc, argv, prefix, options, git_reset_usage, PARSE_OPT_KEEP_DASHDASH); -- 2.18.0.windows.1
Re: [PATCH v8 0/7] speed up index load through parallelization
fixup! IEOT error messages Enable localizing new error messages and improve the error message for invalid IEOT extension sizes. Signed-off-by: Ben Peart --- read-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/read-cache.c b/read-cache.c index 7acc2c86f4..f9fa6a7979 100644 --- a/read-cache.c +++ b/read-cache.c @@ -3480,7 +3480,7 @@ static struct index_entry_offset_table *read_ieot_extension(const char *mmap, si /* validate the version is IEOT_VERSION */ ext_version = get_be32(index); if (ext_version != IEOT_VERSION) { - error("invalid IEOT version %d", ext_version); + error(_("invalid IEOT version %d"), ext_version); return NULL; } index += sizeof(uint32_t); @@ -3488,7 +3488,7 @@ static struct index_entry_offset_table *read_ieot_extension(const char *mmap, si /* extension size - version bytes / bytes per entry */ nr = (extsize - sizeof(uint32_t)) / (sizeof(uint32_t) + sizeof(uint32_t)); if (!nr) { - error("invalid number of IEOT entries %d", nr); + error(_("invalid IEOT extension size %d"), extsize); return NULL; } ieot = xmalloc(sizeof(struct index_entry_offset_table) -- 2.18.0.windows.1 On 10/14/2018 8:28 AM, Duy Nguyen wrote: On Wed, Oct 10, 2018 at 5:59 PM Ben Peart wrote: @@ -3460,14 +3479,18 @@ static struct index_entry_offset_table *read_ieot_extension(const char *mmap, si /* validate the version is IEOT_VERSION */ ext_version = get_be32(index); - if (ext_version != IEOT_VERSION) + if (ext_version != IEOT_VERSION) { + error("invalid IEOT version %d", ext_version); Please wrap this string in _() so that it can be translated. return NULL; + } index += sizeof(uint32_t); /* extension size - version bytes / bytes per entry */ nr = (extsize - sizeof(uint32_t)) / (sizeof(uint32_t) + sizeof(uint32_t)); - if (!nr) + if (!nr) { + error("invalid number of IEOT entries %d", nr); Ditto. And reporting extsize may be more useful than nr, which we know is zero, but we don't know why it's calculated zero unless we know extsize.
[PATCH v8 6/7] ieot: add Index Entry Offset Table (IEOT) extension
From: Ben Peart This patch enables addressing the CPU cost of loading the index by adding additional data to the index that will allow us to efficiently multi- thread the loading and conversion of cache entries. It accomplishes this by adding an (optional) index extension that is a table of offsets to blocks of cache entries in the index file. To make this work for V4 indexes, when writing the cache entries, it periodically "resets" the prefix-compression by encoding the current entry as if the path name for the previous entry is completely different and saves the offset of that entry in the IEOT. Basically, with V4 indexes, it generates offsets into blocks of prefix-compressed entries. Signed-off-by: Ben Peart --- Documentation/technical/index-format.txt | 18 +++ read-cache.c | 196 ++- 2 files changed, 211 insertions(+), 3 deletions(-) diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index 6bc2d90f7f..7c4d67aa6a 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -337,3 +337,21 @@ The remaining data of each directory block is grouped by type: SHA-1("TREE" + + "REUC" + ) + +== Index Entry Offset Table + + The Index Entry Offset Table (IEOT) is used to help address the CPU + cost of loading the index by enabling multi-threading the process of + converting cache entries from the on-disk format to the in-memory format. + The signature for this extension is { 'I', 'E', 'O', 'T' }. + + The extension consists of: + + - 32-bit version (currently 1) + + - A number of index offset entries each consisting of: + +- 32-bit offset from the begining of the file to the first cache entry + in this block of entries. + +- 32-bit count of cache entries in this block diff --git a/read-cache.c b/read-cache.c index 2214b3153d..3ace29d58f 100644 --- a/read-cache.c +++ b/read-cache.c @@ -45,6 +45,7 @@ #define CACHE_EXT_UNTRACKED 0x554E5452 /* "UNTR" */ #define CACHE_EXT_FSMONITOR 0x46534D4E /* "FSMN" */ #define CACHE_EXT_ENDOFINDEXENTRIES 0x454F4945 /* "EOIE" */ +#define CACHE_EXT_INDEXENTRYOFFSETTABLE 0x49454F54 /* "IEOT" */ /* changes that can be kept in $GIT_DIR/index (basically all extensions) */ #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \ @@ -1696,6 +1697,7 @@ static int read_index_extension(struct index_state *istate, read_fsmonitor_extension(istate, data, sz); break; case CACHE_EXT_ENDOFINDEXENTRIES: + case CACHE_EXT_INDEXENTRYOFFSETTABLE: /* already handled in do_read_index() */ break; default: @@ -1888,6 +1890,23 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries) return ondisk_size + entries * per_entry; } +struct index_entry_offset +{ + /* starting byte offset into index file, count of index entries in this block */ + int offset, nr; +}; + +struct index_entry_offset_table +{ + int nr; + struct index_entry_offset entries[FLEX_ARRAY]; +}; + +#ifndef NO_PTHREADS +static struct index_entry_offset_table *read_ieot_extension(const char *mmap, size_t mmap_size, size_t offset); +static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_table *ieot); +#endif + static size_t read_eoie_extension(const char *mmap, size_t mmap_size); static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset); @@ -1929,6 +1948,15 @@ static void *load_index_extensions(void *_data) return NULL; } +/* + * Mostly randomly chosen maximum thread counts: we + * cap the parallelism to online_cpus() threads, and we want + * to have at least 1 cache entries per thread for it to + * be worth starting a thread. + */ + +#define THREAD_COST(1) + /* remember to discard_cache() before reading a different cache! */ int do_read_index(struct index_state *istate, const char *path, int must_exist) { @@ -2521,6 +2549,9 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, struct strbuf previous_name_buf = STRBUF_INIT, *previous_name; int drop_cache_tree = istate->drop_cache_tree; off_t offset; + int ieot_blocks = 1; + struct index_entry_offset_table *ieot = NULL; + int nr, nr_threads; for (i = removed = extended = 0; i < entries; i++) { if (cache[i]->ce_flags & CE_REMOVE) @@ -2554,10 +2585,44 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, if (ce_write(, newfd, , sizeof(hdr)) < 0) return -1; +#ifndef NO_PTHREADS + nr_threads = git_config_get_index_threads(); + if (nr_threads != 1) { + int ieot_blocks, cpus; +
[PATCH v8 5/7] read-cache: load cache extensions on a worker thread
From: Ben Peart This patch helps address the CPU cost of loading the index by loading the cache extensions on a worker thread in parallel with loading the cache entries. In some cases, loading the extensions takes longer than loading the cache entries so this patch utilizes the new EOIE to start the thread to load the extensions before loading all the cache entries in parallel. This is possible because the current extensions don't access the cache entries in the index_state structure so are OK that they don't all exist yet. The CACHE_EXT_TREE, CACHE_EXT_RESOLVE_UNDO, and CACHE_EXT_UNTRACKED extensions don't even get a pointer to the index so don't have access to the cache entries. CACHE_EXT_LINK only uses the index_state to initialize the split index. CACHE_EXT_FSMONITOR only uses the index_state to save the fsmonitor last update and dirty flags. I used p0002-read-cache.sh to generate some performance data: Test w/100,000 files reduced the time by 0.53% Test w/1,000,000 files reduced the time by 27.78% Signed-off-by: Ben Peart --- read-cache.c | 95 +++- 1 file changed, 79 insertions(+), 16 deletions(-) diff --git a/read-cache.c b/read-cache.c index 4781515252..2214b3153d 100644 --- a/read-cache.c +++ b/read-cache.c @@ -23,6 +23,7 @@ #include "split-index.h" #include "utf8.h" #include "fsmonitor.h" +#include "thread-utils.h" /* Mask for the name length in ce_flags in the on-disk index */ @@ -1890,6 +1891,44 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries) static size_t read_eoie_extension(const char *mmap, size_t mmap_size); static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset); +struct load_index_extensions +{ +#ifndef NO_PTHREADS + pthread_t pthread; +#endif + struct index_state *istate; + const char *mmap; + size_t mmap_size; + unsigned long src_offset; +}; + +static void *load_index_extensions(void *_data) +{ + struct load_index_extensions *p = _data; + unsigned long src_offset = p->src_offset; + + while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) { + /* After an array of active_nr index entries, +* there can be arbitrary number of extended +* sections, each of which is prefixed with +* extension name (4-byte) and section length +* in 4-byte network byte order. +*/ + uint32_t extsize = get_be32(p->mmap + src_offset + 4); + if (read_index_extension(p->istate, +p->mmap + src_offset, +p->mmap + src_offset + 8, +extsize) < 0) { + munmap((void *)p->mmap, p->mmap_size); + die(_("index file corrupt")); + } + src_offset += 8; + src_offset += extsize; + } + + return NULL; +} + /* remember to discard_cache() before reading a different cache! */ int do_read_index(struct index_state *istate, const char *path, int must_exist) { @@ -1900,6 +1939,11 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) const char *mmap; size_t mmap_size; const struct cache_entry *previous_ce = NULL; + struct load_index_extensions p; + size_t extension_offset = 0; +#ifndef NO_PTHREADS + int nr_threads; +#endif if (istate->initialized) return istate->cache_nr; @@ -1936,6 +1980,30 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) istate->cache = xcalloc(istate->cache_alloc, sizeof(*istate->cache)); istate->initialized = 1; + p.istate = istate; + p.mmap = mmap; + p.mmap_size = mmap_size; + +#ifndef NO_PTHREADS + nr_threads = git_config_get_index_threads(); + if (!nr_threads) + nr_threads = online_cpus(); + + if (nr_threads > 1) { + extension_offset = read_eoie_extension(mmap, mmap_size); + if (extension_offset) { + int err; + + p.src_offset = extension_offset; + err = pthread_create(, NULL, load_index_extensions, ); + if (err) + die(_("unable to create load_index_extensions thread: %s"), strerror(err)); + + nr_threads--; + } + } +#endif + if (istate->version == 4) { mem_pool_init(>ce_mem_pool, estimate_cache_size_from_compressed(istate->cache_nr)); @@ -1960,22 +2028,17 @@ int do_read_index(struct index_state *istate, const
[PATCH v8 7/7] read-cache: load cache entries on worker threads
From: Ben Peart This patch helps address the CPU cost of loading the index by utilizing the Index Entry Offset Table (IEOT) to divide loading and conversion of the cache entries across multiple threads in parallel. I used p0002-read-cache.sh to generate some performance data: Test w/100,000 files reduced the time by 32.24% Test w/1,000,000 files reduced the time by -4.77% Note that on the 1,000,000 files case, multi-threading the cache entry parsing does not yield a performance win. This is because the cost to parse the index extensions in this repo, far outweigh the cost of loading the cache entries. The high cost of parsing the index extensions is driven by the cache tree and the untracked cache extensions. As this is currently the longest pole, any reduction in this time will reduce the overall index load times so is worth further investigation in another patch series. Signed-off-by: Ben Peart --- read-cache.c | 230 ++- 1 file changed, 193 insertions(+), 37 deletions(-) diff --git a/read-cache.c b/read-cache.c index 3ace29d58f..7acc2c86f4 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1720,7 +1720,8 @@ int read_index(struct index_state *istate) return read_index_from(istate, get_index_file(), get_git_dir()); } -static struct cache_entry *create_from_disk(struct index_state *istate, +static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool, + unsigned int version, struct ondisk_cache_entry *ondisk, unsigned long *ent_size, const struct cache_entry *previous_ce) @@ -1737,7 +1738,7 @@ static struct cache_entry *create_from_disk(struct index_state *istate, * number of bytes to be stripped from the end of the previous name, * and the bytes to append to the result, to come up with its name. */ - int expand_name_field = istate->version == 4; + int expand_name_field = version == 4; /* On-disk flags are just 16 bits */ flags = get_be16(>flags); @@ -1761,16 +1762,17 @@ static struct cache_entry *create_from_disk(struct index_state *istate, const unsigned char *cp = (const unsigned char *)name; size_t strip_len, previous_len; - previous_len = previous_ce ? previous_ce->ce_namelen : 0; + /* If we're at the begining of a block, ignore the previous name */ strip_len = decode_varint(); - if (previous_len < strip_len) { - if (previous_ce) + if (previous_ce) { + previous_len = previous_ce->ce_namelen; + if (previous_len < strip_len) die(_("malformed name field in the index, near path '%s'"), - previous_ce->name); - else - die(_("malformed name field in the index in the first path")); + previous_ce->name); + copy_len = previous_len - strip_len; + } else { + copy_len = 0; } - copy_len = previous_len - strip_len; name = (const char *)cp; } @@ -1780,7 +1782,7 @@ static struct cache_entry *create_from_disk(struct index_state *istate, len += copy_len; } - ce = mem_pool__ce_alloc(istate->ce_mem_pool, len); + ce = mem_pool__ce_alloc(ce_mem_pool, len); ce->ce_stat_data.sd_ctime.sec = get_be32(>ctime.sec); ce->ce_stat_data.sd_mtime.sec = get_be32(>mtime.sec); @@ -1948,6 +1950,52 @@ static void *load_index_extensions(void *_data) return NULL; } +/* + * A helper function that will load the specified range of cache entries + * from the memory mapped file and add them to the given index. + */ +static unsigned long load_cache_entry_block(struct index_state *istate, + struct mem_pool *ce_mem_pool, int offset, int nr, const char *mmap, + unsigned long start_offset, const struct cache_entry *previous_ce) +{ + int i; + unsigned long src_offset = start_offset; + + for (i = offset; i < offset + nr; i++) { + struct ondisk_cache_entry *disk_ce; + struct cache_entry *ce; + unsigned long consumed; + + disk_ce = (struct ondisk_cache_entry *)(mmap + src_offset); + ce = create_from_disk(ce_mem_pool, istate->version, disk_ce, , previous_ce); + set_index_entry(istate, i, ce); + + src_offset += consumed; + previous_ce = ce; + } + return src_offset - start_offset; +}
[PATCH v8 4/7] config: add new index.threads config setting
From: Ben Peart Add support for a new index.threads config setting which will be used to control the threading code in do_read_index(). A value of 0 will tell the index code to automatically determine the correct number of threads to use. A value of 1 will make the code single threaded. A value greater than 1 will set the maximum number of threads to use. For testing purposes, this setting can be overwritten by setting the GIT_TEST_INDEX_THREADS= environment variable to a value greater than 0. Signed-off-by: Ben Peart --- Documentation/config.txt | 7 +++ config.c | 18 ++ config.h | 1 + t/README | 5 + t/t1700-split-index.sh | 5 + 5 files changed, 36 insertions(+) diff --git a/Documentation/config.txt b/Documentation/config.txt index ad0f4510c3..8fd973b76b 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -2413,6 +2413,13 @@ imap:: The configuration variables in the 'imap' section are described in linkgit:git-imap-send[1]. +index.threads:: + Specifies the number of threads to spawn when loading the index. + This is meant to reduce index load time on multiprocessor machines. + Specifying 0 or 'true' will cause Git to auto-detect the number of + CPU's and set the number of threads accordingly. Specifying 1 or + 'false' will disable multithreading. Defaults to 'true'. + index.version:: Specify the version with which new index files should be initialized. This does not affect existing repositories. diff --git a/config.c b/config.c index 3461993f0a..2ee29f6f86 100644 --- a/config.c +++ b/config.c @@ -2289,6 +2289,24 @@ int git_config_get_fsmonitor(void) return 0; } +int git_config_get_index_threads(void) +{ + int is_bool, val = 0; + + val = git_env_ulong("GIT_TEST_INDEX_THREADS", 0); + if (val) + return val; + + if (!git_config_get_bool_or_int("index.threads", _bool, )) { + if (is_bool) + return val ? 0 : 1; + else + return val; + } + + return 0; /* auto */ +} + NORETURN void git_die_config_linenr(const char *key, const char *filename, int linenr) { diff --git a/config.h b/config.h index ab46e0165d..a06027e69b 100644 --- a/config.h +++ b/config.h @@ -250,6 +250,7 @@ extern int git_config_get_untracked_cache(void); extern int git_config_get_split_index(void); extern int git_config_get_max_percent_split_change(void); extern int git_config_get_fsmonitor(void); +extern int git_config_get_index_threads(void); /* This dies if the configured or default date is in the future */ extern int git_config_get_expiry(const char *key, const char **output); diff --git a/t/README b/t/README index 3ea6c85460..8f5c0620ea 100644 --- a/t/README +++ b/t/README @@ -327,6 +327,11 @@ GIT_TEST_COMMIT_GRAPH=, when true, forces the commit-graph to be written after every 'git commit' command, and overrides the 'core.commitGraph' setting to true. +GIT_TEST_INDEX_THREADS= enables exercising the multi-threaded loading +of the index for the whole test suite by bypassing the default number of +cache entries and thread minimums. Setting this to 1 will make the +index loading single threaded. + Naming Tests diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh index 8e17f8e7a0..ef9349bd70 100755 --- a/t/t1700-split-index.sh +++ b/t/t1700-split-index.sh @@ -6,7 +6,12 @@ test_description='split index mode tests' # We need total control of index splitting here sane_unset GIT_TEST_SPLIT_INDEX + +# Testing a hard coded SHA against an index with an extension +# that can vary from run to run is problematic so we disable +# those extensions. sane_unset GIT_FSMONITOR_TEST +sane_unset GIT_TEST_INDEX_THREADS test_expect_success 'enable split index' ' git config splitIndex.maxPercentChange 100 && -- 2.18.0.windows.1
[PATCH v8 2/7] read-cache: clean up casting and byte decoding
From: Ben Peart This patch does a clean up pass to minimize the casting required to work with the memory mapped index (mmap). It also makes the decoding of network byte order more consistent by using get_be32() where possible. Signed-off-by: Ben Peart --- read-cache.c | 23 +++ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/read-cache.c b/read-cache.c index 583a4fb1f8..6ba99e2c96 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1650,7 +1650,7 @@ int verify_index_checksum; /* Allow fsck to force verification of the cache entry order. */ int verify_ce_order; -static int verify_hdr(struct cache_header *hdr, unsigned long size) +static int verify_hdr(const struct cache_header *hdr, unsigned long size) { git_hash_ctx c; unsigned char hash[GIT_MAX_RAWSZ]; @@ -1674,7 +1674,7 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size) } static int read_index_extension(struct index_state *istate, - const char *ext, void *data, unsigned long sz) + const char *ext, const char *data, unsigned long sz) { switch (CACHE_EXT(ext)) { case CACHE_EXT_TREE: @@ -1889,8 +1889,8 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) int fd, i; struct stat st; unsigned long src_offset; - struct cache_header *hdr; - void *mmap; + const struct cache_header *hdr; + const char *mmap; size_t mmap_size; const struct cache_entry *previous_ce = NULL; @@ -1918,7 +1918,7 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) die_errno("unable to map index file"); close(fd); - hdr = mmap; + hdr = (const struct cache_header *)mmap; if (verify_hdr(hdr, mmap_size) < 0) goto unmap; @@ -1943,7 +1943,7 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) struct cache_entry *ce; unsigned long consumed; - disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset); + disk_ce = (struct ondisk_cache_entry *)(mmap + src_offset); ce = create_from_disk(istate, disk_ce, , previous_ce); set_index_entry(istate, i, ce); @@ -1961,21 +1961,20 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) * in 4-byte network byte order. */ uint32_t extsize; - memcpy(, (char *)mmap + src_offset + 4, 4); - extsize = ntohl(extsize); + extsize = get_be32(mmap + src_offset + 4); if (read_index_extension(istate, -(const char *) mmap + src_offset, -(char *) mmap + src_offset + 8, +mmap + src_offset, +mmap + src_offset + 8, extsize) < 0) goto unmap; src_offset += 8; src_offset += extsize; } - munmap(mmap, mmap_size); + munmap((void *)mmap, mmap_size); return istate->cache_nr; unmap: - munmap(mmap, mmap_size); + munmap((void *)mmap, mmap_size); die("index file corrupt"); } -- 2.18.0.windows.1
[PATCH v8 1/7] read-cache.c: optimize reading index format v4
From: Nguyễn Thái Ngọc Duy Index format v4 requires some more computation to assemble a path based on a previous one. The current code is not very efficient because - it doubles memory copy, we assemble the final path in a temporary first before putting it back to a cache_entry - strbuf_remove() in expand_name_field() is not exactly a good fit for stripping a part at the end, _setlen() would do the same job and is much cheaper. - the open-coded loop to find the end of the string in expand_name_field() can't beat an optimized strlen() This patch avoids the temporary buffer and writes directly to the new cache_entry, which addresses the first two points. The last point could also be avoided if the total string length fits in the first 12 bits of ce_flags, if not we fall back to strlen(). Running "test-tool read-cache 100" on webkit.git (275k files), reading v2 only takes 4.226 seconds, while v4 takes 5.711 seconds, 35% more time. The patch reduces read time on v4 to 4.319 seconds. Signed-off-by: Nguyễn Thái Ngọc Duy --- read-cache.c | 128 --- 1 file changed, 60 insertions(+), 68 deletions(-) diff --git a/read-cache.c b/read-cache.c index 8d04d78a58..583a4fb1f8 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1713,63 +1713,24 @@ int read_index(struct index_state *istate) return read_index_from(istate, get_index_file(), get_git_dir()); } -static struct cache_entry *cache_entry_from_ondisk(struct mem_pool *mem_pool, - struct ondisk_cache_entry *ondisk, - unsigned int flags, - const char *name, - size_t len) -{ - struct cache_entry *ce = mem_pool__ce_alloc(mem_pool, len); - - ce->ce_stat_data.sd_ctime.sec = get_be32(>ctime.sec); - ce->ce_stat_data.sd_mtime.sec = get_be32(>mtime.sec); - ce->ce_stat_data.sd_ctime.nsec = get_be32(>ctime.nsec); - ce->ce_stat_data.sd_mtime.nsec = get_be32(>mtime.nsec); - ce->ce_stat_data.sd_dev = get_be32(>dev); - ce->ce_stat_data.sd_ino = get_be32(>ino); - ce->ce_mode = get_be32(>mode); - ce->ce_stat_data.sd_uid = get_be32(>uid); - ce->ce_stat_data.sd_gid = get_be32(>gid); - ce->ce_stat_data.sd_size = get_be32(>size); - ce->ce_flags = flags & ~CE_NAMEMASK; - ce->ce_namelen = len; - ce->index = 0; - hashcpy(ce->oid.hash, ondisk->sha1); - memcpy(ce->name, name, len); - ce->name[len] = '\0'; - return ce; -} - -/* - * Adjacent cache entries tend to share the leading paths, so it makes - * sense to only store the differences in later entries. In the v4 - * on-disk format of the index, each on-disk cache entry stores the - * number of bytes to be stripped from the end of the previous name, - * and the bytes to append to the result, to come up with its name. - */ -static unsigned long expand_name_field(struct strbuf *name, const char *cp_) -{ - const unsigned char *ep, *cp = (const unsigned char *)cp_; - size_t len = decode_varint(); - - if (name->len < len) - die("malformed name field in the index"); - strbuf_remove(name, name->len - len, len); - for (ep = cp; *ep; ep++) - ; /* find the end */ - strbuf_add(name, cp, ep - cp); - return (const char *)ep + 1 - cp_; -} - -static struct cache_entry *create_from_disk(struct mem_pool *mem_pool, +static struct cache_entry *create_from_disk(struct index_state *istate, struct ondisk_cache_entry *ondisk, unsigned long *ent_size, - struct strbuf *previous_name) + const struct cache_entry *previous_ce) { struct cache_entry *ce; size_t len; const char *name; unsigned int flags; + size_t copy_len; + /* +* Adjacent cache entries tend to share the leading paths, so it makes +* sense to only store the differences in later entries. In the v4 +* on-disk format of the index, each on-disk cache entry stores the +* number of bytes to be stripped from the end of the previous name, +* and the bytes to append to the result, to come up with its name. +*/ + int expand_name_field = istate->version == 4; /* On-disk flags are just 16 bits */ flags = get_be16(>flags); @@ -1789,21 +1750,54 @@ static struct cache_entry *create_from_disk(struct mem_pool *mem_pool, else name = ondisk->name; - if (!previous_name) { - /* v3 and earlier */ - if (len == CE_NAMEMASK) - len = strlen(name); - ce = cache_entry_from_ondisk(mem_pool,
[PATCH v8 3/7] eoie: add End of Index Entry (EOIE) extension
From: Ben Peart The End of Index Entry (EOIE) is used to locate the end of the variable length index entries and the beginning of the extensions. Code can take advantage of this to quickly locate the index extensions without having to parse through all of the index entries. The EOIE extension is always written out to the index file including to the shared index when using the split index feature. Because it is always written out, the SHA checksums in t/t1700-split-index.sh were updated to reflect its inclusion. It is written as an optional extension to ensure compatibility with other git implementations that do not yet support it. It is always written out to ensure it is available as often as possible to speed up index operations. Because it must be able to be loaded before the variable length cache entries and other index extensions, this extension must be written last. The signature for this extension is { 'E', 'O', 'I', 'E' }. The extension consists of: - 32-bit offset to the end of the index entries - 160-bit SHA-1 over the extension types and their sizes (but not their contents). E.g. if we have "TREE" extension that is N-bytes long, "REUC" extension that is M-bytes long, followed by "EOIE", then the hash would be: SHA-1("TREE" + + "REUC" + ) Signed-off-by: Ben Peart --- Documentation/technical/index-format.txt | 23 read-cache.c | 158 +-- t/t1700-split-index.sh | 8 +- 3 files changed, 177 insertions(+), 12 deletions(-) diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index db3572626b..6bc2d90f7f 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -314,3 +314,26 @@ The remaining data of each directory block is grouped by type: - An ewah bitmap, the n-th bit indicates whether the n-th index entry is not CE_FSMONITOR_VALID. + +== End of Index Entry + + The End of Index Entry (EOIE) is used to locate the end of the variable + length index entries and the begining of the extensions. Code can take + advantage of this to quickly locate the index extensions without having + to parse through all of the index entries. + + Because it must be able to be loaded before the variable length cache + entries and other index extensions, this extension must be written last. + The signature for this extension is { 'E', 'O', 'I', 'E' }. + + The extension consists of: + + - 32-bit offset to the end of the index entries + + - 160-bit SHA-1 over the extension types and their sizes (but not + their contents). E.g. if we have "TREE" extension that is N-bytes + long, "REUC" extension that is M-bytes long, followed by "EOIE", + then the hash would be: + + SHA-1("TREE" + + + "REUC" + ) diff --git a/read-cache.c b/read-cache.c index 6ba99e2c96..4781515252 100644 --- a/read-cache.c +++ b/read-cache.c @@ -43,6 +43,7 @@ #define CACHE_EXT_LINK 0x6c696e6b/* "link" */ #define CACHE_EXT_UNTRACKED 0x554E5452 /* "UNTR" */ #define CACHE_EXT_FSMONITOR 0x46534D4E /* "FSMN" */ +#define CACHE_EXT_ENDOFINDEXENTRIES 0x454F4945 /* "EOIE" */ /* changes that can be kept in $GIT_DIR/index (basically all extensions) */ #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \ @@ -1693,6 +1694,9 @@ static int read_index_extension(struct index_state *istate, case CACHE_EXT_FSMONITOR: read_fsmonitor_extension(istate, data, sz); break; + case CACHE_EXT_ENDOFINDEXENTRIES: + /* already handled in do_read_index() */ + break; default: if (*ext < 'A' || 'Z' < *ext) return error("index uses %.4s extension, which we do not understand", @@ -1883,6 +1887,9 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries) return ondisk_size + entries * per_entry; } +static size_t read_eoie_extension(const char *mmap, size_t mmap_size); +static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset); + /* remember to discard_cache() before reading a different cache! */ int do_read_index(struct index_state *istate, const char *path, int must_exist) { @@ -2190,11 +2197,15 @@ static int ce_write(git_hash_ctx *context, int fd, void *data, unsigned int len) return 0; } -static int write_index_ext_header(git_hash_ctx *context, int fd, - unsigned int ext, unsigned int sz) +static int write_index_ext_header(git_hash_ctx *context, git_hash_ctx *eoie_context, + int fd, unsigned int ext, unsigned int sz) { ext = htonl(ext); sz = htonl(sz); + if (eoie_context) { +
[PATCH v8 0/7] speed up index load through parallelization
From: Ben Peart Fixed issues identified in review the most impactful probably being plugging some leaks and improved error handling. Also added better error messages and some code cleanup to code I'd touched. The biggest change in the interdiff is the impact of renaming ieot_offset to ieot_start and ieot_work to ieot_blocks in hopes of making it easier to read and understand the code. Base Ref: master Web-Diff: https://github.com/benpeart/git/commit/6caa0bac46 Checkout: git fetch https://github.com/benpeart/git read-index-multithread-v8 && git checkout 6caa0bac46 ### Interdiff (v7..v8): diff --git a/read-cache.c b/read-cache.c index 14402a0738..7acc2c86f4 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1901,7 +1901,7 @@ struct index_entry_offset struct index_entry_offset_table { int nr; - struct index_entry_offset entries[0]; + struct index_entry_offset entries[FLEX_ARRAY]; }; #ifndef NO_PTHREADS @@ -1935,9 +1935,7 @@ static void *load_index_extensions(void *_data) * extension name (4-byte) and section length * in 4-byte network byte order. */ - uint32_t extsize; - memcpy(, p->mmap + src_offset + 4, 4); - extsize = ntohl(extsize); + uint32_t extsize = get_be32(p->mmap + src_offset + 4); if (read_index_extension(p->istate, p->mmap + src_offset, p->mmap + src_offset + 8, @@ -2015,8 +2013,8 @@ struct load_cache_entries_thread_data int offset; const char *mmap; struct index_entry_offset_table *ieot; - int ieot_offset;/* starting index into the ieot array */ - int ieot_work; /* count of ieot entries to process */ + int ieot_start; /* starting index into the ieot array */ + int ieot_blocks;/* count of ieot entries to process */ unsigned long consumed; /* return # of bytes in index file processed */ }; @@ -2030,8 +2028,9 @@ static void *load_cache_entries_thread(void *_data) int i; /* iterate across all ieot blocks assigned to this thread */ - for (i = p->ieot_offset; i < p->ieot_offset + p->ieot_work; i++) { - p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool, p->offset, p->ieot->entries[i].nr, p->mmap, p->ieot->entries[i].offset, NULL); + for (i = p->ieot_start; i < p->ieot_start + p->ieot_blocks; i++) { + p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool, + p->offset, p->ieot->entries[i].nr, p->mmap, p->ieot->entries[i].offset, NULL); p->offset += p->ieot->entries[i].nr; } return NULL; @@ -2040,48 +2039,45 @@ static void *load_cache_entries_thread(void *_data) static unsigned long load_cache_entries_threaded(struct index_state *istate, const char *mmap, size_t mmap_size, unsigned long src_offset, int nr_threads, struct index_entry_offset_table *ieot) { - int i, offset, ieot_work, ieot_offset, err; + int i, offset, ieot_blocks, ieot_start, err; struct load_cache_entries_thread_data *data; unsigned long consumed = 0; - int nr; /* a little sanity checking */ if (istate->name_hash_initialized) BUG("the name hash isn't thread safe"); mem_pool_init(>ce_mem_pool, 0); - data = xcalloc(nr_threads, sizeof(struct load_cache_entries_thread_data)); /* ensure we have no more threads than we have blocks to process */ if (nr_threads > ieot->nr) nr_threads = ieot->nr; - data = xcalloc(nr_threads, sizeof(struct load_cache_entries_thread_data)); + data = xcalloc(nr_threads, sizeof(*data)); - offset = ieot_offset = 0; - ieot_work = DIV_ROUND_UP(ieot->nr, nr_threads); + offset = ieot_start = 0; + ieot_blocks = DIV_ROUND_UP(ieot->nr, nr_threads); for (i = 0; i < nr_threads; i++) { struct load_cache_entries_thread_data *p = [i]; - int j; + int nr, j; - if (ieot_offset + ieot_work > ieot->nr) - ieot_work = ieot->nr - ieot_offset; + if (ieot_start + ieot_blocks > ieot->nr) + ieot_blocks = ieot->nr - ieot_start; p->istate = istate; p->offset = offset; p->mmap = mmap; p->ieot = ieot; - p->ieot_offset = ieot_offset; - p->ieot_work = ieot_work; + p->ieot_start = ieot_start; + p->ieot_blocks = ieot_blocks; /* create a mem_pool for each thread
Re: [PATCH] unpack-trees: allow missing CE_SKIP_WORKTREE objs
On 10/9/2018 5:30 AM, Junio C Hamano wrote: Jonathan Tan writes: @@ -1635,6 +1635,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options o->result.cache_tree = cache_tree(); if (!cache_tree_fully_valid(o->result.cache_tree)) cache_tree_update(>result, + WRITE_TREE_SKIP_WORKTREE_MISSING_OK | WRITE_TREE_SILENT | WRITE_TREE_REPAIR); } H. Should this be passing the bit unconditionally? Shouldn't it be set only when we are doing lazy clone? A non-lazy clone that merely uses sparse checkout has nowhere else to turn to if it loses a blob object that currently is not necessary to complete a checkout, unlike a repository with promisor. I agree. I believe this logic should only be triggered when running in a partial clone repo. Otherwise, we're potentially changing the behavior of 'normal' repos unnecessarily.
Re: [PATCH] unpack-trees: allow missing CE_SKIP_WORKTREE objs
On 10/8/2018 5:48 PM, Jonathan Tan wrote: Whenever a sparse checkout occurs, the existence of all blobs in the index is verified, whether or not they are included or excluded by the .git/info/sparse-checkout specification. This degrades performance, significantly in the case of a partial clone, because a lazy fetch occurs whenever the existence of a missing blob is checked. At the point of invoking cache_tree_update() in unpack_trees(), CE_SKIP_WORKTREE is already set on all excluded blobs (mark_new_skip_worktree() is called twice to set CE_NEW_SKIP_WORKTREE, then apply_sparse_checkout() is called which copies over CE_NEW_SKIP_WORKTREE to CE_SKIP_WORKTREE). cache_tree_update() can use this information to know which blobs are excluded, and thus skip the checking of these. Because cache_tree_update() is used from multiple places, this new behavior is guarded by a new flag WRITE_TREE_SKIP_WORKTREE_MISSING_OK. Implement this new flag, and teach unpack_trees() to invoke cache_tree_update() with this new flag. I wonder if preventing the download of all missing blobs should be limited to only the checkout command. When you looked at the other places that call cache_tree_update(), does it make sense that they trigger the download of all the missing blobs? For example, I suspect you would not want them all downloaded just to do a merge-recursive. In full disclosure, we implemented this a couple of years ago [1] for GVFS and opted to do the logic a little differently. We found that we didn't want to trigger the download of all missing blobs in cache_tree_update() for _any_ command that was executing in a partially cloned repo. This is safe because if any individual blob is actually needed, it will get downloaded "on demand" already. [1] https://github.com/Microsoft/git/commit/ec865500d98 Signed-off-by: Jonathan Tan --- cache-tree.c | 6 +- cache-tree.h | 4 t/t1090-sparse-checkout-scope.sh | 33 unpack-trees.c | 1 + 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/cache-tree.c b/cache-tree.c index 5ce51468f0..340caf2d34 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -246,6 +246,8 @@ static int update_one(struct cache_tree *it, int missing_ok = flags & WRITE_TREE_MISSING_OK; int dryrun = flags & WRITE_TREE_DRY_RUN; int repair = flags & WRITE_TREE_REPAIR; + int skip_worktree_missing_ok = + flags & WRITE_TREE_SKIP_WORKTREE_MISSING_OK; int to_invalidate = 0; int i; @@ -356,7 +358,9 @@ static int update_one(struct cache_tree *it, } if (is_null_oid(oid) || - (mode != S_IFGITLINK && !missing_ok && !has_object_file(oid))) { + (mode != S_IFGITLINK && !missing_ok && +!(skip_worktree_missing_ok && ce_skip_worktree(ce)) && +!has_object_file(oid))) { strbuf_release(); if (expected_missing) return -1; diff --git a/cache-tree.h b/cache-tree.h index 0ab6784ffe..655d487619 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -40,6 +40,10 @@ void cache_tree_verify(struct index_state *); #define WRITE_TREE_DRY_RUN 4 #define WRITE_TREE_SILENT 8 #define WRITE_TREE_REPAIR 16 +/* + * Do not check for the presence of cache entry objects with CE_SKIP_WORKTREE. + */ +#define WRITE_TREE_SKIP_WORKTREE_MISSING_OK 32 /* error return codes */ #define WRITE_TREE_UNREADABLE_INDEX (-1) diff --git a/t/t1090-sparse-checkout-scope.sh b/t/t1090-sparse-checkout-scope.sh index 25d7c700f6..090b7fc3d3 100755 --- a/t/t1090-sparse-checkout-scope.sh +++ b/t/t1090-sparse-checkout-scope.sh @@ -63,4 +63,37 @@ test_expect_success 'return to full checkout of master' ' test "$(cat b)" = "modified" ' +test_expect_success 'in partial clone, sparse checkout only fetches needed blobs' ' + test_create_repo server && + git clone "file://$(pwd)/server" client && + + test_config -C server uploadpack.allowfilter 1 && + test_config -C server uploadpack.allowanysha1inwant 1 && + echo a >server/a && + echo bb >server/b && + mkdir server/c && + echo ccc >server/c/c && + git -C server add a b c/c && + git -C server commit -m message && + + test_config -C client core.sparsecheckout 1 && + test_config -C client extensions.partialclone origin && + echo "!/*" >client/.git/info/sparse-checkout && + echo "/a" >>client/.git/info/sparse-checkout && + git -C client fetch --filter=blob:none origin && + git -C client checkout FETCH_HEAD && + + git -C client rev-list HEAD \ + --quiet --objects --missing=print >unsorted_actual && + ( + printf "?" && + git hash-object server/b && + printf "?" && + git
Re: t3404.6 breaks on master under GIT_FSMONITOR_TEST
On 10/8/2018 10:19 AM, Ævar Arnfjörð Bjarmason wrote: On Thu, Sep 06 2018, Ævar Arnfjörð Bjarmason wrote: On Thu, Feb 01 2018, Ævar Arnfjörð Bjarmason wrote: The GIT_FSMONITOR_TEST variable allows you to roundtrip the fsmonitor codpath in the whole test suite. On both Debian & CentOS this breaks for me: (cd t && GIT_FSMONITOR_TEST=$PWD/t7519/fsmonitor-all ./t3404-rebase-interactive.sh -i) Whereas this works: (cd t && GIT_FSMONITOR_TEST=$PWD/t7519/fsmonitor-all GIT_SKIP_TESTS=t3404.6 ./t3404-rebase-interactive.sh -i) The entirety of the rest of the test suite still passes with GIT_FSMONITOR_TEST. This has been failing ever since GIT_FSMONITOR_TEST was introduced in 883e248b8a ("fsmonitor: teach git to optionally utilize a file system monitor to speed up detecting new or changed files.", 2017-09-22). Under -v -x -i: + echo test_must_fail: command succeeded: env FAKE_LINES=exec_echo_foo_>file1 1 git rebase -i HEAD^ test_must_fail: command succeeded: env FAKE_LINES=exec_echo_foo_>file1 1 git rebase -i HEAD^ + return 1 error: last command exited with $?=1 not ok 6 - rebase -i with the exec command checks tree cleanness # # git checkout master && # set_fake_editor && # test_must_fail env FAKE_LINES="exec_echo_foo_>file1 1" git rebase -i HEAD^ && Maybe once this is fixed running the test suite under GIT_FSMONITOR_TEST would be a useful Travis target, but I don't know the current status of adding new options to Travis. *Poke* at this again. Ben, or anyone else with knowledge of fsmonitor: Can you reproduce this? This failure along with the one I noted in https://public-inbox.org/git/87tvn2remn@evledraar.gmail.com/ is failing the tests on Linux when run with GIT_FSMONITOR_TEST. I'm looking at this again because SZEDER's patches to the split index reminded me again that we have these long-standing failures in rare test modes (see https://public-inbox.org/git/87va7ireuu@evledraar.gmail.com/ for the split index discussion). For what it's worth this is still broken, but more importantly (I'm not just keeping bumping the same thing) the only thing that's now broken under fsmonitor. I.e. my skip config is now GIT_SKIP_TESTS="t3404.7" whereas before 43f1180814 ("git-mv: allow submodules and fsmonitor to work together", 2018-09-10) I needed to add "t7411.3 t7411.4" to that. I glanced at this for a few minutes but it wasn't obvious what was happening. It will take some additional effort to dig into and figure out the underlying issue. I haven't forgotten about this - it's still on my list, just below some other things I need to get finished up first.
Re: [PATCH v7 7/7] read-cache: load cache entries on worker threads
On 10/1/2018 1:09 PM, Duy Nguyen wrote: On Mon, Oct 1, 2018 at 3:46 PM Ben Peart wrote: +/* + * A helper function that will load the specified range of cache entries + * from the memory mapped file and add them to the given index. + */ +static unsigned long load_cache_entry_block(struct index_state *istate, + struct mem_pool *ce_mem_pool, int offset, int nr, const char *mmap, Please use unsigned long for offset (here and in the thread_data struct). We should use off_t instead, but that's out of scope. At least keep offset type consistent in here. Unfortunately, this code is littered with different types for size and offset. "int" is the most common but there are also off_t, size_t and some unsigned long as well. Currently all of them are at least 32 bits so until we need to have an index larger than 32 bits, we should be OK. I agree, fixing them all is outside the scope of this patch. + unsigned long start_offset, const struct cache_entry *previous_ce) I don't think you want to pass previous_ce in. You always pass NULL anyway. And if this function is about loading a block (i.e. at block boundary) then initial previous_ce _must_ be NULL or things break horribly. The function as written can load any arbitrary subset of cache entries as long as previous_ce is set correctly. I currently only use it on block boundaries but I don't see any good reason to limit its capabilities by moving what code passes the NULL in one function deeper. @@ -1959,20 +2007,125 @@ static void *load_index_extensions(void *_data) #define THREAD_COST(1) +struct load_cache_entries_thread_data +{ + pthread_t pthread; + struct index_state *istate; + struct mem_pool *ce_mem_pool; + int offset; + const char *mmap; + struct index_entry_offset_table *ieot; + int ieot_offset;/* starting index into the ieot array */ If it's an index, maybe just name it ieot_index and we can get rid of the comment. + int ieot_work; /* count of ieot entries to process */ Maybe instead of saving the whole "ieot" table here. Add struct index_entry_offset *blocks; which points to the starting block for this thread and rename that mysterious (to me) ieot_work to nr_blocks. The thread will have access from blocks[0] to blocks[nr_blocks - 1] Meh. Either way you have to figure out there are a block of entries and each thread is going to process some subset of those entries. You can do the base + offset math here or down in the calling function but it has to happen (and be understood) either way. I'll rename ieot_offset to ieot_start and ieot_work to ieot_blocks which should hopefully help make it more obvious what they do. + unsigned long consumed; /* return # of bytes in index file processed */ +}; + +/* + * A thread proc to run the load_cache_entries() computation + * across multiple background threads. + */ +static void *load_cache_entries_thread(void *_data) +{ + struct load_cache_entries_thread_data *p = _data; + int i; + + /* iterate across all ieot blocks assigned to this thread */ + for (i = p->ieot_offset; i < p->ieot_offset + p->ieot_work; i++) { + p->consumed += load_cache_entry_block(p->istate, p->ce_mem_pool, p->offset, p->ieot->entries[i].nr, p->mmap, p->ieot->entries[i].offset, NULL); Please wrap this long line. + p->offset += p->ieot->entries[i].nr; + } + return NULL; +} + +static unsigned long load_cache_entries_threaded(struct index_state *istate, const char *mmap, size_t mmap_size, + unsigned long src_offset, int nr_threads, struct index_entry_offset_table *ieot) +{ + int i, offset, ieot_work, ieot_offset, err; + struct load_cache_entries_thread_data *data; + unsigned long consumed = 0; + int nr; + + /* a little sanity checking */ + if (istate->name_hash_initialized) + BUG("the name hash isn't thread safe"); + + mem_pool_init(>ce_mem_pool, 0); + data = xcalloc(nr_threads, sizeof(struct load_cache_entries_thread_data)); we normally use sizeof(*data) instead of sizeof(struct ...) + + /* ensure we have no more threads than we have blocks to process */ + if (nr_threads > ieot->nr) + nr_threads = ieot->nr; + data = xcalloc(nr_threads, sizeof(struct load_cache_entries_thread_data)); eh.. reallocate the same "data"? Thanks, good catch - I hate leaky code. + + offset = ieot_offset = 0; + ieot_work = DIV_ROUND_UP(ieot->nr, nr_threads); + for (i = 0; i < nr_threads; i++) { + struct load_cache_entries_thread_data *p = [i]; + int j; + + if (ieot_offset + ieot_work > ieot->nr) +
Re: [PATCH v7 6/7] ieot: add Index Entry Offset Table (IEOT) extension
On 10/1/2018 12:27 PM, Duy Nguyen wrote: On Mon, Oct 1, 2018 at 3:46 PM Ben Peart wrote: @@ -1888,6 +1890,23 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries) return ondisk_size + entries * per_entry; } +struct index_entry_offset +{ + /* starting byte offset into index file, count of index entries in this block */ + int offset, nr; uint32_t? +}; + +struct index_entry_offset_table +{ + int nr; + struct index_entry_offset entries[0]; Use FLEX_ARRAY. Some compilers are not happy with an array of zero items if I remember correctly. Thanks for the warning, I'll update that. @@ -2523,6 +2551,9 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, struct strbuf previous_name_buf = STRBUF_INIT, *previous_name; int drop_cache_tree = istate->drop_cache_tree; off_t offset; + int ieot_work = 1; + struct index_entry_offset_table *ieot = NULL; + int nr; There are a bunch of stuff going on in this function, maybe rename this to nr_threads or nr_blocks to be less generic. I can add a nr_threads variable to make this more obvious. for (i = removed = extended = 0; i < entries; i++) { if (cache[i]->ce_flags & CE_REMOVE) @@ -2556,7 +2587,38 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, if (ce_write(, newfd, , sizeof(hdr)) < 0) return -1; +#ifndef NO_PTHREADS + if ((nr = git_config_get_index_threads()) != 1) { Maybe keep this assignment out of "if". + int ieot_blocks, cpus; + + /* +* ensure default number of ieot blocks maps evenly to the +* default number of threads that will process them +*/ + if (!nr) { + ieot_blocks = istate->cache_nr / THREAD_COST; + cpus = online_cpus(); + if (ieot_blocks > cpus - 1) + ieot_blocks = cpus - 1; The " - 1" here is for extension thread, yes? Probably worth a comment. + } else { + ieot_blocks = nr; + } + + /* +* no reason to write out the IEOT extension if we don't +* have enough blocks to utilize multi-threading +*/ + if (ieot_blocks > 1) { + ieot = xcalloc(1, sizeof(struct index_entry_offset_table) + + (ieot_blocks * sizeof(struct index_entry_offset))); Use FLEX_ALLOC_MEM() after you declare ..._table with FLEX_ARRAY. FLEX_ALLOC_MEM() is focused on variable length "char" data. All uses of FLEX_ARRAY with non char data did the allocation themselves to avoid the unnecessary memcpy() that comes with FLEX_ALLOC_MEM. This ieot needs to be freed also and should be before any "return -1" in this function. Good catch. Will do. + ieot->nr = 0; + ieot_work = DIV_ROUND_UP(entries, ieot_blocks); Perhaps a better name for ioet_work? This looks like the number of cache entries per block. + } + } +#endif + offset = lseek(newfd, 0, SEEK_CUR) + write_buffer_len; + nr = 0; Eh.. repurpose nr to count cache entries now? It's kinda hard to follow. previous_name = (hdr_version == 4) ? _name_buf : NULL; for (i = 0; i < entries; i++) { @@ -2578,11 +2640,31 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, drop_cache_tree = 1; } + if (ieot && i && (i % ieot_work == 0)) { + ieot->entries[ieot->nr].nr = nr; + ieot->entries[ieot->nr].offset = offset; + ieot->nr++; + /* +* If we have a V4 index, set the first byte to an invalid +* character to ensure there is nothing common with the previous +* entry +*/ + if (previous_name) + previous_name->buf[0] = 0; + nr = 0; + offset = lseek(newfd, 0, SEEK_CUR) + write_buffer_len; This only works correctly if the ce_write_entry() from the last iteration has flushed everything to out to newfd. Maybe it does, but it's error prone to rely on that in my opinion. Maybe we need an explicit ce_write_flush() here to make sure. This logic already takes any unflushed data into account - the offset is what has been flushed to disk (lseek) plus the amount still in the buffer (write_buffer_len) waiting to be flushed. I don
Re: [PATCH v7 3/7] eoie: add End of Index Entry (EOIE) extension
On 10/1/2018 11:30 AM, Duy Nguyen wrote: On Mon, Oct 1, 2018 at 3:46 PM Ben Peart wrote: @@ -2479,6 +2491,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, if (ce_write(, newfd, , sizeof(hdr)) < 0) return -1; + offset = lseek(newfd, 0, SEEK_CUR) + write_buffer_len; Note, lseek() could in theory return -1 on error. Looking at the error code list in the man page it's pretty unlikely though, unless Good catch. I'll add the logic to check for an error. +static size_t read_eoie_extension(const char *mmap, size_t mmap_size) +{ + /* +* The end of index entries (EOIE) extension is guaranteed to be last +* so that it can be found by scanning backwards from the EOF. +* +* "EOIE" +* <4-byte length> +* <4-byte offset> +* <20-byte hash> +*/ + const char *index, *eoie; + uint32_t extsize; + size_t offset, src_offset; + unsigned char hash[GIT_MAX_RAWSZ]; + git_hash_ctx c; + + /* ensure we have an index big enough to contain an EOIE extension */ + if (mmap_size < sizeof(struct cache_header) + EOIE_SIZE_WITH_HEADER + the_hash_algo->rawsz) Using sizeof() for on-disk structures could be dangerous because you don't know how much padding there could be (I'm not sure if it's actually specified in the C language spec). I've checked, on at least x86 and amd64, sizeof(struct cache_header) is 12 bytes, but I don't know if there are any crazy architectures out there that set higher padding. This must be safe as the same code has been in do_read_index() and verify_index_from() for a long time.
Re: [PATCH v7 5/7] read-cache: load cache extensions on a worker thread
On 10/1/2018 11:50 AM, Duy Nguyen wrote: On Mon, Oct 1, 2018 at 3:46 PM Ben Peart wrote: @@ -1890,6 +1891,46 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries) static size_t read_eoie_extension(const char *mmap, size_t mmap_size); static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset); +struct load_index_extensions +{ +#ifndef NO_PTHREADS + pthread_t pthread; +#endif + struct index_state *istate; + const char *mmap; + size_t mmap_size; + unsigned long src_offset; +}; + +static void *load_index_extensions(void *_data) +{ + struct load_index_extensions *p = _data; + unsigned long src_offset = p->src_offset; + + while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) { + /* After an array of active_nr index entries, +* there can be arbitrary number of extended +* sections, each of which is prefixed with +* extension name (4-byte) and section length +* in 4-byte network byte order. +*/ + uint32_t extsize; + memcpy(, p->mmap + src_offset + 4, 4); + extsize = ntohl(extsize); This could be get_be32() so that the next person will not need to do another cleanup patch. Good point, it was existing code so I focused on doing the minimal change possible but I can clean it up since I'm touching it already. + if (read_index_extension(p->istate, + p->mmap + src_offset, + p->mmap + src_offset + 8, + extsize) < 0) { This alignment is misleading because the conditions are aligned with the code block below. If you can't align it with the '(', then just add another tab. Ditto. I'll make it: uint32_t extsize = get_be32(p->mmap + src_offset + 4); if (read_index_extension(p->istate, p->mmap + src_offset, p->mmap + src_offset + 8, extsize) < 0) { munmap((void *)p->mmap, p->mmap_size); die(_("index file corrupt")); } + munmap((void *)p->mmap, p->mmap_size); This made me pause for a bit since we should not need to cast back to void *. It turns out you need this because mmap pointer is const. But you don't even need to munmap here. We're dying, the OS will clean everything up. I had the same thought about "we're about to die so why bother calling munmap() here" but I decided rather than change it, I'd follow the existing pattern just in case there was some platform/bug that required it. I apparently doesn't cause harm as it's been that way a long time. + die(_("index file corrupt")); + } + src_offset += 8; + src_offset += extsize; + } + + return NULL; +}
Re: [PATCH v7 3/7] eoie: add End of Index Entry (EOIE) extension
On 10/1/2018 11:17 AM, SZEDER Gábor wrote: On Mon, Oct 01, 2018 at 09:45:52AM -0400, Ben Peart wrote: From: Ben Peart The End of Index Entry (EOIE) is used to locate the end of the variable length index entries and the beginning of the extensions. Code can take advantage of this to quickly locate the index extensions without having to parse through all of the index entries. Because it must be able to be loaded before the variable length cache entries and other index extensions, this extension must be written last. The signature for this extension is { 'E', 'O', 'I', 'E' }. The extension consists of: - 32-bit offset to the end of the index entries - 160-bit SHA-1 over the extension types and their sizes (but not their contents). E.g. if we have "TREE" extension that is N-bytes long, "REUC" extension that is M-bytes long, followed by "EOIE", then the hash would be: SHA-1("TREE" + + "REUC" + ) Signed-off-by: Ben Peart I think the commit message should explicitly mention that this this extension - will always be written and why, - but is optional, so other Git implementations not supporting it will have no troubles reading the index, - and that it is written even to the shared index and why, and that because of this the index checksums in t1700 had to be updated. Sure, I'll add that additional information to the commit message on the next spin.
[PATCH v7 6/7] ieot: add Index Entry Offset Table (IEOT) extension
From: Ben Peart This patch enables addressing the CPU cost of loading the index by adding additional data to the index that will allow us to efficiently multi- thread the loading and conversion of cache entries. It accomplishes this by adding an (optional) index extension that is a table of offsets to blocks of cache entries in the index file. To make this work for V4 indexes, when writing the cache entries, it periodically "resets" the prefix-compression by encoding the current entry as if the path name for the previous entry is completely different and saves the offset of that entry in the IEOT. Basically, with V4 indexes, it generates offsets into blocks of prefix-compressed entries. Signed-off-by: Ben Peart --- Documentation/technical/index-format.txt | 18 +++ read-cache.c | 173 +++ 2 files changed, 191 insertions(+) diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index 6bc2d90f7f..7c4d67aa6a 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -337,3 +337,21 @@ The remaining data of each directory block is grouped by type: SHA-1("TREE" + + "REUC" + ) + +== Index Entry Offset Table + + The Index Entry Offset Table (IEOT) is used to help address the CPU + cost of loading the index by enabling multi-threading the process of + converting cache entries from the on-disk format to the in-memory format. + The signature for this extension is { 'I', 'E', 'O', 'T' }. + + The extension consists of: + + - 32-bit version (currently 1) + + - A number of index offset entries each consisting of: + +- 32-bit offset from the begining of the file to the first cache entry + in this block of entries. + +- 32-bit count of cache entries in this block diff --git a/read-cache.c b/read-cache.c index 77083ab8bb..9557376e78 100644 --- a/read-cache.c +++ b/read-cache.c @@ -45,6 +45,7 @@ #define CACHE_EXT_UNTRACKED 0x554E5452 /* "UNTR" */ #define CACHE_EXT_FSMONITOR 0x46534D4E /* "FSMN" */ #define CACHE_EXT_ENDOFINDEXENTRIES 0x454F4945 /* "EOIE" */ +#define CACHE_EXT_INDEXENTRYOFFSETTABLE 0x49454F54 /* "IEOT" */ /* changes that can be kept in $GIT_DIR/index (basically all extensions) */ #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \ @@ -1696,6 +1697,7 @@ static int read_index_extension(struct index_state *istate, read_fsmonitor_extension(istate, data, sz); break; case CACHE_EXT_ENDOFINDEXENTRIES: + case CACHE_EXT_INDEXENTRYOFFSETTABLE: /* already handled in do_read_index() */ break; default: @@ -1888,6 +1890,23 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries) return ondisk_size + entries * per_entry; } +struct index_entry_offset +{ + /* starting byte offset into index file, count of index entries in this block */ + int offset, nr; +}; + +struct index_entry_offset_table +{ + int nr; + struct index_entry_offset entries[0]; +}; + +#ifndef NO_PTHREADS +static struct index_entry_offset_table *read_ieot_extension(const char *mmap, size_t mmap_size, size_t offset); +static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_table *ieot); +#endif + static size_t read_eoie_extension(const char *mmap, size_t mmap_size); static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset); @@ -1931,6 +1950,15 @@ static void *load_index_extensions(void *_data) return NULL; } +/* + * Mostly randomly chosen maximum thread counts: we + * cap the parallelism to online_cpus() threads, and we want + * to have at least 1 cache entries per thread for it to + * be worth starting a thread. + */ + +#define THREAD_COST(1) + /* remember to discard_cache() before reading a different cache! */ int do_read_index(struct index_state *istate, const char *path, int must_exist) { @@ -2523,6 +2551,9 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, struct strbuf previous_name_buf = STRBUF_INIT, *previous_name; int drop_cache_tree = istate->drop_cache_tree; off_t offset; + int ieot_work = 1; + struct index_entry_offset_table *ieot = NULL; + int nr; for (i = removed = extended = 0; i < entries; i++) { if (cache[i]->ce_flags & CE_REMOVE) @@ -2556,7 +2587,38 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, if (ce_write(, newfd, , sizeof(hdr)) < 0) return -1; +#ifndef NO_PTHREADS + if ((nr = git_config_get_index_threads()) != 1) { + int ieot_blocks, cpus; + + /* +* ensure default number of ieot blocks
[PATCH v7 5/7] read-cache: load cache extensions on a worker thread
From: Ben Peart This patch helps address the CPU cost of loading the index by loading the cache extensions on a worker thread in parallel with loading the cache entries. In some cases, loading the extensions takes longer than loading the cache entries so this patch utilizes the new EOIE to start the thread to load the extensions before loading all the cache entries in parallel. This is possible because the current extensions don't access the cache entries in the index_state structure so are OK that they don't all exist yet. The CACHE_EXT_TREE, CACHE_EXT_RESOLVE_UNDO, and CACHE_EXT_UNTRACKED extensions don't even get a pointer to the index so don't have access to the cache entries. CACHE_EXT_LINK only uses the index_state to initialize the split index. CACHE_EXT_FSMONITOR only uses the index_state to save the fsmonitor last update and dirty flags. I used p0002-read-cache.sh to generate some performance data: Test w/100,000 files reduced the time by 0.53% Test w/1,000,000 files reduced the time by 27.78% Signed-off-by: Ben Peart --- read-cache.c | 97 +++- 1 file changed, 81 insertions(+), 16 deletions(-) diff --git a/read-cache.c b/read-cache.c index af2605a168..77083ab8bb 100644 --- a/read-cache.c +++ b/read-cache.c @@ -23,6 +23,7 @@ #include "split-index.h" #include "utf8.h" #include "fsmonitor.h" +#include "thread-utils.h" /* Mask for the name length in ce_flags in the on-disk index */ @@ -1890,6 +1891,46 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries) static size_t read_eoie_extension(const char *mmap, size_t mmap_size); static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset); +struct load_index_extensions +{ +#ifndef NO_PTHREADS + pthread_t pthread; +#endif + struct index_state *istate; + const char *mmap; + size_t mmap_size; + unsigned long src_offset; +}; + +static void *load_index_extensions(void *_data) +{ + struct load_index_extensions *p = _data; + unsigned long src_offset = p->src_offset; + + while (src_offset <= p->mmap_size - the_hash_algo->rawsz - 8) { + /* After an array of active_nr index entries, +* there can be arbitrary number of extended +* sections, each of which is prefixed with +* extension name (4-byte) and section length +* in 4-byte network byte order. +*/ + uint32_t extsize; + memcpy(, p->mmap + src_offset + 4, 4); + extsize = ntohl(extsize); + if (read_index_extension(p->istate, + p->mmap + src_offset, + p->mmap + src_offset + 8, + extsize) < 0) { + munmap((void *)p->mmap, p->mmap_size); + die(_("index file corrupt")); + } + src_offset += 8; + src_offset += extsize; + } + + return NULL; +} + /* remember to discard_cache() before reading a different cache! */ int do_read_index(struct index_state *istate, const char *path, int must_exist) { @@ -1900,6 +1941,11 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) const char *mmap; size_t mmap_size; const struct cache_entry *previous_ce = NULL; + struct load_index_extensions p; + size_t extension_offset = 0; +#ifndef NO_PTHREADS + int nr_threads; +#endif if (istate->initialized) return istate->cache_nr; @@ -1936,6 +1982,30 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) istate->cache = xcalloc(istate->cache_alloc, sizeof(*istate->cache)); istate->initialized = 1; + p.istate = istate; + p.mmap = mmap; + p.mmap_size = mmap_size; + +#ifndef NO_PTHREADS + nr_threads = git_config_get_index_threads(); + if (!nr_threads) + nr_threads = online_cpus(); + + if (nr_threads > 1) { + extension_offset = read_eoie_extension(mmap, mmap_size); + if (extension_offset) { + int err; + + p.src_offset = extension_offset; + err = pthread_create(, NULL, load_index_extensions, ); + if (err) + die(_("unable to create load_index_extensions thread: %s"), strerror(err)); + + nr_threads--; + } + } +#endif + if (istate->version == 4) { mem_pool_init(>ce_mem_pool, estimate_cache_size_from_compressed(istate->cache_nr)); @@ -1960,22 +2030,17 @@ int do_read_index(struct index_st
[PATCH v7 4/7] config: add new index.threads config setting
From: Ben Peart Add support for a new index.threads config setting which will be used to control the threading code in do_read_index(). A value of 0 will tell the index code to automatically determine the correct number of threads to use. A value of 1 will make the code single threaded. A value greater than 1 will set the maximum number of threads to use. For testing purposes, this setting can be overwritten by setting the GIT_TEST_INDEX_THREADS= environment variable to a value greater than 0. Signed-off-by: Ben Peart --- Documentation/config.txt | 7 +++ config.c | 18 ++ config.h | 1 + t/README | 5 + t/t1700-split-index.sh | 5 + 5 files changed, 36 insertions(+) diff --git a/Documentation/config.txt b/Documentation/config.txt index ad0f4510c3..8fd973b76b 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -2413,6 +2413,13 @@ imap:: The configuration variables in the 'imap' section are described in linkgit:git-imap-send[1]. +index.threads:: + Specifies the number of threads to spawn when loading the index. + This is meant to reduce index load time on multiprocessor machines. + Specifying 0 or 'true' will cause Git to auto-detect the number of + CPU's and set the number of threads accordingly. Specifying 1 or + 'false' will disable multithreading. Defaults to 'true'. + index.version:: Specify the version with which new index files should be initialized. This does not affect existing repositories. diff --git a/config.c b/config.c index 3461993f0a..2ee29f6f86 100644 --- a/config.c +++ b/config.c @@ -2289,6 +2289,24 @@ int git_config_get_fsmonitor(void) return 0; } +int git_config_get_index_threads(void) +{ + int is_bool, val = 0; + + val = git_env_ulong("GIT_TEST_INDEX_THREADS", 0); + if (val) + return val; + + if (!git_config_get_bool_or_int("index.threads", _bool, )) { + if (is_bool) + return val ? 0 : 1; + else + return val; + } + + return 0; /* auto */ +} + NORETURN void git_die_config_linenr(const char *key, const char *filename, int linenr) { diff --git a/config.h b/config.h index ab46e0165d..a06027e69b 100644 --- a/config.h +++ b/config.h @@ -250,6 +250,7 @@ extern int git_config_get_untracked_cache(void); extern int git_config_get_split_index(void); extern int git_config_get_max_percent_split_change(void); extern int git_config_get_fsmonitor(void); +extern int git_config_get_index_threads(void); /* This dies if the configured or default date is in the future */ extern int git_config_get_expiry(const char *key, const char **output); diff --git a/t/README b/t/README index 3ea6c85460..8f5c0620ea 100644 --- a/t/README +++ b/t/README @@ -327,6 +327,11 @@ GIT_TEST_COMMIT_GRAPH=, when true, forces the commit-graph to be written after every 'git commit' command, and overrides the 'core.commitGraph' setting to true. +GIT_TEST_INDEX_THREADS= enables exercising the multi-threaded loading +of the index for the whole test suite by bypassing the default number of +cache entries and thread minimums. Setting this to 1 will make the +index loading single threaded. + Naming Tests diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh index 8e17f8e7a0..ef9349bd70 100755 --- a/t/t1700-split-index.sh +++ b/t/t1700-split-index.sh @@ -6,7 +6,12 @@ test_description='split index mode tests' # We need total control of index splitting here sane_unset GIT_TEST_SPLIT_INDEX + +# Testing a hard coded SHA against an index with an extension +# that can vary from run to run is problematic so we disable +# those extensions. sane_unset GIT_FSMONITOR_TEST +sane_unset GIT_TEST_INDEX_THREADS test_expect_success 'enable split index' ' git config splitIndex.maxPercentChange 100 && -- 2.18.0.windows.1
[PATCH v7 7/7] read-cache: load cache entries on worker threads
From: Ben Peart This patch helps address the CPU cost of loading the index by utilizing the Index Entry Offset Table (IEOT) to divide loading and conversion of the cache entries across multiple threads in parallel. I used p0002-read-cache.sh to generate some performance data: Test w/100,000 files reduced the time by 32.24% Test w/1,000,000 files reduced the time by -4.77% Note that on the 1,000,000 files case, multi-threading the cache entry parsing does not yield a performance win. This is because the cost to parse the index extensions in this repo, far outweigh the cost of loading the cache entries. The high cost of parsing the index extensions is driven by the cache tree and the untracked cache extensions. As this is currently the longest pole, any reduction in this time will reduce the overall index load times so is worth further investigation in another patch series. Signed-off-by: Ben Peart --- read-cache.c | 224 +++ 1 file changed, 189 insertions(+), 35 deletions(-) diff --git a/read-cache.c b/read-cache.c index 9557376e78..14402a0738 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1720,7 +1720,8 @@ int read_index(struct index_state *istate) return read_index_from(istate, get_index_file(), get_git_dir()); } -static struct cache_entry *create_from_disk(struct index_state *istate, +static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool, + unsigned int version, struct ondisk_cache_entry *ondisk, unsigned long *ent_size, const struct cache_entry *previous_ce) @@ -1737,7 +1738,7 @@ static struct cache_entry *create_from_disk(struct index_state *istate, * number of bytes to be stripped from the end of the previous name, * and the bytes to append to the result, to come up with its name. */ - int expand_name_field = istate->version == 4; + int expand_name_field = version == 4; /* On-disk flags are just 16 bits */ flags = get_be16(>flags); @@ -1761,16 +1762,17 @@ static struct cache_entry *create_from_disk(struct index_state *istate, const unsigned char *cp = (const unsigned char *)name; size_t strip_len, previous_len; - previous_len = previous_ce ? previous_ce->ce_namelen : 0; + /* If we're at the begining of a block, ignore the previous name */ strip_len = decode_varint(); - if (previous_len < strip_len) { - if (previous_ce) + if (previous_ce) { + previous_len = previous_ce->ce_namelen; + if (previous_len < strip_len) die(_("malformed name field in the index, near path '%s'"), - previous_ce->name); - else - die(_("malformed name field in the index in the first path")); + previous_ce->name); + copy_len = previous_len - strip_len; + } else { + copy_len = 0; } - copy_len = previous_len - strip_len; name = (const char *)cp; } @@ -1780,7 +1782,7 @@ static struct cache_entry *create_from_disk(struct index_state *istate, len += copy_len; } - ce = mem_pool__ce_alloc(istate->ce_mem_pool, len); + ce = mem_pool__ce_alloc(ce_mem_pool, len); ce->ce_stat_data.sd_ctime.sec = get_be32(>ctime.sec); ce->ce_stat_data.sd_mtime.sec = get_be32(>mtime.sec); @@ -1950,6 +1952,52 @@ static void *load_index_extensions(void *_data) return NULL; } +/* + * A helper function that will load the specified range of cache entries + * from the memory mapped file and add them to the given index. + */ +static unsigned long load_cache_entry_block(struct index_state *istate, + struct mem_pool *ce_mem_pool, int offset, int nr, const char *mmap, + unsigned long start_offset, const struct cache_entry *previous_ce) +{ + int i; + unsigned long src_offset = start_offset; + + for (i = offset; i < offset + nr; i++) { + struct ondisk_cache_entry *disk_ce; + struct cache_entry *ce; + unsigned long consumed; + + disk_ce = (struct ondisk_cache_entry *)(mmap + src_offset); + ce = create_from_disk(ce_mem_pool, istate->version, disk_ce, , previous_ce); + set_index_entry(istate, i, ce); + + src_offset += consumed; + previous_ce = ce; + } + return src_offset - start_offset; +}
[PATCH v7 0/7] speed up index load through parallelization
Thanks for all the feedback. The biggest change since the last version is how this patch series interacts with the split-index feature. With a split index, most of the cache entries are stored in the shared index so would benefit from multi-threaded parsing. To enable that, the EOIE and IEOT extensions are now written into the shared index (rather than being stripped out like the other extensions). Because of this, I can now update the tests in t1700-split-index.sh to have updated SHA values that include the EOIE extension instead of disabling the extension. Using p0002-read-cache.sh to generate some performance numbers shows how each of the various patches contribute to the overall performance win on a particularly large repo. Repo w/3M files Baseline Optimize V4 Extensions Entries -- 0002.1: read_cache 693.29655.65 -5.4% 470.71 -32.1% 399.62 -42.4% Note how this cuts nearly 300ms off the index load time! Base Ref: master Web-Diff: https://github.com/benpeart/git/commit/c1125a5d9a Checkout: git fetch https://github.com/benpeart/git read-index-multithread-v7 && git checkout c1125a5d9a ### Patches Ben Peart (6): read-cache: clean up casting and byte decoding eoie: add End of Index Entry (EOIE) extension config: add new index.threads config setting read-cache: load cache extensions on a worker thread ieot: add Index Entry Offset Table (IEOT) extension read-cache: load cache entries on worker threads Nguyễn Thái Ngọc Duy (1): read-cache.c: optimize reading index format v4 Documentation/config.txt | 7 + Documentation/technical/index-format.txt | 41 ++ config.c | 18 + config.h | 1 + read-cache.c | 749 +++ t/README | 5 + t/t1700-split-index.sh | 13 +- 7 files changed, 715 insertions(+), 119 deletions(-) base-commit: fe8321ec057f9231c26c29b364721568e58040f7 -- 2.18.0.windows.1
[PATCH v7 2/7] read-cache: clean up casting and byte decoding
From: Ben Peart This patch does a clean up pass to minimize the casting required to work with the memory mapped index (mmap). It also makes the decoding of network byte order more consistent by using get_be32() where possible. Signed-off-by: Ben Peart --- read-cache.c | 23 +++ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/read-cache.c b/read-cache.c index 583a4fb1f8..6ba99e2c96 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1650,7 +1650,7 @@ int verify_index_checksum; /* Allow fsck to force verification of the cache entry order. */ int verify_ce_order; -static int verify_hdr(struct cache_header *hdr, unsigned long size) +static int verify_hdr(const struct cache_header *hdr, unsigned long size) { git_hash_ctx c; unsigned char hash[GIT_MAX_RAWSZ]; @@ -1674,7 +1674,7 @@ static int verify_hdr(struct cache_header *hdr, unsigned long size) } static int read_index_extension(struct index_state *istate, - const char *ext, void *data, unsigned long sz) + const char *ext, const char *data, unsigned long sz) { switch (CACHE_EXT(ext)) { case CACHE_EXT_TREE: @@ -1889,8 +1889,8 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) int fd, i; struct stat st; unsigned long src_offset; - struct cache_header *hdr; - void *mmap; + const struct cache_header *hdr; + const char *mmap; size_t mmap_size; const struct cache_entry *previous_ce = NULL; @@ -1918,7 +1918,7 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) die_errno("unable to map index file"); close(fd); - hdr = mmap; + hdr = (const struct cache_header *)mmap; if (verify_hdr(hdr, mmap_size) < 0) goto unmap; @@ -1943,7 +1943,7 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) struct cache_entry *ce; unsigned long consumed; - disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset); + disk_ce = (struct ondisk_cache_entry *)(mmap + src_offset); ce = create_from_disk(istate, disk_ce, , previous_ce); set_index_entry(istate, i, ce); @@ -1961,21 +1961,20 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) * in 4-byte network byte order. */ uint32_t extsize; - memcpy(, (char *)mmap + src_offset + 4, 4); - extsize = ntohl(extsize); + extsize = get_be32(mmap + src_offset + 4); if (read_index_extension(istate, -(const char *) mmap + src_offset, -(char *) mmap + src_offset + 8, +mmap + src_offset, +mmap + src_offset + 8, extsize) < 0) goto unmap; src_offset += 8; src_offset += extsize; } - munmap(mmap, mmap_size); + munmap((void *)mmap, mmap_size); return istate->cache_nr; unmap: - munmap(mmap, mmap_size); + munmap((void *)mmap, mmap_size); die("index file corrupt"); } -- 2.18.0.windows.1
[PATCH v7 3/7] eoie: add End of Index Entry (EOIE) extension
From: Ben Peart The End of Index Entry (EOIE) is used to locate the end of the variable length index entries and the beginning of the extensions. Code can take advantage of this to quickly locate the index extensions without having to parse through all of the index entries. Because it must be able to be loaded before the variable length cache entries and other index extensions, this extension must be written last. The signature for this extension is { 'E', 'O', 'I', 'E' }. The extension consists of: - 32-bit offset to the end of the index entries - 160-bit SHA-1 over the extension types and their sizes (but not their contents). E.g. if we have "TREE" extension that is N-bytes long, "REUC" extension that is M-bytes long, followed by "EOIE", then the hash would be: SHA-1("TREE" + + "REUC" + ) Signed-off-by: Ben Peart --- Documentation/technical/index-format.txt | 23 read-cache.c | 152 +-- t/t1700-split-index.sh | 8 +- 3 files changed, 171 insertions(+), 12 deletions(-) diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index db3572626b..6bc2d90f7f 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -314,3 +314,26 @@ The remaining data of each directory block is grouped by type: - An ewah bitmap, the n-th bit indicates whether the n-th index entry is not CE_FSMONITOR_VALID. + +== End of Index Entry + + The End of Index Entry (EOIE) is used to locate the end of the variable + length index entries and the begining of the extensions. Code can take + advantage of this to quickly locate the index extensions without having + to parse through all of the index entries. + + Because it must be able to be loaded before the variable length cache + entries and other index extensions, this extension must be written last. + The signature for this extension is { 'E', 'O', 'I', 'E' }. + + The extension consists of: + + - 32-bit offset to the end of the index entries + + - 160-bit SHA-1 over the extension types and their sizes (but not + their contents). E.g. if we have "TREE" extension that is N-bytes + long, "REUC" extension that is M-bytes long, followed by "EOIE", + then the hash would be: + + SHA-1("TREE" + + + "REUC" + ) diff --git a/read-cache.c b/read-cache.c index 6ba99e2c96..af2605a168 100644 --- a/read-cache.c +++ b/read-cache.c @@ -43,6 +43,7 @@ #define CACHE_EXT_LINK 0x6c696e6b/* "link" */ #define CACHE_EXT_UNTRACKED 0x554E5452 /* "UNTR" */ #define CACHE_EXT_FSMONITOR 0x46534D4E /* "FSMN" */ +#define CACHE_EXT_ENDOFINDEXENTRIES 0x454F4945 /* "EOIE" */ /* changes that can be kept in $GIT_DIR/index (basically all extensions) */ #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \ @@ -1693,6 +1694,9 @@ static int read_index_extension(struct index_state *istate, case CACHE_EXT_FSMONITOR: read_fsmonitor_extension(istate, data, sz); break; + case CACHE_EXT_ENDOFINDEXENTRIES: + /* already handled in do_read_index() */ + break; default: if (*ext < 'A' || 'Z' < *ext) return error("index uses %.4s extension, which we do not understand", @@ -1883,6 +1887,9 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries) return ondisk_size + entries * per_entry; } +static size_t read_eoie_extension(const char *mmap, size_t mmap_size); +static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset); + /* remember to discard_cache() before reading a different cache! */ int do_read_index(struct index_state *istate, const char *path, int must_exist) { @@ -2190,11 +2197,15 @@ static int ce_write(git_hash_ctx *context, int fd, void *data, unsigned int len) return 0; } -static int write_index_ext_header(git_hash_ctx *context, int fd, - unsigned int ext, unsigned int sz) +static int write_index_ext_header(git_hash_ctx *context, git_hash_ctx *eoie_context, + int fd, unsigned int ext, unsigned int sz) { ext = htonl(ext); sz = htonl(sz); + if (eoie_context) { + the_hash_algo->update_fn(eoie_context, , 4); + the_hash_algo->update_fn(eoie_context, , 4); + } return ((ce_write(context, fd, , 4) < 0) || (ce_write(context, fd, , 4) < 0)) ? -1 : 0; } @@ -2437,7 +2448,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, { uint64_t start = getnanotime(); int newfd = tempfile->fd; - git_hash_ctx c;
[PATCH v7 1/7] read-cache.c: optimize reading index format v4
From: Nguyễn Thái Ngọc Duy Index format v4 requires some more computation to assemble a path based on a previous one. The current code is not very efficient because - it doubles memory copy, we assemble the final path in a temporary first before putting it back to a cache_entry - strbuf_remove() in expand_name_field() is not exactly a good fit for stripping a part at the end, _setlen() would do the same job and is much cheaper. - the open-coded loop to find the end of the string in expand_name_field() can't beat an optimized strlen() This patch avoids the temporary buffer and writes directly to the new cache_entry, which addresses the first two points. The last point could also be avoided if the total string length fits in the first 12 bits of ce_flags, if not we fall back to strlen(). Running "test-tool read-cache 100" on webkit.git (275k files), reading v2 only takes 4.226 seconds, while v4 takes 5.711 seconds, 35% more time. The patch reduces read time on v4 to 4.319 seconds. Signed-off-by: Nguyễn Thái Ngọc Duy --- read-cache.c | 128 --- 1 file changed, 60 insertions(+), 68 deletions(-) diff --git a/read-cache.c b/read-cache.c index 8d04d78a58..583a4fb1f8 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1713,63 +1713,24 @@ int read_index(struct index_state *istate) return read_index_from(istate, get_index_file(), get_git_dir()); } -static struct cache_entry *cache_entry_from_ondisk(struct mem_pool *mem_pool, - struct ondisk_cache_entry *ondisk, - unsigned int flags, - const char *name, - size_t len) -{ - struct cache_entry *ce = mem_pool__ce_alloc(mem_pool, len); - - ce->ce_stat_data.sd_ctime.sec = get_be32(>ctime.sec); - ce->ce_stat_data.sd_mtime.sec = get_be32(>mtime.sec); - ce->ce_stat_data.sd_ctime.nsec = get_be32(>ctime.nsec); - ce->ce_stat_data.sd_mtime.nsec = get_be32(>mtime.nsec); - ce->ce_stat_data.sd_dev = get_be32(>dev); - ce->ce_stat_data.sd_ino = get_be32(>ino); - ce->ce_mode = get_be32(>mode); - ce->ce_stat_data.sd_uid = get_be32(>uid); - ce->ce_stat_data.sd_gid = get_be32(>gid); - ce->ce_stat_data.sd_size = get_be32(>size); - ce->ce_flags = flags & ~CE_NAMEMASK; - ce->ce_namelen = len; - ce->index = 0; - hashcpy(ce->oid.hash, ondisk->sha1); - memcpy(ce->name, name, len); - ce->name[len] = '\0'; - return ce; -} - -/* - * Adjacent cache entries tend to share the leading paths, so it makes - * sense to only store the differences in later entries. In the v4 - * on-disk format of the index, each on-disk cache entry stores the - * number of bytes to be stripped from the end of the previous name, - * and the bytes to append to the result, to come up with its name. - */ -static unsigned long expand_name_field(struct strbuf *name, const char *cp_) -{ - const unsigned char *ep, *cp = (const unsigned char *)cp_; - size_t len = decode_varint(); - - if (name->len < len) - die("malformed name field in the index"); - strbuf_remove(name, name->len - len, len); - for (ep = cp; *ep; ep++) - ; /* find the end */ - strbuf_add(name, cp, ep - cp); - return (const char *)ep + 1 - cp_; -} - -static struct cache_entry *create_from_disk(struct mem_pool *mem_pool, +static struct cache_entry *create_from_disk(struct index_state *istate, struct ondisk_cache_entry *ondisk, unsigned long *ent_size, - struct strbuf *previous_name) + const struct cache_entry *previous_ce) { struct cache_entry *ce; size_t len; const char *name; unsigned int flags; + size_t copy_len; + /* +* Adjacent cache entries tend to share the leading paths, so it makes +* sense to only store the differences in later entries. In the v4 +* on-disk format of the index, each on-disk cache entry stores the +* number of bytes to be stripped from the end of the previous name, +* and the bytes to append to the result, to come up with its name. +*/ + int expand_name_field = istate->version == 4; /* On-disk flags are just 16 bits */ flags = get_be16(>flags); @@ -1789,21 +1750,54 @@ static struct cache_entry *create_from_disk(struct mem_pool *mem_pool, else name = ondisk->name; - if (!previous_name) { - /* v3 and earlier */ - if (len == CE_NAMEMASK) - len = strlen(name); - ce = cache_entry_from_ondisk(mem_pool,
Re: [PATCH v6 4/7] config: add new index.threads config setting
On 9/28/2018 6:15 PM, Junio C Hamano wrote: Ramsay Jones writes: if (!nr) { ieot_blocks = istate->cache_nr / THREAD_COST; - if (ieot_blocks < 1) - ieot_blocks = 1; cpus = online_cpus(); if (ieot_blocks > cpus - 1) ieot_blocks = cpus - 1; So, am I reading this correctly - you need cpus > 2 before an IEOT extension block is written out? OK. Why should we be even calling online_cpus() in this codepath to write the index in a single thread to begin with? The number of cpus that readers would use to read this index file has nothing to do with the number of cpus available to this particular writer process. As I mentioned in my other reply, this is optimizing for the most common case where the index is read from the same machine that wrote it and the user is taking the default settings (ie index.threads=true). Aligning the number of blocks to the number of threads that will be processing them avoids situations where one thread may have up to double the work to do as the other threads (for example, if there were 3 blocks to be processed by 2 threads).
Re: [PATCH v6 4/7] config: add new index.threads config setting
On 9/28/2018 1:07 PM, Junio C Hamano wrote: Ben Peart writes: Why does multithreading have to be disabled in this test? If multi-threading is enabled, it will write out the IEOT extension which changes the SHA and causes the test to fail. I think it is a design mistake to let the writing processes's capability decide what is written in the file to be read later by a different process, which possibly may have different capability. If you are not writing with multiple threads, it should not matter if that writer process is capable of and configured to spawn 8 threads if the process were reading the file---as it is not reading the file it is writing right now. I can understand if the design is to write IEOT only if the resulting index is expected to become large enough (above an arbitrary threshold like 100k entries) to matter. I also can understand if IEOT is omitted when the repository configuration says that no process is allowed to read the index with multi-threaded codepath in that repository. There are two different paths which determine how many blocks are written to the IEOT. The first is the default path. On this path, the number of blocks is determined by the number of cache entries divided by the THREAD_COST. If there are sufficient entries to make it faster to use threading, then it will automatically use enough blocks to optimize the performance of reading the entries across multiple threads. I currently cap the maximum number of blocks to be the number of cores that would be available to process them on that same machine purely as an optimization. The majority of the time, the index will be read from the same machine that it was written on so this works well. Before I added that logic, you would usually end up with more blocks than available threads which meant some threads had more to do than the other threads and resulted in worse performance. For example, 4 blocks across 3 threads results in the 1st thread having twice as much work to do as the other threads. If the index is copied to a machine with a different number of cores, it will still all work - it just may not be optimal for that machine. This is self correcting because as soon as the index is written out, it will be optimized for that machine. If the "automatically try to make it perform optimally" logic doesn't work for some reason, we have path #2. The second path is when the user specifies a specific number of blocks via the GIT_TEST_INDEX_THREADS= environment variable or the index.threads= config setting. If they ask for n blocks, they will get n blocks. This is the "I know what I'm doing and want to control the behavior" path. I just added one additional test (see patch below) to avoid a divide by zero bug and simplify things a bit. With this change, if there are fewer than two blocks, the IEOT extension is not written out as it isn't needed. The load would be single threaded anyway so there is no reason to write out a IEOT extensions that won't be used. diff --git a/read-cache.c b/read-cache.c index f5d766088d..a1006fa824 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2751,18 +2751,23 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfil e, */ if (!nr) { ieot_blocks = istate->cache_nr / THREAD_COST; - if (ieot_blocks < 1) - ieot_blocks = 1; cpus = online_cpus(); if (ieot_blocks > cpus - 1) ieot_blocks = cpus - 1; } else { ieot_blocks = nr; } - ieot = xcalloc(1, sizeof(struct index_entry_offset_table) - + (ieot_blocks * sizeof(struct index_entry_offset))); - ieot->nr = 0; - ieot_work = DIV_ROUND_UP(entries, ieot_blocks); + + /* +* no reason to write out the IEOT extension if we don't +* have enough blocks to utilize multi-threading +*/ + if (ieot_blocks > 1) { + ieot = xcalloc(1, sizeof(struct index_entry_offset_table) + + (ieot_blocks * sizeof(struct index_entry_offset))); + ieot->nr = 0; + ieot_work = DIV_ROUND_UP(entries, ieot_blocks); + } } #endif
Re: [PATCH v6 3/7] eoie: add End of Index Entry (EOIE) extension
On 9/27/2018 8:19 PM, SZEDER Gábor wrote: On Wed, Sep 26, 2018 at 03:54:38PM -0400, Ben Peart wrote: The End of Index Entry (EOIE) is used to locate the end of the variable Nit: perhaps start with: The End of Index Entry (EOIE) optional extension can be used to ... to make it clearer for those who don't immediately realize the significance of the upper case 'E' in the extension's signature. length index entries and the beginning of the extensions. Code can take advantage of this to quickly locate the index extensions without having to parse through all of the index entries. Because it must be able to be loaded before the variable length cache entries and other index extensions, this extension must be written last. The signature for this extension is { 'E', 'O', 'I', 'E' }. The extension consists of: - 32-bit offset to the end of the index entries - 160-bit SHA-1 over the extension types and their sizes (but not their contents). E.g. if we have "TREE" extension that is N-bytes long, "REUC" extension that is M-bytes long, followed by "EOIE", then the hash would be: SHA-1("TREE" + + "REUC" + ) Signed-off-by: Ben Peart --- Documentation/technical/index-format.txt | 23 read-cache.c | 151 +-- t/README | 5 + t/t1700-split-index.sh | 1 + 4 files changed, 172 insertions(+), 8 deletions(-) diff --git a/t/README b/t/README index 3ea6c85460..aa33ac4f26 100644 --- a/t/README +++ b/t/README @@ -327,6 +327,11 @@ GIT_TEST_COMMIT_GRAPH=, when true, forces the commit-graph to be written after every 'git commit' command, and overrides the 'core.commitGraph' setting to true. +GIT_TEST_DISABLE_EOIE= disables writing the EOIE extension. +This is used to allow tests 1, 4-9 in t1700-split-index.sh to succeed +as they currently hard code SHA values for the index which are no longer +valid due to the addition of the EOIE extension. Is this extension enabled by default? The commit message doesn't explicitly say so, but I don't see any way to turn it on or off, while there is this new GIT_TEST environment variable to disable it for one particular test, so it seems so. If that's indeed the case, then wouldn't it be better to update those hard-coded SHA1 values in t1700 instead? Yes, it is enabled by default and the only way to disable it is the GIT_TEST_DISABLE_EOIE environment variable. The tests in t1700-split-index.sh assume that there are no extensions in the index file so anything that adds an extension, will break one or more of the tests. First in 'enable split index', they hard code SHA values assuming there are no extensions. If some option adds an extension, these hard coded values no longer match and the test fails. Later in 'disable split index' they save off the SHA of the index with split-index turned off and then in later tests, compare it to the SHA of the shared index. Because extensions are stripped when the shared index is written out this only works if there were not extensions in the original index. I'll document this behavior and reasoning in the test directly. This did cause me to reexamine how EOIE and IEOT behave when split index is turned on. These two extensions help most with a large index. When split index is turned on, the large index is actually the shared index as the index is now the smaller set of deltas. Currently, the extensions are stripped out of the shared index which means they are not available when they are needed to quickly load the shared index. I'll see if I can update the patch so that these extensions are still written out and available in the shared index to speed up when it is loaded. Thanks! Naming Tests diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh index be22398a85..1f168378c8 100755 --- a/t/t1700-split-index.sh +++ b/t/t1700-split-index.sh @@ -7,6 +7,7 @@ test_description='split index mode tests' # We need total control of index splitting here sane_unset GIT_TEST_SPLIT_INDEX sane_unset GIT_FSMONITOR_TEST +GIT_TEST_DISABLE_EOIE=true; export GIT_TEST_DISABLE_EOIE test_expect_success 'enable split index' ' git config splitIndex.maxPercentChange 100 && -- 2.18.0.windows.1
Re: [PATCH v3 3/5] fsmonitor: update GIT_TEST_FSMONITOR support
On 9/28/2018 10:21 AM, Ben Peart wrote: On 9/28/2018 6:01 AM, SZEDER Gábor wrote: On Tue, Sep 18, 2018 at 11:29:35PM +, Ben Peart wrote: diff --git a/t/README b/t/README index 56a417439c..47165f7eab 100644 --- a/t/README +++ b/t/README @@ -319,6 +319,10 @@ GIT_TEST_OE_DELTA_SIZE= exercises the uncommon pack-objects code path where deltas larger than this limit require extra memory allocation for bookkeeping. +GIT_TEST_FSMONITOR=$PWD/t7519/fsmonitor-all exercises the fsmonitor +code path for utilizing a file system monitor to speed up detecting +new or changed files. Here you tell us to set GIT_TEST_FSMONITOR to an absolute path, and we are good to go. diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh index 756beb0d8e..d77012ea6d 100755 --- a/t/t7519-status-fsmonitor.sh +++ b/t/t7519-status-fsmonitor.sh @@ -8,7 +8,7 @@ test_description='git status with file system watcher' # To run the entire git test suite using fsmonitor: # # copy t/t7519/fsmonitor-all to a location in your path and then set -# GIT_FSMONITOR_TEST=fsmonitor-all and run your tests. +# GIT_TEST_FSMONITOR=fsmonitor-all and run your tests. But this old comment is different, suggesting copying that script to our $PATH. I prefer your instructions above, because it's only a single step, and, more importantly, it won't pollute my $PATH. I think this comment should be updated to make the advices in both places consistent. Or perhaps even removed, now that all GIT_TEST variables are documented in the same place? I prefer the suggestion to simply remove this text from the test script now that there is documentation for it in the t/README file. Junio, can you squash in the following patch or would you prefer I reroll the entire series? Thanks, Ben From 393007340dc1baf3539ab727e0a8128e7c408a27 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Fri, 28 Sep 2018 10:23:18 -0400 Subject: fixup! fsmonitor: remove outdated instructions from test Remove the outdated instructions on how to run the test suite utilizing fsmonitor now that it is properly documented in t/README. Signed-off-by: Ben Peart --- Notes: Base Ref: git-test-cleanup-v3 Web-Diff: https://github.com/benpeart/git/commit/393007340d Checkout: git fetch https://github.com/benpeart/git git-test-cleanup-v1 && git checkout 393007340d t/t7519-status-fsmonitor.sh | 7 --- 1 file changed, 7 deletions(-) diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh index 8308d6d5b1..3f0dd98010 100755 --- a/t/t7519-status-fsmonitor.sh +++ b/t/t7519-status-fsmonitor.sh @@ -4,13 +4,6 @@ test_description='git status with file system watcher' . ./test-lib.sh -# -# To run the entire git test suite using fsmonitor: -# -# copy t/t7519/fsmonitor-all to a location in your path and then set -# GIT_TEST_FSMONITOR=fsmonitor-all and run your tests. -# - # Note, after "git reset --hard HEAD" no extensions exist other than 'TREE' # "git update-index --fsmonitor" can be used to get the extension written # before testing the results. base-commit: 043246d9369fb851c5c2b922466f77fc7ef0327b -- 2.18.0.windows.1
Re: [PATCH v3 3/5] fsmonitor: update GIT_TEST_FSMONITOR support
On 9/28/2018 6:01 AM, SZEDER Gábor wrote: On Tue, Sep 18, 2018 at 11:29:35PM +, Ben Peart wrote: diff --git a/t/README b/t/README index 56a417439c..47165f7eab 100644 --- a/t/README +++ b/t/README @@ -319,6 +319,10 @@ GIT_TEST_OE_DELTA_SIZE= exercises the uncommon pack-objects code path where deltas larger than this limit require extra memory allocation for bookkeeping. +GIT_TEST_FSMONITOR=$PWD/t7519/fsmonitor-all exercises the fsmonitor +code path for utilizing a file system monitor to speed up detecting +new or changed files. Here you tell us to set GIT_TEST_FSMONITOR to an absolute path, and we are good to go. diff --git a/t/t7519-status-fsmonitor.sh b/t/t7519-status-fsmonitor.sh index 756beb0d8e..d77012ea6d 100755 --- a/t/t7519-status-fsmonitor.sh +++ b/t/t7519-status-fsmonitor.sh @@ -8,7 +8,7 @@ test_description='git status with file system watcher' # To run the entire git test suite using fsmonitor: # # copy t/t7519/fsmonitor-all to a location in your path and then set -# GIT_FSMONITOR_TEST=fsmonitor-all and run your tests. +# GIT_TEST_FSMONITOR=fsmonitor-all and run your tests. But this old comment is different, suggesting copying that script to our $PATH. I prefer your instructions above, because it's only a single step, and, more importantly, it won't pollute my $PATH. I think this comment should be updated to make the advices in both places consistent. Or perhaps even removed, now that all GIT_TEST variables are documented in the same place? I prefer the suggestion to simply remove this text from the test script now that there is documentation for it in the t/README file.
Re: [PATCH v6 4/7] config: add new index.threads config setting
On 9/27/2018 8:26 PM, SZEDER Gábor wrote: On Wed, Sep 26, 2018 at 03:54:39PM -0400, Ben Peart wrote: Add support for a new index.threads config setting which will be used to control the threading code in do_read_index(). A value of 0 will tell the index code to automatically determine the correct number of threads to use. A value of 1 will make the code single threaded. A value greater than 1 will set the maximum number of threads to use. For testing purposes, this setting can be overwritten by setting the GIT_TEST_INDEX_THREADS= environment variable to a value greater than 0. Signed-off-by: Ben Peart --- diff --git a/t/README b/t/README index aa33ac4f26..0fcecf4500 100644 --- a/t/README +++ b/t/README @@ -332,6 +332,11 @@ This is used to allow tests 1, 4-9 in t1700-split-index.sh to succeed as they currently hard code SHA values for the index which are no longer valid due to the addition of the EOIE extension. +GIT_TEST_INDEX_THREADS= enables exercising the multi-threaded loading +of the index for the whole test suite by bypassing the default number of +cache entries and thread minimums. Settting this to 1 will make the s/ttt/tt/ +index loading single threaded. + Naming Tests diff --git a/t/t1700-split-index.sh b/t/t1700-split-index.sh index 1f168378c8..ab205954cf 100755 --- a/t/t1700-split-index.sh +++ b/t/t1700-split-index.sh @@ -8,6 +8,7 @@ test_description='split index mode tests' sane_unset GIT_TEST_SPLIT_INDEX sane_unset GIT_FSMONITOR_TEST GIT_TEST_DISABLE_EOIE=true; export GIT_TEST_DISABLE_EOIE +GIT_TEST_INDEX_THREADS=1; export GIT_TEST_INDEX_THREADS Why does multithreading have to be disabled in this test? If multi-threading is enabled, it will write out the IEOT extension which changes the SHA and causes the test to fail. I will update the logic in this case to not write out the IEOT extension as it isn't needed. test_expect_success 'enable split index' ' git config splitIndex.maxPercentChange 100 && -- 2.18.0.windows.1
Re: [PATCH] read-cache: fix division by zero core-dump
On 9/27/2018 6:24 PM, Ramsay Jones wrote: commit 225df8a468 ("ieot: add Index Entry Offset Table (IEOT) extension", 2018-09-26) added a 'DIV_ROUND_UP(entries, ieot_blocks) expression, where ieot_blocks was set to zero for a single cpu platform. This caused an SIGFPE and a core dump in practically every test in the test-suite, until test t4056-diff-order.sh, which then went into an infinite loop! Signed-off-by: Ramsay Jones --- Hi Ben, Could you please squash this into the relevant commits on your 'bp/read-cache-parallel' branch. (The first hunk fixes a sparse warning about using an integer as a NULL pointer). Absolutely - thanks for the patch. I don't know how long it's been since I've been on a single core CPU - I'm sad for you. ;-) Thanks! ATB, Ramsay Jones read-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/read-cache.c b/read-cache.c index 6755d58877..40f096f70a 100644 --- a/read-cache.c +++ b/read-cache.c @@ -2141,7 +2141,7 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) size_t extension_offset = 0; #ifndef NO_PTHREADS int nr_threads, cpus; - struct index_entry_offset_table *ieot = 0; + struct index_entry_offset_table *ieot = NULL; #endif if (istate->initialized) @@ -2771,7 +2771,7 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, if (ieot_blocks < 1) ieot_blocks = 1; cpus = online_cpus(); - if (ieot_blocks > cpus - 1) + if (cpus > 1 && ieot_blocks > cpus - 1) ieot_blocks = cpus - 1; } else { ieot_blocks = nr;
Re: Git Test Coverage Report (Tuesday, Sept 25)
On 9/26/2018 2:54 PM, Derrick Stolee wrote: On 9/26/2018 2:43 PM, Thomas Gummerer wrote: On 09/26, Derrick Stolee wrote: This is a bit tricky to do, but I will investigate. For some things, the values can conflict with each other (GIT_TEST_SPLIT_INDEX doesn't play nicely with other index options, I think). Just commenting on this point. I think all the index options should be playing nicely with eachother. I occasionally run the test suite with some of them turned on, and if something failed that was always an actual bug. The different modes can be used in different combinations in the wild as well, so we should get them to interact nicely in the test suite. Thanks! I'm still working out details on this, since the test suite is broken with GIT_TEST_COMMIT_GRAPH due to a verbosity issue [1], which I forgot until I ran the tests with the variable on. I'll re-run with these variables: GIT_TEST_SPLIT_INDEX=1 GIT_TEST_FULL_IN_PACK_ARRAY=1 GIT_TEST_OE_SIZE=128 GIT_TEST_OE_DELTA_SIZE=128 GIT_TEST_VALIDATE_INDEX_CACHE_ENTRIES=1 GIT_TEST_FSMONITOR=$PWD/t/t7519/fsmonitor-all GIT_TEST_INDEX_VERSION=4 GIT_TEST_PRELOAD_INDEX=1 GIT_TEST_DISABLE_EOIE=1 GIT_TEST_INDEX_THREADS=1 Because the test repos are so small (ie smaller than 10K files), the test suite already executes as if GIT_TEST_INDEX_THREADS=1 is set (ie single threaded). I added the test variable so that the multi-threading code could be forced to execute in spite of the default minimum number of files logic. I'd recommend you set GIT_TEST_INDEX_THREADS=3 instead. (3 because 1 thread is used for loading the index extensions and we need 2 or more threads available to exercise multi-threaded loading of the index entries). Thanks, -Stolee [1] https://public-inbox.org/git/60aae3d6-35b2-94fb-afd7-6978e935a...@gmail.com/
Re: Git Test Coverage Report (Tuesday, Sept 25)
On 9/26/2018 2:44 PM, Derrick Stolee wrote: On 9/26/2018 1:59 PM, Junio C Hamano wrote: Derrick Stolee writes: In an effort to ensure new code is reasonably covered by the test suite, we now have contrib/coverage-diff.sh to combine the gcov output from 'make coverage-test ; make coverage-report' with the output from 'git diff A B' to discover _new_ lines of code that are not covered. This report takes the output of these results after running on four branches: pu: 80e728fa913dc3a1165b6dec9a7afa6052a86325 jch: 0c10634844314ab89666ed0a1c7d36dde7ac9689 next: 76f2f5c1e34c4dbef1029e2984c2892894c444ce master: fe8321ec057f9231c26c29b364721568e58040f7 master@{1}: 2d3b1c576c85b7f5db1f418907af00ab88e0c303 I ran the test suite on each of these branches on an Ubuntu Linux VM, and I'm missing some dependencies (like apache, svn, and perforce) so not all tests are run. Thanks. master@{1}..master: builtin/remote.c 5025425dfff ( Shulhan 2018-09-13 20:18:33 +0700 864) return error(_("No such remote: '%s'"), name); commit-reach.c b67f6b26e35 (Derrick Stolee 2018-09-21 08:05:26 -0700 559) continue; b67f6b26e35 (Derrick Stolee 2018-09-21 08:05:26 -0700 569) from->objects[i].item->flags |= assign_flag; b67f6b26e35 (Derrick Stolee 2018-09-21 08:05:26 -0700 570) continue; b67f6b26e35 (Derrick Stolee 2018-09-21 08:05:26 -0700 576) result = 0; b67f6b26e35 (Derrick Stolee 2018-09-21 08:05:26 -0700 577) goto cleanup; cat: compat#mingw.c.gcov: No such file or directory ll-merge.c d64324cb60e (Torsten Bögershausen 2018-09-12 21:32:02 +0200 379) marker_size = DEFAULT_CONFLICT_MARKER_SIZE; remote-curl.c c3b9bc94b9b (Elijah Newren 2018-09-05 10:03:07 -0700 181) options.filter = xstrdup(value); As I think the data presented here is very valuable, let me ignore the findings of this specific run (which will be fixed by individual authors as/if necessary), and focus on the way the data is presented (which will affect the ease of consumption by authors of future commits). These wrapped blame output lines are harder to view. Can we have this in plain/text without format=flowed at least? Perhaps removing the middle columns of data and just " ) " would be easier? We could also remove tabs to save space. For example: builtin/remote.c 5025425dfff 864) return error(_("No such remote: '%s'"), name); commit-reach.c b67f6b26e35 559) continue; b67f6b26e35 569) from->objects[i].item->flags |= assign_flag; b67f6b26e35 570) continue; b67f6b26e35 576) result = 0; b67f6b26e35 577) goto cleanup; ll-merge.c d64324cb60e 379) marker_size = DEFAULT_CONFLICT_MARKER_SIZE; remote-curl.c c3b9bc94b9b 181) options.filter = xstrdup(value); This does still pad the data by a bit, but should be more readable. Most "uncovered" code will be indented at least one level. We do lose the author information, but keen readers could identify code they are interested in by filename and then look up the commit by OID later. I personally find the author data very useful as it makes it trivial for me to scan for and find changes I'm responsible for. Just scanning the output of the mail and looking for file names I may have changed lately is much more laborious - meaning I'm much less likely to actually do it :-). Perhaps a reasonable compromise would be to put the author name once with the block of changes (like you are doing for the file name) rather than on every line that changed and wasn't executed. I personally do not mind a monospaced and non-wrapping website, just I do not mind visiting travis-ci.org for recent results from time to time. Others may differ. There is an error message from "cat" in it, by the way. Thanks! I'll add an 'if' statement when there is no gcov file. This happens for the compat layers that are not compiled in and for the t/helper directory, it seems. preload-index.c ae9af12287b (Nguyễn Thái Ngọc Duy 2018-09-15 19:56:04 +0200 73) struct progress_data *pd = p->progress; ae9af12287b (Nguyễn Thái Ngọc Duy 2018-09-15 19:56:04 +0200 75) pthread_mutex_lock(>mutex); ae9af12287b (Nguyễn Thái Ngọc Duy 2018-09-15 19:56:04 +0200 76) pd->n += last_nr - nr; ae9af12287b (Nguyễn Thái Ngọc Duy 2018-09-15 19:56:04 +0200 77) display_progress(pd->progress, pd->n); ae9af12287b (Nguyễn Thái Ngọc Duy 2018-09-15 19:56:04 +0200 78) pthread_mutex_unlock(>mutex); ae9af12287b (Nguyễn Thái Ngọc Duy 2018-09-15 19:56:04 +0200 79) last_nr = nr; I wonder how much we can save the effort that is needed to scan the output if we somehow coalesce these consecutive lines that are attributed to the same commit. It could be possible to group consecutive lines together, but hopefully reducing
[PATCH v6 7/7] read-cache: load cache entries on worker threads
This patch helps address the CPU cost of loading the index by utilizing the Index Entry Offset Table (IEOT) to divide loading and conversion of the cache entries across multiple threads in parallel. I used p0002-read-cache.sh to generate some performance data: Test w/100,000 files reduced the time by 32.24% Test w/1,000,000 files reduced the time by -4.77% Note that on the 1,000,000 files case, multi-threading the cache entry parsing does not yield a performance win. This is because the cost to parse the index extensions in this repo, far outweigh the cost of loading the cache entries. The high cost of parsing the index extensions is driven by the cache tree and the untracked cache extensions. As this is currently the longest pole, any reduction in this time will reduce the overall index load times so is worth further investigation in another patch series. Signed-off-by: Ben Peart --- read-cache.c | 224 +++ 1 file changed, 189 insertions(+), 35 deletions(-) diff --git a/read-cache.c b/read-cache.c index 9b0554d4e6..f5d766088d 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1720,7 +1720,8 @@ int read_index(struct index_state *istate) return read_index_from(istate, get_index_file(), get_git_dir()); } -static struct cache_entry *create_from_disk(struct index_state *istate, +static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool, + unsigned int version, struct ondisk_cache_entry *ondisk, unsigned long *ent_size, const struct cache_entry *previous_ce) @@ -1737,7 +1738,7 @@ static struct cache_entry *create_from_disk(struct index_state *istate, * number of bytes to be stripped from the end of the previous name, * and the bytes to append to the result, to come up with its name. */ - int expand_name_field = istate->version == 4; + int expand_name_field = version == 4; /* On-disk flags are just 16 bits */ flags = get_be16(>flags); @@ -1761,16 +1762,17 @@ static struct cache_entry *create_from_disk(struct index_state *istate, const unsigned char *cp = (const unsigned char *)name; size_t strip_len, previous_len; - previous_len = previous_ce ? previous_ce->ce_namelen : 0; + /* If we're at the begining of a block, ignore the previous name */ strip_len = decode_varint(); - if (previous_len < strip_len) { - if (previous_ce) + if (previous_ce) { + previous_len = previous_ce->ce_namelen; + if (previous_len < strip_len) die(_("malformed name field in the index, near path '%s'"), - previous_ce->name); - else - die(_("malformed name field in the index in the first path")); + previous_ce->name); + copy_len = previous_len - strip_len; + } else { + copy_len = 0; } - copy_len = previous_len - strip_len; name = (const char *)cp; } @@ -1780,7 +1782,7 @@ static struct cache_entry *create_from_disk(struct index_state *istate, len += copy_len; } - ce = mem_pool__ce_alloc(istate->ce_mem_pool, len); + ce = mem_pool__ce_alloc(ce_mem_pool, len); ce->ce_stat_data.sd_ctime.sec = get_be32(>ctime.sec); ce->ce_stat_data.sd_mtime.sec = get_be32(>mtime.sec); @@ -1950,6 +1952,52 @@ static void *load_index_extensions(void *_data) return NULL; } +/* + * A helper function that will load the specified range of cache entries + * from the memory mapped file and add them to the given index. + */ +static unsigned long load_cache_entry_block(struct index_state *istate, + struct mem_pool *ce_mem_pool, int offset, int nr, const char *mmap, + unsigned long start_offset, const struct cache_entry *previous_ce) +{ + int i; + unsigned long src_offset = start_offset; + + for (i = offset; i < offset + nr; i++) { + struct ondisk_cache_entry *disk_ce; + struct cache_entry *ce; + unsigned long consumed; + + disk_ce = (struct ondisk_cache_entry *)(mmap + src_offset); + ce = create_from_disk(ce_mem_pool, istate->version, disk_ce, , previous_ce); + set_index_entry(istate, i, ce); + + src_offset += consumed; + previous_ce = ce; + } + return src_offset - start_offset; +} + +stati
[PATCH v6 6/7] ieot: add Index Entry Offset Table (IEOT) extension
This patch enables addressing the CPU cost of loading the index by adding additional data to the index that will allow us to efficiently multi- thread the loading and conversion of cache entries. It accomplishes this by adding an (optional) index extension that is a table of offsets to blocks of cache entries in the index file. To make this work for V4 indexes, when writing the cache entries, it periodically "resets" the prefix-compression by encoding the current entry as if the path name for the previous entry is completely different and saves the offset of that entry in the IEOT. Basically, with V4 indexes, it generates offsets into blocks of prefix-compressed entries. Signed-off-by: Ben Peart --- Documentation/technical/index-format.txt | 18 +++ read-cache.c | 166 +++ 2 files changed, 184 insertions(+) diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt index 6bc2d90f7f..7c4d67aa6a 100644 --- a/Documentation/technical/index-format.txt +++ b/Documentation/technical/index-format.txt @@ -337,3 +337,21 @@ The remaining data of each directory block is grouped by type: SHA-1("TREE" + + "REUC" + ) + +== Index Entry Offset Table + + The Index Entry Offset Table (IEOT) is used to help address the CPU + cost of loading the index by enabling multi-threading the process of + converting cache entries from the on-disk format to the in-memory format. + The signature for this extension is { 'I', 'E', 'O', 'T' }. + + The extension consists of: + + - 32-bit version (currently 1) + + - A number of index offset entries each consisting of: + +- 32-bit offset from the begining of the file to the first cache entry + in this block of entries. + +- 32-bit count of cache entries in this block diff --git a/read-cache.c b/read-cache.c index 8da21c9273..9b0554d4e6 100644 --- a/read-cache.c +++ b/read-cache.c @@ -45,6 +45,7 @@ #define CACHE_EXT_UNTRACKED 0x554E5452 /* "UNTR" */ #define CACHE_EXT_FSMONITOR 0x46534D4E /* "FSMN" */ #define CACHE_EXT_ENDOFINDEXENTRIES 0x454F4945 /* "EOIE" */ +#define CACHE_EXT_INDEXENTRYOFFSETTABLE 0x49454F54 /* "IEOT" */ /* changes that can be kept in $GIT_DIR/index (basically all extensions) */ #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \ @@ -1696,6 +1697,7 @@ static int read_index_extension(struct index_state *istate, read_fsmonitor_extension(istate, data, sz); break; case CACHE_EXT_ENDOFINDEXENTRIES: + case CACHE_EXT_INDEXENTRYOFFSETTABLE: /* already handled in do_read_index() */ break; default: @@ -1888,6 +1890,23 @@ static size_t estimate_cache_size(size_t ondisk_size, unsigned int entries) return ondisk_size + entries * per_entry; } +struct index_entry_offset +{ + /* starting byte offset into index file, count of index entries in this block */ + int offset, nr; +}; + +struct index_entry_offset_table +{ + int nr; + struct index_entry_offset entries[0]; +}; + +#ifndef NO_PTHREADS +static struct index_entry_offset_table *read_ieot_extension(const char *mmap, size_t mmap_size, size_t offset); +static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_table *ieot); +#endif + static size_t read_eoie_extension(const char *mmap, size_t mmap_size); static void write_eoie_extension(struct strbuf *sb, git_hash_ctx *eoie_context, size_t offset); @@ -1931,6 +1950,15 @@ static void *load_index_extensions(void *_data) return NULL; } +/* + * Mostly randomly chosen maximum thread counts: we + * cap the parallelism to online_cpus() threads, and we want + * to have at least 1 cache entries per thread for it to + * be worth starting a thread. + */ + +#define THREAD_COST(1) + /* remember to discard_cache() before reading a different cache! */ int do_read_index(struct index_state *istate, const char *path, int must_exist) { @@ -2523,6 +2551,9 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, struct strbuf previous_name_buf = STRBUF_INIT, *previous_name; int drop_cache_tree = istate->drop_cache_tree; off_t offset; + int ieot_work = 1; + struct index_entry_offset_table *ieot = NULL; + int nr; for (i = removed = extended = 0; i < entries; i++) { if (cache[i]->ce_flags & CE_REMOVE) @@ -2556,7 +2587,33 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile, if (ce_write(, newfd, , sizeof(hdr)) < 0) return -1; +#ifndef NO_PTHREADS + if (!strip_extensions && (nr = git_config_get_index_threads()) != 1) { + int ieot_blocks, cpus; + + /* +* ensure default number of ieot