[PATCH v3 1/2] builtin/config.c: treat type specifiers singularly
Internally, we represent `git config`'s type specifiers as a bitset using OPT_BIT. 'bool' is 1<<0, 'int' is 1<<1, and so on. This technique allows for the representation of multiple type specifiers in the `int types` field, but this multi-representation is left unused. In fact, `git config` will not accept multiple type specifiers at a time, as indicated by: $ git config --int --bool some.section error: only one type at a time. This patch uses `OPT_SET_INT` to prefer the _last_ mentioned type specifier, so that the above command would instead be valid, and a synonym of: $ git config --bool some.section This change is motivated by two urges: (1) it does not make sense to represent a singular type specifier internally as a bitset, only to complain when there are multiple bits in the set. `OPT_SET_INT` is more well-suited to this task than `OPT_BIT` is. (2) a future patch will introduce `--type=`, and we would like not to complain in the following situation: $ git config --int --type=int Signed-off-by: Taylor Blau --- builtin/config.c | 49 +++--- t/t1300-repo-config.sh | 11 ++ 2 files changed, 33 insertions(+), 27 deletions(-) diff --git a/builtin/config.c b/builtin/config.c index 01169dd62..92fb8d56b 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -25,7 +25,7 @@ static char term = '\n'; static int use_global_config, use_system_config, use_local_config; static struct git_config_source given_config_source; -static int actions, types; +static int actions, type; static int end_null; static int respect_includes_opt = -1; static struct config_options config_options; @@ -55,11 +55,11 @@ static int show_origin; #define PAGING_ACTIONS (ACTION_LIST | ACTION_GET_ALL | \ ACTION_GET_REGEXP | ACTION_GET_URLMATCH) -#define TYPE_BOOL (1<<0) -#define TYPE_INT (1<<1) -#define TYPE_BOOL_OR_INT (1<<2) -#define TYPE_PATH (1<<3) -#define TYPE_EXPIRY_DATE (1<<4) +#define TYPE_BOOL 1 +#define TYPE_INT 2 +#define TYPE_BOOL_OR_INT 3 +#define TYPE_PATH 4 +#define TYPE_EXPIRY_DATE 5 static struct option builtin_config_options[] = { OPT_GROUP(N_("Config file location")), @@ -84,11 +84,11 @@ static struct option builtin_config_options[] = { OPT_BIT(0, "get-color", &actions, N_("find the color configured: slot [default]"), ACTION_GET_COLOR), OPT_BIT(0, "get-colorbool", &actions, N_("find the color setting: slot [stdout-is-tty]"), ACTION_GET_COLORBOOL), OPT_GROUP(N_("Type")), - OPT_BIT(0, "bool", &types, N_("value is \"true\" or \"false\""), TYPE_BOOL), - OPT_BIT(0, "int", &types, N_("value is decimal number"), TYPE_INT), - OPT_BIT(0, "bool-or-int", &types, N_("value is --bool or --int"), TYPE_BOOL_OR_INT), - OPT_BIT(0, "path", &types, N_("value is a path (file or directory name)"), TYPE_PATH), - OPT_BIT(0, "expiry-date", &types, N_("value is an expiry date"), TYPE_EXPIRY_DATE), + OPT_SET_INT(0, "bool", &type, N_("value is \"true\" or \"false\""), TYPE_BOOL), + OPT_SET_INT(0, "int", &type, N_("value is decimal number"), TYPE_INT), + OPT_SET_INT(0, "bool-or-int", &type, N_("value is --bool or --int"), TYPE_BOOL_OR_INT), + OPT_SET_INT(0, "path", &type, N_("value is a path (file or directory name)"), TYPE_PATH), + OPT_SET_INT(0, "expiry-date", &type, N_("value is an expiry date"), TYPE_EXPIRY_DATE), OPT_GROUP(N_("Other")), OPT_BOOL('z', "null", &end_null, N_("terminate values with NUL byte")), OPT_BOOL(0, "name-only", &omit_values, N_("show variable names only")), @@ -149,26 +149,26 @@ static int format_config(struct strbuf *buf, const char *key_, const char *value if (show_keys) strbuf_addch(buf, key_delim); - if (types == TYPE_INT) + if (type == TYPE_INT) strbuf_addf(buf, "%"PRId64, git_config_int64(key_, value_ ? value_ : "")); - else if (types == TYPE_BOOL) + else if (type == TYPE_BOOL) strbuf_addstr(buf, git_config_bool(key_, value_) ? "true" : "false"); - else if (types == TYPE_BOOL_OR_INT) { + else if (type == TYPE_BOOL_OR_INT) { int is_bool, v; v = git_config_bool_or_int(key_, value_, &is_bool); if (is_bool) strbuf_addstr(buf, v ? "true" : "false"); else strbuf_addf(buf, "%d", v); - } else if (types == TYPE_PATH) { + } else if (type == TYPE_PATH) { const char *v; if (git_config_pathname(&v, key_, value_) < 0) return -1;
[PATCH v3 2/2] builtin/config.c: prefer `--type=bool` over `--bool`, etc.
`git config` has long allowed the ability for callers to provide a 'type specifier', which instructs `git config` to (1) ensure that incoming values are satisfiable under that type, and (2) that outgoing values are canonicalized under that type. In another series, we propose to add extend this functionality with `--color` and `--default` to replace `--get-color`. However, we traditionally use `--color` to mean "colorize this output", instead of "this value should be treated as a color". Currently, `git config` does not support this kind of colorization, but we should be careful to avoid inhabiting this option too soon, so that `git config` can support `--color` (in the traditional sense) in the future, if that is desired. In this patch, we prefer `--type=[int|bool|bool-or-int|...]` over `--int`, `--bool`, and etc. This allows the aforementioned other patch to add `--color` (in the non-traditional sense) via `--type=color`, instead of `--color`. Signed-off-by: Taylor Blau --- Documentation/git-config.txt | 66 +++- builtin/config.c | 28 +++ t/t1300-repo-config.sh | 18 ++ 3 files changed, 80 insertions(+), 32 deletions(-) diff --git a/Documentation/git-config.txt b/Documentation/git-config.txt index e09ed5d7d..86c9b 100644 --- a/Documentation/git-config.txt +++ b/Documentation/git-config.txt @@ -9,13 +9,13 @@ git-config - Get and set repository or global options SYNOPSIS [verse] -'git config' [] [type] [--show-origin] [-z|--null] name [value [value_regex]] -'git config' [] [type] --add name value -'git config' [] [type] --replace-all name value [value_regex] -'git config' [] [type] [--show-origin] [-z|--null] --get name [value_regex] -'git config' [] [type] [--show-origin] [-z|--null] --get-all name [value_regex] -'git config' [] [type] [--show-origin] [-z|--null] [--name-only] --get-regexp name_regex [value_regex] -'git config' [] [type] [-z|--null] --get-urlmatch name URL +'git config' [] [--type] [--show-origin] [-z|--null] name [value [value_regex]] +'git config' [] [--type] --add name value +'git config' [] [--type] --replace-all name value [value_regex] +'git config' [] [--type] [--show-origin] [-z|--null] --get name [value_regex] +'git config' [] [--type] [--show-origin] [-z|--null] --get-all name [value_regex] +'git config' [] [--type] [--show-origin] [-z|--null] [--name-only] --get-regexp name_regex [value_regex] +'git config' [] [--type] [-z|--null] --get-urlmatch name URL 'git config' [] --unset name [value_regex] 'git config' [] --unset-all name [value_regex] 'git config' [] --rename-section old_name new_name @@ -38,12 +38,10 @@ existing values that match the regexp are updated or unset. If you want to handle the lines that do *not* match the regex, just prepend a single exclamation mark in front (see also <>). -The type specifier can be either `--int` or `--bool`, to make -'git config' ensure that the variable(s) are of the given type and -convert the value to the canonical form (simple decimal number for int, -a "true" or "false" string for bool), or `--path`, which does some -path expansion (see `--path` below). If no type specifier is passed, no -checks or transformations are performed on the value. +A type specifier may be given as an argument to `--type` to make 'git config' +ensure that the variable(s) are of the given type and convert the value to the +canonical form. If no type specifier is passed, no checks or transformations are +performed on the value. When reading, the values are read from the system, global and repository local configuration files by default, and options @@ -160,30 +158,34 @@ See also <>. --list:: List all variables set in config file, along with their values. ---bool:: - 'git config' will ensure that the output is "true" or "false" +--type [type]:: + 'git config' will ensure that any input output is valid under the given type + constraint(s), and will canonicalize outgoing values in `[type]`'s canonical + form. ++ +Valid `[type]`'s include: ++ +- 'bool': canonicalize values as either "true" or "false". +- 'int': canonicalize values as simple decimal numbers. An optional suffix of + 'k', 'm', or 'g' will cause the value to be multiplied by 1024, 1048576, or + 1073741824 prior to output. +- 'bool-or-int': canonicalize according to either 'bool' or 'int', as described + above. +- 'path': canonicalize by adding a leading `~` to the value of `$HOME` and + `~user` to the home directory for the specified user. This specifier has no + effect when setting the value (but you can use `git config section.variable + ~/` from the command line to let your shell do the expansion.) +- 'expiry-date': canonicalize by converting from a fixed or relative date-string + to a timestamp. This specifier has no effect when setting the value. ++ +--bool:: --int:: - 'git config' will ensure that the output is a simple - decimal number
[PATCH v3 0/2] builtin/config.c: prefer `--type=bool` over `--bool`, etc.
Hi, Here is a v3 of a series to (1) treat type specifiers in `git config` as singulars rather than a collection of bits, and (2) to prefer --type= over --. I have made the following changes since v2: - Fix some misspellings in Documentation/git-config.txt (cc: René). - Handle --no-type correctly (cc: Peff). - No longer prefer (1 << x) style for enumerating type specifiers (cc: Peff). - Fix awkward rebase (cc: Peff). Thanks as always for your review :-). Thanks, Taylor Taylor Blau (2): builtin/config.c: treat type specifiers singularly builtin/config.c: prefer `--type=bool` over `--bool`, etc. Documentation/git-config.txt | 66 --- builtin/config.c | 77 +++- t/t1300-repo-config.sh | 29 ++ 3 files changed, 113 insertions(+), 59 deletions(-) -- 2.16.2.440.gc6284da4f
RE: [ANNOUNCE] Git v2.17.0
On April 2, 2018 3:34 PM, Junio C Hamano wrote: > Subject: [ANNOUNCE] Git v2.17.0 > > The latest feature release Git v2.17.0 is now available at the usual places. > It is > comprised of 516 non-merge commits since v2.16.0, contributed by 71 > people, 20 of which are new faces. The NonStop platform variant's regression suite (after applying are now very small set of platform mods) had the usual failures in t1308:23, t1404:52, and 4 in t9001. This is equivalent to the same breaks since 2.8.5 through 2.16.2. We should be installing in production tomorrow. Thanks for everyone's hard work. Cheers, Randall -- Brief whoami: NonStop developer since approximately 2112884442 UNIX developer since approximately 421664400 -- In my real life, I talk too much.
Re: [PATCH 2/6] commit: add generation number to struct commmit
On Wed, Apr 04, 2018 at 12:17:06AM +0100, Ramsay Jones wrote: > >> Is there any reason to believe this would be too small of a value in the > >> future? Or is a 32 bit unsigned good enough? > > > > The linux kernel took ~10 years to produce 500k commits. Even assuming > > those were all linear (and they're not), that gives us ~80,000 years of > > leeway. So even if the pace of development speeds up or we have a > > quicker project, it still seems we have a pretty reasonable safety > > margin. > > I didn't read the patches closely, but isn't it ~20,000 years? > > Given that '#define GENERATION_NUMBER_MAX 0x3FFF', that is. ;-) What, I'm supposed to read the patches before responding? Heresy. -Peff
Re: [PATCH 2/6] commit: add generation number to struct commmit
On 03/04/18 19:28, Jeff King wrote: > On Tue, Apr 03, 2018 at 11:05:36AM -0700, Brandon Williams wrote: > >> On 04/03, Derrick Stolee wrote: >>> The generation number of a commit is defined recursively as follows: >>> >>> * If a commit A has no parents, then the generation number of A is one. >>> * If a commit A has parents, then the generation number of A is one >>> more than the maximum generation number among the parents of A. >>> >>> Add a uint32_t generation field to struct commit so we can pass this >> >> Is there any reason to believe this would be too small of a value in the >> future? Or is a 32 bit unsigned good enough? > > The linux kernel took ~10 years to produce 500k commits. Even assuming > those were all linear (and they're not), that gives us ~80,000 years of > leeway. So even if the pace of development speeds up or we have a > quicker project, it still seems we have a pretty reasonable safety > margin. I didn't read the patches closely, but isn't it ~20,000 years? Given that '#define GENERATION_NUMBER_MAX 0x3FFF', that is. ;-) ATB, Ramsay Jones
Re: [RFC PATCH v3 2/2] t7406: add test for non-default branch in submodule
Notes: I am aware this test is not probably the best one and maybe I should have first one test that does a one level non-default, before trying a test with 2 levels of submodules, but I wanted to express the goal of the patch. Currently the test fails, so I am obviously missing something. Help would be appreciated. 2018-04-04 1:20 GMT+03:00 Eddy Petrișor : > From: Eddy Petrișor > > If a submodule uses a non-default branch and the branch info is versioned, on > submodule update --recursive --init the correct branch should be checked out. > > Signed-off-by: Eddy Petrișor > --- > t/t7406-submodule-update.sh | 54 > + > 1 file changed, 54 insertions(+) > > diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh > index 6f083c4d6..7b65f1dd1 100755 > --- a/t/t7406-submodule-update.sh > +++ b/t/t7406-submodule-update.sh > @@ -259,6 +259,60 @@ test_expect_success 'submodule update --remote should > fetch upstream changes wit > ) > ' > > +test_expect_success 'submodule update --remote --recursive --init should > fetch module branch from .gitmodules' ' > + git clone . super5 && > + git clone super5 submodl2b2 && > + git clone super5 submodl1b1 && > + cd submodl2b2 && > + echo linel2b2 > l2b2 && > + git checkout -b b2 && > + git add l2b2 && > + test_tick && > + git commit -m "commit on b2 branch in l2" && > + git rev-parse --verify HEAD >../expectl2 && > + git checkout master && > + cd ../submodl1b1 && > + git checkout -b b1 && > + echo linel1b1 > l1b1 && > + git add l1b1 && > + test_tick && > + git commit -m "commit on b1 branch in l1" && > + git submodule add ../submodl2b2 submodl2b2 && > + git config -f .gitmodules submodule."submodl2b2".branch b2 && > + git add .gitmodules && > + test_tick && > + git commit -m "add l2 module with branch b2 in l1 module in branch > b1" && > + git submodule init submodl2b2 && > + git rev-parse --verify HEAD >../expectl1 && > + git checkout master && > + cd ../super5 && > + echo super_with_2_chained_modules > super5 && > + git add super5 && > + test_tick && > + git commit -m "commit on default branch in super5" && > + git submodule add ../submodl1b1 submodl1b1 && > + git config -f .gitmodules submodule."submodl1b1".branch b1 && > + git add .gitmodules && > + test_tick && > + git commit -m "add l1 module with branch b1 in super5" && > + git submodule init submodl1b1 && > + git clone super5 super && > + ( > + cd super && > + git submodule update --recursive --init > + ) && > + ( > + cd submodl1b1 && > + git rev-parse --verify HEAD >../../actuall1 && > + test_cmp ../../expectl1 ../../actuall1 > + ) && > + ( > + cd submodl2b2 && > + git rev-parse --verify HEAD >../../../actuall2 && > + test_cmp ../../../expectl2 ../../../actuall2 > + ) > +' > + > test_expect_success 'local config should override .gitmodules branch' ' > (cd submodule && > git checkout test-branch && > -- > 2.16.2 > -- Eddy Petrișor
[PATCH] Fix condition for redirecting stderr
Since the --log-destination option was added in 0c591cacb ("daemon: add --log-destination=(stderr|syslog|none)", 2018-02-04) with the explicit goal of allowing logging to stderr when running in inetd mode, we should not always redirect stderr to /dev/null in inetd mode, but rather only when stderr is not being used for logging. Signed-off-by: Lucas Werkmeister --- Notes: I have to apologize to the list here… even though I proposed this option with the goal of using it on my server, for some reason I decided to only use it there after the next Git release had come out, and didn’t test it anywhere else. The code looked okay, but I missed this part near the end of daemon.c that made it ineffective, rendering the feature broken in the 2.17.0 release and making me look like an idiot :/ sorry, everyone. For what it’s worth, with this fix the feature appears to work as intended (I *have* tested it on my server now and log messages from git-daemon show up in the journal as intended, attributed to the correct unit and everything). daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daemon.c b/daemon.c index fe833ea7d..9d2e0d20e 100644 --- a/daemon.c +++ b/daemon.c @@ -1459,7 +1459,7 @@ int cmd_main(int argc, const char **argv) die("base-path '%s' does not exist or is not a directory", base_path); - if (inetd_mode) { + if (log_destination != LOG_DESTINATION_STDERR) { if (!freopen("/dev/null", "w", stderr)) die_errno("failed to redirect stderr to /dev/null"); } -- 2.16.3
[RFC PATCH v3 1/2] git-submodule.sh:cmd_update: if submodule branch exists, fetch that instead of default
From: Eddy Petrișor There are projects such as llvm/clang which use several repositories, and they might be forked for providing support for various features such as adding Redox awareness to the toolchain. This typically means the superproject will use another branch than master, occasionally even use an old commit from that non-master branch. Combined with the fact that when incorporating such a hierachy of repositories usually the user is interested in just the exact commit specified in the submodule info, it follows that a desireable usecase is to be also able to provide '--depth 1' or at least have a shallow clone to avoid waiting for ages for the clone operation to finish. In theory, this should be straightforward since the git protocol allows fetching an arbitary commit, but, in practice, some servers do not permit fetch-by-sha1. Git submodule seems to be very stubborn and cloning master, although the wrapper script and the gitmodules-helper could work together to clone directly the branch specified in the .gitmodules file, if specified. Signed-off-by: Eddy Petrișor --- git-submodule.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/git-submodule.sh b/git-submodule.sh index 24914963c..65e3af08b 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -589,8 +589,10 @@ cmd_update() branch=$(git submodule--helper remote-branch "$sm_path") if test -z "$nofetch" then + # non-default branch refspec + br_refspec=$(git submodule-helper remote-branch $sm_path) # Fetch remote before determining tracking $sha1 - fetch_in_submodule "$sm_path" $depth || + fetch_in_submodule "$sm_path" $depth $br_refspec || die "$(eval_gettext "Unable to fetch in submodule path '\$sm_path'")" fi remote_name=$(sanitize_submodule_env; cd "$sm_path" && get_default_remote) -- 2.16.2
[RFC PATCH v3 2/2] t7406: add test for non-default branch in submodule
From: Eddy Petrișor If a submodule uses a non-default branch and the branch info is versioned, on submodule update --recursive --init the correct branch should be checked out. Signed-off-by: Eddy Petrișor --- t/t7406-submodule-update.sh | 54 + 1 file changed, 54 insertions(+) diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh index 6f083c4d6..7b65f1dd1 100755 --- a/t/t7406-submodule-update.sh +++ b/t/t7406-submodule-update.sh @@ -259,6 +259,60 @@ test_expect_success 'submodule update --remote should fetch upstream changes wit ) ' +test_expect_success 'submodule update --remote --recursive --init should fetch module branch from .gitmodules' ' + git clone . super5 && + git clone super5 submodl2b2 && + git clone super5 submodl1b1 && + cd submodl2b2 && + echo linel2b2 > l2b2 && + git checkout -b b2 && + git add l2b2 && + test_tick && + git commit -m "commit on b2 branch in l2" && + git rev-parse --verify HEAD >../expectl2 && + git checkout master && + cd ../submodl1b1 && + git checkout -b b1 && + echo linel1b1 > l1b1 && + git add l1b1 && + test_tick && + git commit -m "commit on b1 branch in l1" && + git submodule add ../submodl2b2 submodl2b2 && + git config -f .gitmodules submodule."submodl2b2".branch b2 && + git add .gitmodules && + test_tick && + git commit -m "add l2 module with branch b2 in l1 module in branch b1" && + git submodule init submodl2b2 && + git rev-parse --verify HEAD >../expectl1 && + git checkout master && + cd ../super5 && + echo super_with_2_chained_modules > super5 && + git add super5 && + test_tick && + git commit -m "commit on default branch in super5" && + git submodule add ../submodl1b1 submodl1b1 && + git config -f .gitmodules submodule."submodl1b1".branch b1 && + git add .gitmodules && + test_tick && + git commit -m "add l1 module with branch b1 in super5" && + git submodule init submodl1b1 && + git clone super5 super && + ( + cd super && + git submodule update --recursive --init + ) && + ( + cd submodl1b1 && + git rev-parse --verify HEAD >../../actuall1 && + test_cmp ../../expectl1 ../../actuall1 + ) && + ( + cd submodl2b2 && + git rev-parse --verify HEAD >../../../actuall2 && + test_cmp ../../../expectl2 ../../../actuall2 + ) +' + test_expect_success 'local config should override .gitmodules branch' ' (cd submodule && git checkout test-branch && -- 2.16.2
Re: [RFC][PATCH] git-stash: convert git stash list to C builtin
On 25.03.2018 10:08, Eric Sunshine wrote: On Sat, Mar 24, 2018 at 2:23 PM, Paul-Sebastian Ungureanu wrote: Currently, because git stash is not fully converted to C, I introduced a new helper that will hold the converted commands. --- diff --git a/builtin/stash--helper.c b/builtin/stash--helper.c @@ -0,0 +1,52 @@ +int cmd_stash__helper(int argc, const char **argv, const char *prefix) +{ + int cmdmode = 0; + + struct option options[] = { + OPT_CMDMODE(0, "list", &cmdmode, +N_("list stash entries"), LIST_STASH), + OPT_END() + }; Is the intention that once git-stash--helper implements all 'stash' functionality, you will simply rename git-stash--helper to git-stash? If so, then I'd think that you'd want it to accept subcommand arguments as bare words ("apply", "drop") in order to be consistent with the existing git-stash command set, not in dashed form ("--apply", "--drop"). In that case, OPT_CMDMODE doesn't seem appropriate. Instead, you should be consulting argv[] directly (as in [1]) after parse_options(). [1]: https://public-inbox.org/git/20180324173707.17699-2-j...@teichroeb.net/ It makes sense. In the end, when all stash is converted, it would just require an additional pointless effort to bring (back) from dashed form to bare word form. + argc = parse_options(argc, argv, prefix, options, +git_stash__helper_usage, PARSE_OPT_KEEP_UNKNOWN); + + if (!cmdmode) + usage_with_options(git_stash__helper_usage, options); + + switch (cmdmode) { + case LIST_STASH: + return list_stash(argc, argv, prefix); + } + return 0; +} diff --git a/git.c b/git.c @@ -466,6 +466,7 @@ static struct cmd_struct commands[] = { { "show-branch", cmd_show_branch, RUN_SETUP }, { "show-ref", cmd_show_ref, RUN_SETUP }, { "stage", cmd_add, RUN_SETUP | NEED_WORK_TREE }, + { "stash--helper", cmd_stash__helper, RUN_SETUP }, You don't require a working tree? Seems odd for git-stash. { "status", cmd_status, RUN_SETUP | NEED_WORK_TREE }, { "stripspace", cmd_stripspace }, { "submodule--helper", cmd_submodule__helper, RUN_SETUP | SUPPORT_SUPER_PREFIX}, For now, I do not think that it is necessary (for stash list), but I am pretty sure that it will be required in the future when porting other commands. Thanks for advice!
Re: A potential approach to making tests faster on Windows
On Tue, Apr 3, 2018 at 5:49 AM, Johannes Schindelin wrote: > My main evidence that shell scripts on macOS are slower than on Linux was > the difference of the improvement incurred by moving more things from > git-rebase--interactive.sh into sequencer.c: Linux saw an improvement only > of about 3x, while macOS saw an improvement of 4x, IIRC. If I don't > remember the absolute numbers correctly, at least I vividly remember the > qualitative difference: It was noticeable. MacOS is _slow_, much, much slower than, say, Linux. Several years ago, when I had this machine configured for multi-boot, I ran MacOS and Linux on bare metal. Back then, using ram disk for the "trash" directories, and disabling Spotlight indexing on MacOS to avoid it eating CPU and causing I/O contention, the Git test suite would run to completion on Linux in slightly over 1 minute. On MacOS, it would take over 10 minutes; 10 times slower. These days, the Git test suite takes 15 minutes to run on the same hardware (with same conditions: ram disk and Spotlight disabled), which is painfully slow, thus I rarely do it. Unfortunately, I don't have Linux installed on bare metal anymore, so I can't make a proper comparison, but I do run Linux in a virtual machine under MacOS and, even though its running within a virtualized environment, Linux is still much faster than MacOS, taking 4:25 (slow, but not to the point of outright pain). That the test suite runs so much faster on Linux (bare metal or virtualized) than MacOS on this machine, I have attributed (or understood as being due) to poor HFS+ filesystem performance. It's even worse when Spotlight interferes. Presumably, the new, recently released, Mac filesystem has improved performance, but it's restricted to SSD's, whereas this machine has a physical drive, thus I can't test it.
[PATCH] diff: add a blocks mode for moved code detection
> Currently we have plain, zebra & dimmed_zebra, and zebra is the > default. > > I got an internal report from someone who had, because zebra looked > crappy in his terminal, moved to "plain", and was reporting getting > worse moved diffs as a result. > > I found that there's essentially a missing setting between "plain" and > "zebra", in git command terms: > > # The "plain" setting > git -c diff.colorMoved=true \ > -c diff.colorMoved=plain \ > show > > # We don't have this, it's "plain" but with "zebra" heuristics, > # plain_zebra? > git -c diff.colorMoved=true \ > -c color.diff.oldMovedAlternative="bold magenta" \ > -c color.diff.newMovedAlternative="bold yellow" \ > -c diff.colorMoved=zebra \ > show > > # The "zebra" setting. > git -c diff.colorMoved=true \ > -c diff.colorMoved=zebra \ > show > > Which is what I mean by the current config conflating two (to me) > unrelated things. One is how we, via any method, detect what's moved or > not, and the other is what color/format we use to present this to the > user. Oh I see. Reading the docs again, maybe we want to have a "blocks" mode, that is zebra with the same color for any block? > You can feed that plain_zebra invocation input where it'll color-wise > produce something that looks *almost* like "plain", but will differ (and > usually be better) in what lines it decides to show as moved, which of > course is due to *MovedAlternative. I would think this is close to what you want (module implementation errors, I did not run/test this code). One could also argue that this is *too* weak, as when there are multiple blocks of say 15 chars adjacent, they might be one large block. ---8< >From dde04f6afa35a313fac3575100fe83b554ec2b59 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 3 Apr 2018 14:03:01 -0700 Subject: [PATCH] diff: add a blocks mode for moved code detection Signed-off-by: Stefan Beller --- Documentation/diff-options.txt | 5 + diff.c | 4 +++- diff.h | 5 +++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index c330c01ff0..abce5142d2 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -268,6 +268,11 @@ plain:: that are added somewhere else in the diff. This mode picks up any moved line, but it is not very useful in a review to determine if a block of code was moved without permutation. +blocks: + Blocks of moved text of at least 20 alphanumeric characters + are detected greedily. The detected blocks are + painted using either the 'color.diff.{old,new}Moved' color. + Adjacent blocks cannot be told apart. zebra:: Blocks of moved text of at least 20 alphanumeric characters are detected greedily. The detected blocks are diff --git a/diff.c b/diff.c index 21c3838b25..80dd8cbd9a 100644 --- a/diff.c +++ b/diff.c @@ -271,6 +271,8 @@ static int parse_color_moved(const char *arg) return COLOR_MOVED_NO; else if (!strcmp(arg, "plain")) return COLOR_MOVED_PLAIN; + else if (!strcmp(arg, "blocks")) + return COLOR_MOVED_BLOCKS; else if (!strcmp(arg, "zebra")) return COLOR_MOVED_ZEBRA; else if (!strcmp(arg, "default")) @@ -899,7 +901,7 @@ static void mark_color_as_moved(struct diff_options *o, block_length++; - if (flipped_block) + if (flipped_block && o->color_moved != COLOR_MOVED_BLOCKS) l->flags |= DIFF_SYMBOL_MOVED_LINE_ALT; } adjust_last_block(o, n, block_length); diff --git a/diff.h b/diff.h index 6bd278aac1..3a228861d9 100644 --- a/diff.h +++ b/diff.h @@ -207,8 +207,9 @@ struct diff_options { enum { COLOR_MOVED_NO = 0, COLOR_MOVED_PLAIN = 1, - COLOR_MOVED_ZEBRA = 2, - COLOR_MOVED_ZEBRA_DIM = 3, + COLOR_MOVED_BLOCKS = 2, + COLOR_MOVED_ZEBRA = 3, + COLOR_MOVED_ZEBRA_DIM = 4, } color_moved; #define COLOR_MOVED_DEFAULT COLOR_MOVED_ZEBRA #define COLOR_MOVED_MIN_ALNUM_COUNT 20 -- 2.17.0.484.g0c8726318c-goog
Re: [PATCH 5/7] diff.c: refactor internal representation for coloring moved code
On Tue, Apr 03 2018, Stefan Beller wrote: > On Tue, Apr 3, 2018 at 12:39 PM, Ævar Arnfjörð Bjarmason > wrote: >> >> On Mon, Apr 02 2018, Stefan Beller wrote: >> >>> At the time the move coloring was implemented we thought an enum of modes >>> is the best to configure this feature. However as we want to tack on new >>> features, the enum would grow exponentially. >>> >>> Refactor the code such that features are enabled via bits. Currently we can >>> * activate the move detection, >>> * enable the block detection on top, and >>> * enable the dimming inside a block, though this could be done without >>> block detection as well (mode "plain, dimmed") >>> >>> Choose the flags to not be at bit position 2,3,4 as the next patch >>> will occupy these. >> >> When I've been playing with colorMoved the thing I've found really >> confusing is that the current config has confused two completely >> unrelated things (at least, from a user perspective), what underlying >> algorithm you use, and how the colors look. > > Not sure I follow. The colors are in color.diff.X and the algorithm is in > diff.colorMoved, whereas some colors are reused for different algorithms? > >> >> I was helping someone at work the other day where they were trying: >> >> git -c color.diff.new="green bold" \ >> -c color.diff.old="red bold" \ >> -c color.diff.newMoved="green" \ >> -c color.diff.oldMoved="red" \ >> -c diff.colorMoved=plain show >> >> But what gave better results was: >> >> git -c color.diff.new="green bold" \ >> -c color.diff.old="red bold" \ >> -c color.diff.newMoved="green" \ >> -c color.diff.oldMoved="red" \ >> -c diff.colorMoved=zebra \ >> -c color.diff.oldMovedAlternative=red \ >> -c color.diff.newMovedAlternative=green show >> >> I don't have a public test commit to share (sorry), but I have an >> internal example where "plain" will consider a thing as falling under >> color.diff.old OR color.diff.oldMoved, but zebra will consider that >> whole part only color.diff.old. > > What do you mean by "OR" ? > Is the hunk present multiple times and colored one or the other way? > Is it colored differently in different invocations of Git? > Is one hunk mixing up both colors? > > Is the hunk "small" ? > small hunks are un-colored, to avoid showing empty lines > or closing braces as moved. But plain mode ignores this heuristic. > >> I see now that that might be since only the "zebra" supports the >> *Alternative that it ends up "stealing" chunks from something that would >> have otherwise been classified differently, so I have no idea if there's >> an easy "solution", or if it's even a problem. > > Can you describe the issue more to see if it is a problem? > (It sounds like a problem in the documentation/UX to me already > as the docs could not tell you what to expect) > >> Sorry about being vague, I just dug this up from some old notes now >> after this patch jolted my memory about it. Forget about what I said so far, sorry, that was a really shitty report. I dug into this a bit more and here's a better one. I still can't share the actual diff I have in front of me (internal code). Currently we have plain, zebra & dimmed_zebra, and zebra is the default. I got an internal report from someone who had, because zebra looked crappy in his terminal, moved to "plain", and was reporting getting worse moved diffs as a result. I found that there's essentially a missing setting between "plain" and "zebra", in git command terms: # The "plain" setting git -c diff.colorMoved=true \ -c diff.colorMoved=plain \ show # We don't have this, it's "plain" but with "zebra" heuristics, # plain_zebra? git -c diff.colorMoved=true \ -c color.diff.oldMovedAlternative="bold magenta" \ -c color.diff.newMovedAlternative="bold yellow" \ -c diff.colorMoved=zebra \ show # The "zebra" setting. git -c diff.colorMoved=true \ -c diff.colorMoved=zebra \ show Which is what I mean by the current config conflating two (to me) unrelated things. One is how we, via any method, detect what's moved or not, and the other is what color/format we use to present this to the user. You can feed that plain_zebra invocation input where it'll color-wise produce something that looks *almost* like "plain", but will differ (and usually be better) in what lines it decides to show as moved, which of course is due to *MovedAlternative.
Re: [PATCH 7/7] diff.c: add --color-moved-ignore-space-delta option
On Tue, 3 Apr 2018 12:22:32 -0700 Stefan Beller wrote: > On Mon, Apr 2, 2018 at 5:41 PM, Jonathan Tan wrote: > > On Mon, 2 Apr 2018 15:48:54 -0700 > > Stefan Beller wrote: > > > >> +struct ws_delta { > >> + int deltachars; > >> + char firstchar; > >> +}; > > > > I'll just make some overall design comments. > > > > Shouldn't this be a string of characters (or a char* and len) and > > whether it was added or removed? If you're only checking the first > > character, this would not work if the other characters were different. > > I considered diving into this, but it seemed to be too complicated for > >95 % of the use cases, which can be approximated in change of the > first character. It's true that most use cases can be approximated this way, but I don't think that it's worth the approximation. > Because if we take a string of characters, we'd also need to take care of > tricky conversions (e.g. Are 8 white spaces equal to a tab, and if so do > we break blocks if one line converts 8 ws to a tab?) No conversions - spaces are spaces and tabs are tabs. > So I would definitely pursue the string instead of change of first > character, but what are all the heuristics to put in? No heuristics - a few lines make a block if the same prefix (which consists of all whitespace) was added or removed. > Just to be clear: The string would contain only the change in > white space up front, or would we also somehow store white space > in other parts? Only change in white space at the start of the line - this option only handles space at the start of the line, right? > - # This is a sample comment > - # across multiple lines > - # maybe even a license header > + # This is a sample comment > + # across multiple lines > + # maybe even a license header > > How about this? My understanding is that this patch does not handle this case. > >> @@ -717,10 +752,20 @@ static int moved_entry_cmp(const void > >> *hashmap_cmp_fn_data, > >> const struct diff_options *diffopt = hashmap_cmp_fn_data; > >> const struct moved_entry *a = entry; > >> const struct moved_entry *b = entry_or_key; > >> + unsigned flags = diffopt->color_moved & XDF_WHITESPACE_FLAGS; > >> + > >> + if (diffopt->color_moved & COLOR_MOVED_DELTA_WHITESPACES) > >> + /* > >> + * As there is not specific white space config given, > >> + * we'd need to check for a new block, so ignore all > >> + * white space. The setup of the white space > >> + * configuration for the next block is done else where > >> + */ > >> + flags |= XDF_IGNORE_WHITESPACE; > >> > >> return !xdiff_compare_lines(a->es->line, a->es->len, > >> b->es->line, b->es->len, > >> - diffopt->color_moved & > >> XDF_WHITESPACE_FLAGS); > >> + flags); > >> } > > > > I think we should just prohibit combining this with any of the > > whitespace ignoring flags except for the space-at-eol one. They seem to > > contradict anyway. > > ok, we can narrow this one down to ignore all white space. What do you mean? The rationale for my comment is that I saw that you need to specify a special flag to xdiff_compare_lines if COLOR_MOVED_DELTA_WHITESPACES is set, which could conflict with other flags that the user has explicitly set. So avoiding that case entirely seems like a good idea, especially since it is logical to do so. > >> +test_expect_success 'compare whitespace delta across moved blocks' ' > >> + > >> + git reset --hard && > >> + q_to_tab <<-\EOF >text.txt && > >> + QIndented > >> + QText > >> + Qacross > >> + Qfive > >> + Qlines > >> + QBut! > >> + Qthis > >> + QQone > >> + Qline > >> + QQdid > >> + Qnot > >> + QQadjust > >> + EOF > > > > Do we need 5 lines? I thought 2 would suffice. (It's the ALNUM_COUNT > > that matters, as far as I know.) This makes it hard to see that the > > "But!" line is the one that counts. > > I did not want to go with the bare minimum as then adjusting the minimum > would be a pain as these unrelated (to the minimum) test cases would > break. That is true, but it makes the test case harder to read now. If you're worried about bumping into the minimum if we do adjust the minimum, making the lines longer should be sufficient. > >> +test_expect_success 'compare whitespace delta across moved blocks with > >> multiple indentation levels' ' > > >> + EOF > > > > If the objective it just to show that the functions f and g are treated > > as one unit despite their lines being of multiple indentation levels, > > the test file could be much shorter. > > yeah, I noticed that we already test that in the test above where we > have that test after the "But!", where lines ziggy-zag. Will drop this test. OK, sounds good.
Re: Timing issue in t5570 "daemon log records all attributes"
On Tue, Apr 03, 2018 at 09:33:10PM +0200, Jan Palus wrote: > My understanding of test "daemon log records all attributes" is that daemon > process is started in background, some git command is executed and daemon's > output (saved to daemon.log) is compared against expected value. However > daemon.log is not a straight redirect to file -- it is being piped through > fifo, > read by a loop in test-git-daemon.sh, additional processing is performed and > finally it makes it to daemon.log. All of this performed concurrently with > test > execution. My question is how do you exactly avoid timing issues here? grep on > daemon.log is performed immediately after git invocation: > > >daemon.log && > GIT_OVERRIDE_VIRTUAL_HOST=localhost \ > git -c protocol.version=1 \ > ls-remote "$GIT_DAEMON_URL/interp.git" && > grep -i extended.attribute daemon.log | cut -d" " -f2- >actual && > > how can you be sure grep operates on daemon.log that already includes all > output > and not on intermediate state that is just being processed by while loop? Same > question applies to ">daemon.log" since shell might still be processing output > of previous test and its content might possibly land in the file after > zeroing. You're right, this is racy. You can see it much more obviously with: diff --git a/t/lib-git-daemon.sh b/t/lib-git-daemon.sh index edbea2d986..3c7fea169b 100644 --- a/t/lib-git-daemon.sh +++ b/t/lib-git-daemon.sh @@ -62,6 +62,7 @@ start_git_daemon() { ( while read -r line <&7 do + sleep 1 printf "%s\n" "$line" printf >&4 "%s\n" "$line" done I'm not sure of the best solution. Even if we removed the shell-loop pumping the data from the fifo, it's still possible to race if git-daemon hangs up the client socket before flushing its log output (since our only real synchronization is waiting for the client to exit). Nor could we even wait for an EOF on the fifo, since we won't get one until the daemon actually exits. If we want to do it without polling, I think the best we could do is have the pumping loop key on some particular line in the output as a synchronization point, and then trigger _another_ fifo that the test snippet listens on. Yuck. I'm tempted to say we should just scrap this test. It was added relatively recently and only shows the fix for a pretty minor bug. It's probably not worth the contortions to make it race-proof. -Peff
Re: [PATCH 0/3] Lazy-load trees when reading commit-graph
On Tue, Apr 03, 2018 at 09:14:50AM -0400, Derrick Stolee wrote: > > I'm not sure what the exact solution would be, but I imagine something > > like variable-sized "struct commit"s with the parent pointers embedded, > > with some kind of flag to indicate the number of parents (and probably > > some fallback to break out to a linked list for extreme cases of more > > than 2 parents). It may end up pretty invasive, though, as there's a > > lot of open-coded traversals of that parent list. > > > > Anyway, not anything to do with this patch, but food for thought as you > > micro-optimize these traversals. > > One other thing that I've been thinking about is that 'struct commit' is so > much bigger than the other structs in 'union any_object'. This means that > the object cache, which I think creates blocks of 'union any_object' for > memory-alignment reasons, is overly bloated. This would be especially true > when walking many more trees than commits. > > Perhaps there are other memory concerns in that case (such as cached > buffers) that the 'union any_object' is not a concern, but it is worth > thinking about as we brainstorm how to reduce the parent-list memory. It definitely bloats any_object, but I don't think we typically allocate too many of those. Those should only come from lookup_unknown_object(), but typically we'll come across objects by traversing the graph, in which case we have an expectation of the type (and use the appropriate lookup_foo() function, which uses the type-specific block allocators). -Peff
Re: Socket activation for GitWeb FastCGI with systemd?
On Tue, Apr 3, 2018 at 11:53 AM, Alex Ivanov wrote: > Hi. > I want to use systemd as fastcgi spawner for gitweb + nginx. > The traffic is low and number of users is limited + traversal bots. For that > reason I've decided to use following mimimal services > > gitweb.socket > [Unit] > Description=GitWeb Socket > > [Socket] > ListenStream=/run/gitweb.sock > Accept=false > > [Install] > WantedBy=sockets.target > > gitweb.service > [Unit] > Description=GitWeb Service > > [Service] > Type=simple > ExecStart=/path/to/gitweb.cgi --fcgi > StandardInput=socket > > However this scheme is not resistant to simple DDOS. > E.g. traversal bots often kill the service by opening non existing path (e.g > http://host/?p=repo;a=blob;f=nonexisting/path;hb=HEAD showing in browser 404 > - Cannot find file) many times consecutively, which leads to > Apr 03 21:32:10 host systemd[1]: gitweb.service: Start request repeated too > quickly. > Apr 03 21:32:10 host systemd[1]: gitweb.service: Failed with result > 'start-limit-hit'. > Apr 03 21:32:10 host systemd[1]: Failed to start GitWeb service. > and 502 Bad Gateway in browser. I believe the reason is that gitweb.service > dies on failure and if it happens too often, systemd declines to restart the > service due to start limit hit. > So my question is how to correct systemd services for GitWeb to be resistant > to such issue? I prefer to use single process to process all clients. > Thanks. This sounds like a systemd specific question that might get a better answer from the systemd mailing list. That being said, I believe if in this case gitweb is dying due to the path not existing? You might be able to configure systemd to understand that the particular exit code for when the path doesn't exist is a "valid" exit, and not a failure case.. I'm not entirely understanding your goal.. you want each request to launch the gitweb process, and when it's done you want it to exit? But if there are multiple connections at once you want it to stay alive until it services them all? I think the best answer is configure systemd to understand that the exit code for when the path is invalid will be counted as a success. Thanks, Jake
Re: [RFC PATCH 0/7] Moved code detection: ignore space on uniform indentation
On Tue, Apr 3, 2018 at 12:00 PM, Stefan Beller wrote: The fun is in the last patch, which allows white space sensitive languages to trust the move detection, too. Each block that is marked as moved will have the same delta in {in-, de-}dentation. I would think this mode might be a reasonable default eventually. >>> >>> This sounds like a good idea. "Trust" is probably too strong a word, but >>> I can see this being useful even in non-whitespace-sensitive languages >>> with nested blocks (like C). >> >> The ability to detect moved code despite whitespace changes would be >> good, even while showing diffs with the whitespace intact. > > That is what the last patch is about. Right. I was trying to say "I agree with the goal, even if we don't necessarily allow every possible white-space + color-moved lines combination" (i.e. to avoid polluting the option space too much) Thanks, Jake
Re: [PATCH 5/7] diff.c: refactor internal representation for coloring moved code
On Tue, Apr 3, 2018 at 12:39 PM, Ævar Arnfjörð Bjarmason wrote: > > On Mon, Apr 02 2018, Stefan Beller wrote: > >> At the time the move coloring was implemented we thought an enum of modes >> is the best to configure this feature. However as we want to tack on new >> features, the enum would grow exponentially. >> >> Refactor the code such that features are enabled via bits. Currently we can >> * activate the move detection, >> * enable the block detection on top, and >> * enable the dimming inside a block, though this could be done without >> block detection as well (mode "plain, dimmed") >> >> Choose the flags to not be at bit position 2,3,4 as the next patch >> will occupy these. > > When I've been playing with colorMoved the thing I've found really > confusing is that the current config has confused two completely > unrelated things (at least, from a user perspective), what underlying > algorithm you use, and how the colors look. Not sure I follow. The colors are in color.diff.X and the algorithm is in diff.colorMoved, whereas some colors are reused for different algorithms? > > I was helping someone at work the other day where they were trying: > > git -c color.diff.new="green bold" \ > -c color.diff.old="red bold" \ > -c color.diff.newMoved="green" \ > -c color.diff.oldMoved="red" \ > -c diff.colorMoved=plain show > > But what gave better results was: > > git -c color.diff.new="green bold" \ > -c color.diff.old="red bold" \ > -c color.diff.newMoved="green" \ > -c color.diff.oldMoved="red" \ > -c diff.colorMoved=zebra \ > -c color.diff.oldMovedAlternative=red \ > -c color.diff.newMovedAlternative=green show > > I don't have a public test commit to share (sorry), but I have an > internal example where "plain" will consider a thing as falling under > color.diff.old OR color.diff.oldMoved, but zebra will consider that > whole part only color.diff.old. What do you mean by "OR" ? Is the hunk present multiple times and colored one or the other way? Is it colored differently in different invocations of Git? Is one hunk mixing up both colors? Is the hunk "small" ? small hunks are un-colored, to avoid showing empty lines or closing braces as moved. But plain mode ignores this heuristic. > I see now that that might be since only the "zebra" supports the > *Alternative that it ends up "stealing" chunks from something that would > have otherwise been classified differently, so I have no idea if there's > an easy "solution", or if it's even a problem. Can you describe the issue more to see if it is a problem? (It sounds like a problem in the documentation/UX to me already as the docs could not tell you what to expect) > Sorry about being vague, I just dug this up from some old notes now > after this patch jolted my memory about it. ok.
Re: [PATCH 5/7] diff.c: refactor internal representation for coloring moved code
On Mon, Apr 02 2018, Stefan Beller wrote: > At the time the move coloring was implemented we thought an enum of modes > is the best to configure this feature. However as we want to tack on new > features, the enum would grow exponentially. > > Refactor the code such that features are enabled via bits. Currently we can > * activate the move detection, > * enable the block detection on top, and > * enable the dimming inside a block, though this could be done without > block detection as well (mode "plain, dimmed") > > Choose the flags to not be at bit position 2,3,4 as the next patch > will occupy these. When I've been playing with colorMoved the thing I've found really confusing is that the current config has confused two completely unrelated things (at least, from a user perspective), what underlying algorithm you use, and how the colors look. I was helping someone at work the other day where they were trying: git -c color.diff.new="green bold" \ -c color.diff.old="red bold" \ -c color.diff.newMoved="green" \ -c color.diff.oldMoved="red" \ -c diff.colorMoved=plain show But what gave better results was: git -c color.diff.new="green bold" \ -c color.diff.old="red bold" \ -c color.diff.newMoved="green" \ -c color.diff.oldMoved="red" \ -c diff.colorMoved=zebra \ -c color.diff.oldMovedAlternative=red \ -c color.diff.newMovedAlternative=green show I don't have a public test commit to share (sorry), but I have an internal example where "plain" will consider a thing as falling under color.diff.old OR color.diff.oldMoved, but zebra will consider that whole part only color.diff.old. I see now that that might be since only the "zebra" supports the *Alternative that it ends up "stealing" chunks from something that would have otherwise been classified differently, so I have no idea if there's an easy "solution", or if it's even a problem. Sorry about being vague, I just dug this up from some old notes now after this patch jolted my memory about it.
Timing issue in t5570 "daemon log records all attributes"
My understanding of test "daemon log records all attributes" is that daemon process is started in background, some git command is executed and daemon's output (saved to daemon.log) is compared against expected value. However daemon.log is not a straight redirect to file -- it is being piped through fifo, read by a loop in test-git-daemon.sh, additional processing is performed and finally it makes it to daemon.log. All of this performed concurrently with test execution. My question is how do you exactly avoid timing issues here? grep on daemon.log is performed immediately after git invocation: >daemon.log && GIT_OVERRIDE_VIRTUAL_HOST=localhost \ git -c protocol.version=1 \ ls-remote "$GIT_DAEMON_URL/interp.git" && grep -i extended.attribute daemon.log | cut -d" " -f2- >actual && how can you be sure grep operates on daemon.log that already includes all output and not on intermediate state that is just being processed by while loop? Same question applies to ">daemon.log" since shell might still be processing output of previous test and its content might possibly land in the file after zeroing. The reason I'm asking is because /bin/sh in my distribution (mksh) actually manifests the issue -- test fails because at the time of grep output was not processed yet (fixed by sleep 1 before grep). Also there is an issue with output of previous test landing in daemon.log despite ">daemon.log". Regards Jan
Re: [PATCH 7/7] diff.c: add --color-moved-ignore-space-delta option
On Mon, Apr 2, 2018 at 5:41 PM, Jonathan Tan wrote: > On Mon, 2 Apr 2018 15:48:54 -0700 > Stefan Beller wrote: > >> +struct ws_delta { >> + int deltachars; >> + char firstchar; >> +}; > > I'll just make some overall design comments. > > Shouldn't this be a string of characters (or a char* and len) and > whether it was added or removed? If you're only checking the first > character, this would not work if the other characters were different. I considered diving into this, but it seemed to be too complicated for >95 % of the use cases, which can be approximated in change of the first character. Because if we take a string of characters, we'd also need to take care of tricky conversions (e.g. Are 8 white spaces equal to a tab, and if so do we break blocks if one line converts 8 ws to a tab?) So I would definitely pursue the string instead of change of first character, but what are all the heuristics to put in? Just to be clear: The string would contain only the change in white space up front, or would we also somehow store white space in other parts? - # This is a sample comment - # across multiple lines - # maybe even a license header + # This is a sample comment + # across multiple lines + # maybe even a license header How about this? > >> @@ -717,10 +752,20 @@ static int moved_entry_cmp(const void >> *hashmap_cmp_fn_data, >> const struct diff_options *diffopt = hashmap_cmp_fn_data; >> const struct moved_entry *a = entry; >> const struct moved_entry *b = entry_or_key; >> + unsigned flags = diffopt->color_moved & XDF_WHITESPACE_FLAGS; >> + >> + if (diffopt->color_moved & COLOR_MOVED_DELTA_WHITESPACES) >> + /* >> + * As there is not specific white space config given, >> + * we'd need to check for a new block, so ignore all >> + * white space. The setup of the white space >> + * configuration for the next block is done else where >> + */ >> + flags |= XDF_IGNORE_WHITESPACE; >> >> return !xdiff_compare_lines(a->es->line, a->es->len, >> b->es->line, b->es->len, >> - diffopt->color_moved & >> XDF_WHITESPACE_FLAGS); >> + flags); >> } > > I think we should just prohibit combining this with any of the > whitespace ignoring flags except for the space-at-eol one. They seem to > contradict anyway. ok, we can narrow this one down to ignore all white space. > >> +test_expect_success 'compare whitespace delta across moved blocks' ' >> + >> + git reset --hard && >> + q_to_tab <<-\EOF >text.txt && >> + QIndented >> + QText >> + Qacross >> + Qfive >> + Qlines >> + QBut! >> + Qthis >> + QQone >> + Qline >> + QQdid >> + Qnot >> + QQadjust >> + EOF > > Do we need 5 lines? I thought 2 would suffice. (It's the ALNUM_COUNT > that matters, as far as I know.) This makes it hard to see that the > "But!" line is the one that counts. I did not want to go with the bare minimum as then adjusting the minimum would be a pain as these unrelated (to the minimum) test cases would break. > >> +test_expect_success 'compare whitespace delta across moved blocks with >> multiple indentation levels' ' >> + EOF > > If the objective it just to show that the functions f and g are treated > as one unit despite their lines being of multiple indentation levels, > the test file could be much shorter. yeah, I noticed that we already test that in the test above where we have that test after the "But!", where lines ziggy-zag. Will drop this test.
Re: [PATCH 6/6] commit-graph.txt: update future work
On Tue, 3 Apr 2018 12:51:43 -0400 Derrick Stolee wrote: > We now calculate generation numbers in the commit-graph file and use > them in paint_down_to_common(). For completeness, I'll mention that I don't see any issues with this patch, of course. Thanks for this series.
Re: [PATCH 0/6] Compute and consume generation numbers
On Tue, Apr 03, 2018 at 02:47:27PM -0400, Jeff King wrote: > On Tue, Apr 03, 2018 at 02:29:01PM -0400, Derrick Stolee wrote: > > > If we have generic "can X reach Y?" queries, then we can also use generation > > numbers there to great effect (by not walking commits Z with gen(Z) <= > > gen(Y)). Perhaps I should look at that "git branch --contains" thread for > > ideas. > > I think the gist of it is the patch below. Which I hastily adapted from > the patch we run at GitHub that uses timestamps as a proxy. So it's > possible I completely flubbed the logic. I'm assuming unavailable > generation numbers are set to 0; the logic is actually a bit simpler if > they end up as (uint32_t)-1. Oh indeed, that is already the value of your UNDEF. So the patch is more like this: diff --git a/ref-filter.c b/ref-filter.c index 45fc56216a..b147b1d0ee 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -1584,7 +1584,8 @@ static int in_commit_list(const struct commit_list *want, struct commit *c) */ static enum contains_result contains_test(struct commit *candidate, const struct commit_list *want, - struct contains_cache *cache) + struct contains_cache *cache, + uint32_t cutoff) { enum contains_result *cached = contains_cache_at(cache, candidate); @@ -1598,8 +1599,11 @@ static enum contains_result contains_test(struct commit *candidate, return CONTAINS_YES; } - /* Otherwise, we don't know; prepare to recurse */ parse_commit_or_die(candidate); + + if (candidate->generation < cutoff) + return CONTAINS_NO; + return CONTAINS_UNKNOWN; } @@ -1615,8 +1619,20 @@ static enum contains_result contains_tag_algo(struct commit *candidate, struct contains_cache *cache) { struct contains_stack contains_stack = { 0, 0, NULL }; - enum contains_result result = contains_test(candidate, want, cache); + enum contains_result result; + uint32_t cutoff = GENERATION_NUMBER_UNDEF; + const struct commit_list *p; + + for (p = want; p; p = p->next) { + struct commit *c = p->item; + parse_commit_or_die(c); + if (c->generation < cutoff) + cutoff = c->generation; + } + if (cutoff == GENERATION_NUMBER_UNDEF) + cutoff = GENERATION_NUMBER_NONE; + result = contains_test(candidate, want, cache, cutoff); if (result != CONTAINS_UNKNOWN) return result; @@ -1634,7 +1650,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate, * If we just popped the stack, parents->item has been marked, * therefore contains_test will return a meaningful yes/no. */ - else switch (contains_test(parents->item, want, cache)) { + else switch (contains_test(parents->item, want, cache, cutoff)) { case CONTAINS_YES: *contains_cache_at(cache, commit) = CONTAINS_YES; contains_stack.nr--; @@ -1648,7 +1664,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate, } } free(contains_stack.contains_stack); - return contains_test(candidate, want, cache); + return contains_test(candidate, want, cache, cutoff); } static int commit_contains(struct ref_filter *filter, struct commit *commit,
Re: [PATCH 5/6] commit.c: use generation to halt paint walk
On Tue, 3 Apr 2018 12:51:42 -0400 Derrick Stolee wrote: > -static int queue_has_nonstale(struct prio_queue *queue) > +static int queue_has_nonstale(struct prio_queue *queue, uint32_t min_gen) > { > - int i; > - for (i = 0; i < queue->nr; i++) { > - struct commit *commit = queue->array[i].data; > - if (!(commit->object.flags & STALE)) > - return 1; > + if (min_gen != GENERATION_NUMBER_UNDEF) { > + if (queue->nr > 0) { > + struct commit *commit = queue->array[0].data; > + return commit->generation >= min_gen; > + } This only works if the prio_queue has compare_commits_by_gen_then_commit_date. Also, I don't think that the min_gen != GENERATION_NUMBER_UNDEF check is necessary. So I would write this as: if (queue->compare == compare_commits_by_gen_then_commit_date && queue->nr) { struct commit *commit = queue->array[0].data; return commit->generation >= min_gen; } for (i = 0 ... If you'd rather not perform the comparison to compare_commits_by_gen_then_commit_date every time you invoke queue_has_nonstale(), that's fine with me too, but document somewhere that queue_has_nonstale() only works if this comparison function is used. > + if (commit->generation > last_gen) > + BUG("bad generation skip"); > + > + last_gen = commit->generation; last_gen seems to only be used to ensure that the priority queue returns elements in the correct order - I think we can generally trust the queue, and if we need to test it, we can do it elsewhere.
Re: [RFC PATCH 0/7] Moved code detection: ignore space on uniform indentation
>>> The fun is in the last patch, which allows white space sensitive >>> languages to trust the move detection, too. Each block that is marked as >>> moved will have the same delta in {in-, de-}dentation. >>> I would think this mode might be a reasonable default eventually. >> >> This sounds like a good idea. "Trust" is probably too strong a word, but >> I can see this being useful even in non-whitespace-sensitive languages >> with nested blocks (like C). > > The ability to detect moved code despite whitespace changes would be > good, even while showing diffs with the whitespace intact. That is what the last patch is about.
Socket activation for GitWeb FastCGI with systemd?
Hi. I want to use systemd as fastcgi spawner for gitweb + nginx. The traffic is low and number of users is limited + traversal bots. For that reason I've decided to use following mimimal services gitweb.socket [Unit] Description=GitWeb Socket [Socket] ListenStream=/run/gitweb.sock Accept=false [Install] WantedBy=sockets.target gitweb.service [Unit] Description=GitWeb Service [Service] Type=simple ExecStart=/path/to/gitweb.cgi --fcgi StandardInput=socket However this scheme is not resistant to simple DDOS. E.g. traversal bots often kill the service by opening non existing path (e.g http://host/?p=repo;a=blob;f=nonexisting/path;hb=HEAD showing in browser 404 - Cannot find file) many times consecutively, which leads to Apr 03 21:32:10 host systemd[1]: gitweb.service: Start request repeated too quickly. Apr 03 21:32:10 host systemd[1]: gitweb.service: Failed with result 'start-limit-hit'. Apr 03 21:32:10 host systemd[1]: Failed to start GitWeb service. and 502 Bad Gateway in browser. I believe the reason is that gitweb.service dies on failure and if it happens too often, systemd declines to restart the service due to start limit hit. So my question is how to correct systemd services for GitWeb to be resistant to such issue? I prefer to use single process to process all clients. Thanks.
Re: [PATCH 5/7] diff.c: refactor internal representation for coloring moved code
On Mon, Apr 2, 2018 at 4:51 PM, Jonathan Tan wrote: > On Mon, 2 Apr 2018 15:48:52 -0700 > Stefan Beller wrote: > >> At the time the move coloring was implemented we thought an enum of modes >> is the best to configure this feature. However as we want to tack on new >> features, the enum would grow exponentially. >> >> Refactor the code such that features are enabled via bits. Currently we can >> * activate the move detection, >> * enable the block detection on top, and >> * enable the dimming inside a block, though this could be done without >> block detection as well (mode "plain, dimmed") > > Firstly, patches 1-4 are obviously correct. > > As for this patch, I don't think that using flags is the right way to do > this. We are not under any size pressure for struct diff_options, and > the additional options that we plan to add (color-moved-whitespace-flags > and ignore-space-delta) can easily be additional fields instead. Gah! I'll give it a try. When I came to the conclusion that the features of this series is orthogonal to the existing code, bit fields came first to mind. Let's see if the alternative is easier to read.
Re: [PATCH 3/6] commit-graph: compute generation numbers
On Tue, Apr 3, 2018 at 11:30 AM, Jonathan Tan wrote: > On Tue, 3 Apr 2018 12:51:40 -0400 > Derrick Stolee wrote: > >> + if ((*list)->generation != GENERATION_NUMBER_UNDEF) { >> + if ((*list)->generation > GENERATION_NUMBER_MAX) >> + die("generation number %u is too large to >> store in commit-graph", >> + (*list)->generation); >> + packedDate[0] |= htonl((*list)->generation << 2); >> + } > > The die() should have "BUG:" if you agree with my comment below. I would remove the BUG/die() altogether and keep going. (But do not write it out, i.e. warn and skip the next line) A degraded commit graph with partial generation numbers is better than Git refusing to write any part of the commit graph (which later on will be part of many maintenance operations I would think, leading to more immediate headache rather than "working but slightly slower") > >> +static void compute_generation_numbers(struct commit** commits, >> +int nr_commits) > > Style: space before **, not after. > >> + if (all_parents_computed) { >> + current->generation = max_generation + 1; >> + pop_commit(&list); >> + } > > I think the current->generation should be clamped to _MAX here. If we do, then > the die() I mentioned in my first comment will have "BUG:", since we are never > meant to write any number larger than _MAX in ->generation. When we clamp here, we'd have to treat the _MAX specially in all our use cases or we'd encounter funny bugs due to miss ordered commits later?
Re: [PATCH 0/6] Compute and consume generation numbers
On Tue, Apr 03, 2018 at 02:29:01PM -0400, Derrick Stolee wrote: > If we have generic "can X reach Y?" queries, then we can also use generation > numbers there to great effect (by not walking commits Z with gen(Z) <= > gen(Y)). Perhaps I should look at that "git branch --contains" thread for > ideas. I think the gist of it is the patch below. Which I hastily adapted from the patch we run at GitHub that uses timestamps as a proxy. So it's possible I completely flubbed the logic. I'm assuming unavailable generation numbers are set to 0; the logic is actually a bit simpler if they end up as (uint32_t)-1. Assuming it works, that would cover for-each-ref and tag. You'd probably want to drop the "with_commit_tag_algo" flag in ref-filter.h, and just use always use it by default (and that would cover "git branch"). --- diff --git a/ref-filter.c b/ref-filter.c index 45fc56216a..6bea6173d1 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -1584,7 +1584,8 @@ static int in_commit_list(const struct commit_list *want, struct commit *c) */ static enum contains_result contains_test(struct commit *candidate, const struct commit_list *want, - struct contains_cache *cache) + struct contains_cache *cache, + uint32_t cutoff) { enum contains_result *cached = contains_cache_at(cache, candidate); @@ -1598,8 +1599,11 @@ static enum contains_result contains_test(struct commit *candidate, return CONTAINS_YES; } - /* Otherwise, we don't know; prepare to recurse */ parse_commit_or_die(candidate); + + if (candidate->generation && candidate->generation < cutoff) + return CONTAINS_NO; + return CONTAINS_UNKNOWN; } @@ -1615,8 +1619,20 @@ static enum contains_result contains_tag_algo(struct commit *candidate, struct contains_cache *cache) { struct contains_stack contains_stack = { 0, 0, NULL }; - enum contains_result result = contains_test(candidate, want, cache); + enum contains_result result; + uint32_t cutoff = -1; + const struct commit_list *p; + + for (p = want; p; p = p->next) { + struct commit *c = p->item; + parse_commit_or_die(c); + if (c->generation && c->generation < cutoff ) + cutoff = c->generation; + } + if (cutoff == -1) + cutoff = 0; + result = contains_test(candidate, want, cache, cutoff); if (result != CONTAINS_UNKNOWN) return result; @@ -1634,7 +1650,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate, * If we just popped the stack, parents->item has been marked, * therefore contains_test will return a meaningful yes/no. */ - else switch (contains_test(parents->item, want, cache)) { + else switch (contains_test(parents->item, want, cache, cutoff)) { case CONTAINS_YES: *contains_cache_at(cache, commit) = CONTAINS_YES; contains_stack.nr--; @@ -1648,7 +1664,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate, } } free(contains_stack.contains_stack); - return contains_test(candidate, want, cache); + return contains_test(candidate, want, cache, cutoff); } static int commit_contains(struct ref_filter *filter, struct commit *commit,
Re: [PATCH 2/6] commit: add generation number to struct commmit
On Tue, Apr 3, 2018 at 11:28 AM, Jeff King wrote: > On Tue, Apr 03, 2018 at 11:05:36AM -0700, Brandon Williams wrote: > >> On 04/03, Derrick Stolee wrote: >> > The generation number of a commit is defined recursively as follows: >> > >> > * If a commit A has no parents, then the generation number of A is one. >> > * If a commit A has parents, then the generation number of A is one >> > more than the maximum generation number among the parents of A. >> > >> > Add a uint32_t generation field to struct commit so we can pass this >> >> Is there any reason to believe this would be too small of a value in the >> future? Or is a 32 bit unsigned good enough? > > The linux kernel took ~10 years to produce 500k commits. Even assuming > those were all linear (and they're not), ... which you meant in terms of DAG, where a linear history is the worst case for generation numbers. I first read it the other way round, as the best case w.r.t. timing ~/linux$ git log --oneline |wc -l 721223 $ git log --oneline --since 2012 |wc -l 421853 $ git log --oneline --since 2011 |wc -l 477155 The number of commits is growing exponentially, though the exponential part is very small and the YoY growth can be estimated using linear interpolation. In linux, the release is a natural synchronization point IIUC as well as on a regular schedule. So an interesting question to ask there would be whether the delta in generation number goes up over time, or if the DAG just gets wider (=more parallel) > that gives us ~80,000 years of > leeway. So even if the pace of development speeds up or we have a > quicker project, it still seems we have a pretty reasonable safety > margin. Thanks for the estimate. Stefan
Re: [PATCH 3/3] commit-graph: lazy-load trees
>>> +/* >>> + * For performance reasons, a commit loaded from the graph does not >>> + * have a tree loaded until trying to consume it for the first time. >> >> That is the theme of this series/patch, but do we need to write it down >> into the codebase? I'd be inclined to omit this part and only go with: >> >>Load the root tree of a commit and return the tree. > > > OK. This was just a suggestion, others may want to chime in on the terseness. > >> >>> struct tree *get_commit_tree(const struct commit *commit) >>> { >>> - return commit->tree; >>> + if (commit->tree || !commit->object.parsed) >> >> I understand to return the tree from the commit >> when we have the tree in the commit object (the first >> part). >> >> But 'when we have not (yet) parsed the commit object', >> we also just return its tree? Could you explain the >> second part of the condition? >> Is that for commits that are not part of the commit graph? >> (But then why does it need to be negated?) > > > Some callers check the value of 'commit->tree' without a guarantee that the > commit was parsed. In this case, the way to preserve the existing behavior > is to continue returning NULL. If I remove the "|| !commit->object.parsed" > then the BUG("commit has NULL tree, but was not loaded from commit-graph") Oh. That totally makes sense now. I should have more coffee (got up extra early to see a dentist before going into work) > is hit in these two tests: > > t6012-rev-list-simplify.sh > t6110-rev-list-sparse.sh > > I prefer to keep the BUG() statement and instead use this if statement. If > someone has more clarity on why this is a good existing behavior, then > please chime in. > I would also keep the BUG statement; I am unsure if we'd want to have a comment explaining the situation, or if it is obvious enough and I was just not focused enough. This totally makes sense now and I'd keep the code as is. Thanks, Stefan
Re: [PATCH 1/6] object.c: parse commit in graph first
On 4/3/2018 2:28 PM, Jeff King wrote: On Tue, Apr 03, 2018 at 11:21:36AM -0700, Jonathan Tan wrote: On Tue, 3 Apr 2018 12:51:38 -0400 Derrick Stolee wrote: Most code paths load commits using lookup_commit() and then parse_commit(). In some cases, including some branch lookups, the commit is parsed using parse_object_buffer() which side-steps parse_commit() in favor of parse_commit_buffer(). Before adding generation numbers to the commit-graph, we need to ensure that any commit that exists in the graph is loaded from the graph, so check parse_commit_in_graph() before calling parse_commit_buffer(). Signed-off-by: Derrick Stolee Modifying parse_object_buffer() is the most pragmatic way to accomplish this, but this also means that parse_object_buffer() now potentially reads from the local object store (instead of only relying on what's in memory and what's in the provided buffer). parse_object_buffer() is called by several callers including in builtin/fsck.c. I would feel more comfortable if the relevant [1] caller to parse_object_buffer() was modified instead of parse_object_buffer(), but I'll let others give their opinions too. It's not just you. This seems like a really odd place to put it. Especially because if we have the buffer to pass to this function, then we'd already have incurred the cost to inflate the object. OK. Thanks. I'll try to find the better place to put this check. -Stolee
Re: [PATCH 4/6] commit: use generations in paint_down_to_common()
On Tue, 3 Apr 2018 12:51:41 -0400 Derrick Stolee wrote: > +int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, > void *unused) > +{ > + const struct commit *a = a_, *b = b_; > + > + if (a->generation < b->generation) > + return 1; > + else if (a->generation > b->generation) > + return -1; > + > + /* newer commits with larger date first */ > + if (a->date < b->date) > + return 1; > + else if (a->date > b->date) > + return -1; > + return 0; > +} I think it would be clearer if you commented above the first block "newer commits first", then on the second block, "use date as a heuristic to determine newer commit". Other than that, this looks good.
Re: [PATCH 2/6] commit: add generation number to struct commmit
On 04/03, Jeff King wrote: > On Tue, Apr 03, 2018 at 11:05:36AM -0700, Brandon Williams wrote: > > > On 04/03, Derrick Stolee wrote: > > > The generation number of a commit is defined recursively as follows: > > > > > > * If a commit A has no parents, then the generation number of A is one. > > > * If a commit A has parents, then the generation number of A is one > > > more than the maximum generation number among the parents of A. > > > > > > Add a uint32_t generation field to struct commit so we can pass this > > > > Is there any reason to believe this would be too small of a value in the > > future? Or is a 32 bit unsigned good enough? > > The linux kernel took ~10 years to produce 500k commits. Even assuming > those were all linear (and they're not), that gives us ~80,000 years of > leeway. So even if the pace of development speeds up or we have a > quicker project, it still seems we have a pretty reasonable safety > margin. > > -Peff I figured as much, but just wanted to check since the windows folks seems to produce commits pretty quickly. -- Brandon Williams
Re: [PATCH 4/6] commit: use generations in paint_down_to_common()
On Tue, Apr 3, 2018 at 9:51 AM, Derrick Stolee wrote: > Define compare_commits_by_gen_then_commit_date(), which uses generation > numbers as a primary comparison and commit date to break ties (or as a > comparison when both commits do not have computed generation numbers). > > Since the commit-graph file is closed under reachability, we know that > all commits in the file have generation at most GENERATION_NUMBER_MAX > which is less than GENERATION_NUMBER_UNDEF. > > This change does not affect the number of commits that are walked during > the execution of paint_down_to_common(), only the order that those > commits are inspected. In the case that commit dates violate topological > order (i.e. a parent is "newer" than a child), the previous code could > walk a commit twice: if a commit is reached with the PARENT1 bit, but > later is re-visited with the PARENT2 bit, then that PARENT2 bit must be > propagated to its parents. Using generation numbers avoids this extra > effort, even if it is somewhat rare. This patch (or later in this series) may want to touch Documentation/technical/commit-graph.txt, that mentions this in the section of Future Work: - After computing and storing generation numbers, we must make graph walks aware of generation numbers to gain the performance benefits they enable. This will mostly be accomplished by swapping a commit-date-ordered priority queue with one ordered by generation number. The following operations are important candidates: - paint_down_to_common() - 'log --topo-order' The paint down to common is only internal, not exposed to the user for ordering, i.e. the topological ordering is still ordering commits in a branch adjacent? Thanks, Stefan
Re: [PATCH 2/6] commit: add generation number to struct commmit
On 4/3/2018 2:28 PM, Jeff King wrote: On Tue, Apr 03, 2018 at 11:05:36AM -0700, Brandon Williams wrote: On 04/03, Derrick Stolee wrote: The generation number of a commit is defined recursively as follows: * If a commit A has no parents, then the generation number of A is one. * If a commit A has parents, then the generation number of A is one more than the maximum generation number among the parents of A. Add a uint32_t generation field to struct commit so we can pass this Is there any reason to believe this would be too small of a value in the future? Or is a 32 bit unsigned good enough? The linux kernel took ~10 years to produce 500k commits. Even assuming those were all linear (and they're not), that gives us ~80,000 years of leeway. So even if the pace of development speeds up or we have a quicker project, it still seems we have a pretty reasonable safety margin. That, and larger projects do not have linear histories. Despite having almost 2 million reachable commits, the Windows repository has maximum generation number ~100,000. Thanks, -Stolee
Re: [PATCH 3/6] commit-graph: compute generation numbers
On Tue, 3 Apr 2018 12:51:40 -0400 Derrick Stolee wrote: > + if ((*list)->generation != GENERATION_NUMBER_UNDEF) { > + if ((*list)->generation > GENERATION_NUMBER_MAX) > + die("generation number %u is too large to store > in commit-graph", > + (*list)->generation); > + packedDate[0] |= htonl((*list)->generation << 2); > + } The die() should have "BUG:" if you agree with my comment below. > +static void compute_generation_numbers(struct commit** commits, > +int nr_commits) Style: space before **, not after. > + if (all_parents_computed) { > + current->generation = max_generation + 1; > + pop_commit(&list); > + } I think the current->generation should be clamped to _MAX here. If we do, then the die() I mentioned in my first comment will have "BUG:", since we are never meant to write any number larger than _MAX in ->generation.
Re: [PATCH 0/6] Compute and consume generation numbers
On 4/3/2018 2:03 PM, Brandon Williams wrote: On 04/03, Derrick Stolee wrote: This is the first of several "small" patches that follow the serialized Git commit graph patch (ds/commit-graph). As described in Documentation/technical/commit-graph.txt, the generation number of a commit is one more than the maximum generation number among its parents (trivially, a commit with no parents has generation number one). Thanks for ensuring that this is defined and documented somewhere :) This series makes the computation of generation numbers part of the commit-graph write process. Finally, generation numbers are used to order commits in the priority queue in paint_down_to_common(). This allows a constant-time check in queue_has_nonstale() instead of the previous linear-time check. This does not have a significant performance benefit in repositories of normal size, but in the Windows repository, some merge-base calculations improve from 3.1s to 2.9s. A modest speedup, but provides an actual consumer of generation numbers as a starting point. A more substantial refactoring of revision.c is required before making 'git log --graph' use generation numbers effectively. log --graph should benefit a lot more from this correct? I know we've talked a bit about negotiation and I wonder if these generation numbers should be able to help out a little bit with that some day. 'log --graph' should be a HUGE speedup, when it is refactored. Since the topo-order can "stream" commits to the pager, it can be very responsive to return the graph in almost all conditions. (The case where generation numbers are not enough is when filters reduce the set of displayed commits to be very sparse, so many commits are walked anyway.) If we have generic "can X reach Y?" queries, then we can also use generation numbers there to great effect (by not walking commits Z with gen(Z) <= gen(Y)). Perhaps I should look at that "git branch --contains" thread for ideas. For negotiation, there are some things we can do here. VSTS uses generation numbers as a heuristic for determining "all wants connected to haves" which is a condition for halting negotiation. The idea is very simple, and I'd be happy to discuss it on a separate thread. Thanks, -Stolee
Re: [PATCH 1/6] object.c: parse commit in graph first
On Tue, Apr 03, 2018 at 11:21:36AM -0700, Jonathan Tan wrote: > On Tue, 3 Apr 2018 12:51:38 -0400 > Derrick Stolee wrote: > > > Most code paths load commits using lookup_commit() and then > > parse_commit(). In some cases, including some branch lookups, the commit > > is parsed using parse_object_buffer() which side-steps parse_commit() in > > favor of parse_commit_buffer(). > > > > Before adding generation numbers to the commit-graph, we need to ensure > > that any commit that exists in the graph is loaded from the graph, so > > check parse_commit_in_graph() before calling parse_commit_buffer(). > > > > Signed-off-by: Derrick Stolee > > Modifying parse_object_buffer() is the most pragmatic way to accomplish > this, but this also means that parse_object_buffer() now potentially > reads from the local object store (instead of only relying on what's in > memory and what's in the provided buffer). parse_object_buffer() is > called by several callers including in builtin/fsck.c. I would feel more > comfortable if the relevant [1] caller to parse_object_buffer() was > modified instead of parse_object_buffer(), but I'll let others give > their opinions too. It's not just you. This seems like a really odd place to put it. Especially because if we have the buffer to pass to this function, then we'd already have incurred the cost to inflate the object. -Peff
Re: [PATCH 2/6] commit: add generation number to struct commmit
On Tue, Apr 03, 2018 at 11:05:36AM -0700, Brandon Williams wrote: > On 04/03, Derrick Stolee wrote: > > The generation number of a commit is defined recursively as follows: > > > > * If a commit A has no parents, then the generation number of A is one. > > * If a commit A has parents, then the generation number of A is one > > more than the maximum generation number among the parents of A. > > > > Add a uint32_t generation field to struct commit so we can pass this > > Is there any reason to believe this would be too small of a value in the > future? Or is a 32 bit unsigned good enough? The linux kernel took ~10 years to produce 500k commits. Even assuming those were all linear (and they're not), that gives us ~80,000 years of leeway. So even if the pace of development speeds up or we have a quicker project, it still seems we have a pretty reasonable safety margin. -Peff
Re: [PATCH 2/6] commit: add generation number to struct commmit
On Tue, 3 Apr 2018 12:51:39 -0400 Derrick Stolee wrote: > The generation number of a commit is defined recursively as follows: > > * If a commit A has no parents, then the generation number of A is one. > * If a commit A has parents, then the generation number of A is one > more than the maximum generation number among the parents of A. > > Add a uint32_t generation field to struct commit so we can pass this > information to revision walks. We use two special values to signal > the generation number is invalid: > > GENERATION_NUMBER_UNDEF 0x > GENERATION_NUMBER_NONE 0 > > The first (_UNDEF) means the generation number has not been loaded or > computed. The second (_NONE) means the generation number was loaded > from a commit graph file that was stored before generation numbers > were computed. > > Signed-off-by: Derrick Stolee This looks straightforward and correct, thanks. I think some of the description above should appear as code comments. > +#define GENERATION_NUMBER_UNDEF 0x > +#define GENERATION_NUMBER_NONE 0 I would include the description above here as documentation, and would replace "was stored before generation numbers were computed" by "was written by a version of Git that did not support generation numbers".
Re: [PATCH 3/3] commit-graph: lazy-load trees
On 4/3/2018 2:00 PM, Stefan Beller wrote: On Tue, Apr 3, 2018 at 5:00 AM, Derrick Stolee wrote: The commit-graph file provides quick access to commit data, including the OID of the root tree for each commit in the graph. When performing a deep commit-graph walk, we may not need to load most of the trees for these commits. Delay loading the tree object for a commit loaded from the graph until requested via get_commit_tree(). Do not lazy-load trees for commits not in the graph, since that requires duplicate parsing and the relative peformance improvement when trees are not needed is small. On the Linux repository, performance tests were run for the following command: git log --graph --oneline -1000 Before: 0.83s After: 0.65s Rel %: -21.6% This is an awesome speedup. Adding '-- kernel/' to the command requires loading the root tree for every commit that is walked. and as the walk prunes those commits that do not touch kernel/ which from my quick glance is the real core thing. Linus' announcements claim that > 50% is drivers, networking and documentation[1]. So the "-- kernel/" walk needs to walk twice as many commits to find a thousand commits that actually touch kernel/ ? [1] http://lkml.iu.edu/hypermail/linux/kernel/1801.3/02794.html http://lkml.iu.edu/hypermail/linux/kernel/1803.3/00580.html There was no measureable performance change as a result of this patch. ... which means that the walking itself is really fast now and the dominating effects are setup and checking the tree? Yeah. I was concerned that since we take two accesses into the commit-graph file that we could measurably slow down cases where we need to load the trees. That is not an issue since we will likely parse the tree after loading, and parsing is much slower than these commit-graph accesses. Is git smart enough to not load the root tree for "log -- ./" or would we get the desired performance numbers from that? I wonder, since it only really needs the OID of the root tree to determine TREESAME. If it cares about following TREESAME relationships on ./, then it should do that. @@ -317,6 +315,27 @@ int parse_commit_in_graph(struct commit *item) return 0; } +static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit *c) +{ + struct object_id oid; + const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len + 16) * (c->graph_pos); What is 16? (I imagine it is the "length of the row" - g->hash_len ?) Would it make sense to have a constant/define for an entire row instead? (By any chance what is the meaning of GRAPH_DATA_WIDTH, which is 36? That is defined but never used.) Yeah, I should use GRAPH_DATA_WIDTH here instead. +struct tree *get_commit_tree_in_graph(const struct commit *c) +{ + if (c->tree) + return c->tree; This double checking is defensive programming, in case someone doesn't check themselves (as get_commit_tree does below). ok. @@ -17,6 +17,13 @@ char *get_commit_graph_filename(const char *obj_dir); */ int parse_commit_in_graph(struct commit *item); +/* + * For performance reasons, a commit loaded from the graph does not + * have a tree loaded until trying to consume it for the first time. That is the theme of this series/patch, but do we need to write it down into the codebase? I'd be inclined to omit this part and only go with: Load the root tree of a commit and return the tree. OK. struct tree *get_commit_tree(const struct commit *commit) { - return commit->tree; + if (commit->tree || !commit->object.parsed) I understand to return the tree from the commit when we have the tree in the commit object (the first part). But 'when we have not (yet) parsed the commit object', we also just return its tree? Could you explain the second part of the condition? Is that for commits that are not part of the commit graph? (But then why does it need to be negated?) Some callers check the value of 'commit->tree' without a guarantee that the commit was parsed. In this case, the way to preserve the existing behavior is to continue returning NULL. If I remove the "|| !commit->object.parsed" then the BUG("commit has NULL tree, but was not loaded from commit-graph") is hit in these two tests: t6012-rev-list-simplify.sh t6110-rev-list-sparse.sh I prefer to keep the BUG() statement and instead use this if statement. If someone has more clarity on why this is a good existing behavior, then please chime in. Thanks, -Stolee
Re: [PATCH 1/6] object.c: parse commit in graph first
On Tue, 3 Apr 2018 12:51:38 -0400 Derrick Stolee wrote: > Most code paths load commits using lookup_commit() and then > parse_commit(). In some cases, including some branch lookups, the commit > is parsed using parse_object_buffer() which side-steps parse_commit() in > favor of parse_commit_buffer(). > > Before adding generation numbers to the commit-graph, we need to ensure > that any commit that exists in the graph is loaded from the graph, so > check parse_commit_in_graph() before calling parse_commit_buffer(). > > Signed-off-by: Derrick Stolee Modifying parse_object_buffer() is the most pragmatic way to accomplish this, but this also means that parse_object_buffer() now potentially reads from the local object store (instead of only relying on what's in memory and what's in the provided buffer). parse_object_buffer() is called by several callers including in builtin/fsck.c. I would feel more comfortable if the relevant [1] caller to parse_object_buffer() was modified instead of parse_object_buffer(), but I'll let others give their opinions too. [1] The caller which, if modified, will result in the speedup to the merge-base calculations in the Windows repository you describe in your cover letter.
Re: [PATCH 2/6] commit: add generation number to struct commmit
On 04/03, Derrick Stolee wrote: > The generation number of a commit is defined recursively as follows: > > * If a commit A has no parents, then the generation number of A is one. > * If a commit A has parents, then the generation number of A is one > more than the maximum generation number among the parents of A. > > Add a uint32_t generation field to struct commit so we can pass this Is there any reason to believe this would be too small of a value in the future? Or is a 32 bit unsigned good enough? > information to revision walks. We use two special values to signal > the generation number is invalid: > > GENERATION_NUMBER_UNDEF 0x > GENERATION_NUMBER_NONE 0 > > The first (_UNDEF) means the generation number has not been loaded or > computed. The second (_NONE) means the generation number was loaded > from a commit graph file that was stored before generation numbers > were computed. > > Signed-off-by: Derrick Stolee > --- > alloc.c| 1 + > commit-graph.c | 2 ++ > commit.h | 3 +++ > 3 files changed, 6 insertions(+) > > diff --git a/alloc.c b/alloc.c > index cf4f8b61e1..1a62e85ac3 100644 > --- a/alloc.c > +++ b/alloc.c > @@ -94,6 +94,7 @@ void *alloc_commit_node(void) > c->object.type = OBJ_COMMIT; > c->index = alloc_commit_index(); > c->graph_pos = COMMIT_NOT_FROM_GRAPH; > + c->generation = GENERATION_NUMBER_UNDEF; > return c; > } > > diff --git a/commit-graph.c b/commit-graph.c > index 1fc63d541b..d24b947525 100644 > --- a/commit-graph.c > +++ b/commit-graph.c > @@ -264,6 +264,8 @@ static int fill_commit_in_graph(struct commit *item, > struct commit_graph *g, uin > date_low = get_be32(commit_data + g->hash_len + 12); > item->date = (timestamp_t)((date_high << 32) | date_low); > > + item->generation = get_be32(commit_data + g->hash_len + 8) >> 2; > + > pptr = &item->parents; > > edge_value = get_be32(commit_data + g->hash_len); > diff --git a/commit.h b/commit.h > index e57ae4b583..3cadd386f3 100644 > --- a/commit.h > +++ b/commit.h > @@ -10,6 +10,8 @@ > #include "pretty.h" > > #define COMMIT_NOT_FROM_GRAPH 0x > +#define GENERATION_NUMBER_UNDEF 0x > +#define GENERATION_NUMBER_NONE 0 > > struct commit_list { > struct commit *item; > @@ -24,6 +26,7 @@ struct commit { > struct commit_list *parents; > struct tree *tree; > uint32_t graph_pos; > + uint32_t generation; > }; > > extern int save_commit_buffer; > -- > 2.17.0.rc0 > -- Brandon Williams
Re: [PATCH 0/6] Compute and consume generation numbers
On 04/03, Derrick Stolee wrote: > This is the first of several "small" patches that follow the serialized > Git commit graph patch (ds/commit-graph). > > As described in Documentation/technical/commit-graph.txt, the generation > number of a commit is one more than the maximum generation number among > its parents (trivially, a commit with no parents has generation number > one). Thanks for ensuring that this is defined and documented somewhere :) > > This series makes the computation of generation numbers part of the > commit-graph write process. > > Finally, generation numbers are used to order commits in the priority > queue in paint_down_to_common(). This allows a constant-time check in > queue_has_nonstale() instead of the previous linear-time check. > > This does not have a significant performance benefit in repositories > of normal size, but in the Windows repository, some merge-base > calculations improve from 3.1s to 2.9s. A modest speedup, but provides > an actual consumer of generation numbers as a starting point. > > A more substantial refactoring of revision.c is required before making > 'git log --graph' use generation numbers effectively. log --graph should benefit a lot more from this correct? I know we've talked a bit about negotiation and I wonder if these generation numbers should be able to help out a little bit with that some day. > > This patch series depends on v7 of ds/commit-graph. > > Derrick Stolee (6): > object.c: parse commit in graph first > commit: add generation number to struct commmit > commit-graph: compute generation numbers > commit: sort by generation number in paint_down_to_common() > commit.c: use generation number to stop merge-base walks > commit-graph.txt: update design doc with generation numbers > > Documentation/technical/commit-graph.txt | 7 +--- > alloc.c | 1 + > commit-graph.c | 48 + > commit.c | 53 > commit.h | 7 +++- > object.c | 4 +- > 6 files changed, 104 insertions(+), 16 deletions(-) > > -- > 2.17.0.20.g9f30ba16e1 > -- Brandon Williams
Re: [PATCH 3/3] commit-graph: lazy-load trees
On Tue, Apr 3, 2018 at 5:00 AM, Derrick Stolee wrote: > The commit-graph file provides quick access to commit data, including > the OID of the root tree for each commit in the graph. When performing > a deep commit-graph walk, we may not need to load most of the trees > for these commits. > > Delay loading the tree object for a commit loaded from the graph > until requested via get_commit_tree(). Do not lazy-load trees for > commits not in the graph, since that requires duplicate parsing > and the relative peformance improvement when trees are not needed > is small. > > On the Linux repository, performance tests were run for the following > command: > > git log --graph --oneline -1000 > > Before: 0.83s > After: 0.65s > Rel %: -21.6% This is an awesome speedup. > > Adding '-- kernel/' to the command requires loading the root tree > for every commit that is walked. and as the walk prunes those commits that do not touch kernel/ which from my quick glance is the real core thing. Linus' announcements claim that > 50% is drivers, networking and documentation[1]. So the "-- kernel/" walk needs to walk twice as many commits to find a thousand commits that actually touch kernel/ ? [1] http://lkml.iu.edu/hypermail/linux/kernel/1801.3/02794.html http://lkml.iu.edu/hypermail/linux/kernel/1803.3/00580.html > There was no measureable performance > change as a result of this patch. ... which means that the walking itself is really fast now and the dominating effects are setup and checking the tree? Is git smart enough to not load the root tree for "log -- ./" or would we get the desired performance numbers from that? > @@ -317,6 +315,27 @@ int parse_commit_in_graph(struct commit *item) > return 0; > } > > +static struct tree *load_tree_for_commit(struct commit_graph *g, struct > commit *c) > +{ > + struct object_id oid; > + const unsigned char *commit_data = g->chunk_commit_data + > (g->hash_len + 16) * (c->graph_pos); What is 16? (I imagine it is the "length of the row" - g->hash_len ?) Would it make sense to have a constant/define for an entire row instead? (By any chance what is the meaning of GRAPH_DATA_WIDTH, which is 36? That is defined but never used.) > +struct tree *get_commit_tree_in_graph(const struct commit *c) > +{ > + if (c->tree) > + return c->tree; This double checking is defensive programming, in case someone doesn't check themselves (as get_commit_tree does below). ok. > @@ -17,6 +17,13 @@ char *get_commit_graph_filename(const char *obj_dir); > */ > int parse_commit_in_graph(struct commit *item); > > +/* > + * For performance reasons, a commit loaded from the graph does not > + * have a tree loaded until trying to consume it for the first time. That is the theme of this series/patch, but do we need to write it down into the codebase? I'd be inclined to omit this part and only go with: Load the root tree of a commit and return the tree. > struct tree *get_commit_tree(const struct commit *commit) > { > - return commit->tree; > + if (commit->tree || !commit->object.parsed) I understand to return the tree from the commit when we have the tree in the commit object (the first part). But 'when we have not (yet) parsed the commit object', we also just return its tree? Could you explain the second part of the condition? Is that for commits that are not part of the commit graph? (But then why does it need to be negated?) Thanks, Stefan
[PATCH] commit: allow partial commits with relative paths
Commit 8894d53580 (commit: allow partial commits with relative paths, 2011-07-30) ensured that partial commits were allowed when a user supplies a relative pathspec but then this was regressed in 5879f5684c (remove prefix argument from pathspec_prefix, 2011-09-04) when the prefix argument to 'pathspec_prefix' removed and the 'list_paths' function wasn't properly adjusted to cope with the change, resulting in over-eager pruning of the tree that is overlayed on the index. This fixes the regression and adds a regression test so this can be prevented in the future. Signed-off-by: Brandon Williams --- builtin/commit.c | 3 +-- t/t7501-commit.sh | 12 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 37fcb55ab0..5571d4a3e2 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -218,8 +218,7 @@ static int list_paths(struct string_list *list, const char *with_tree, if (with_tree) { char *max_prefix = common_prefix(pattern); - overlay_tree_on_index(&the_index, with_tree, - max_prefix ? max_prefix : prefix); + overlay_tree_on_index(&the_index, with_tree, max_prefix); free(max_prefix); } diff --git a/t/t7501-commit.sh b/t/t7501-commit.sh index fa61b1a4ee..9dbbd01fc0 100755 --- a/t/t7501-commit.sh +++ b/t/t7501-commit.sh @@ -52,6 +52,18 @@ test_expect_success PERL 'can use paths with --interactive' ' git reset --hard HEAD^ ' +test_expect_success 'removed files and relative paths' ' + test_when_finished "rm -rf foo" && + git init foo && + >foo/foo.txt && + git -C foo add foo.txt && + git -C foo commit -m first && + git -C foo rm foo.txt && + + mkdir -p foo/bar && + git -C foo/bar commit -m second ../foo.txt +' + test_expect_success 'using invalid commit with -C' ' test_must_fail git commit --allow-empty -C bogus ' -- 2.17.0.rc1.321.gba9d0f2565-goog
[PATCH] l10n: de.po: fix a 'add --interactive' message
Noticed-by: Matthias Rüster Signed-off-by: Ralf Thielow --- po/de.po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/po/de.po b/po/de.po index 793bd2a80..257f527d6 100644 --- a/po/de.po +++ b/po/de.po @@ -16991,7 +16991,7 @@ msgstr "Löschung im Arbeitsverzeichnis verwerfen [y,n,q,a,d%s,?]? " #: git-add--interactive.perl:1405 #, perl-format msgid "Discard this hunk from worktree [y,n,q,a,d%s,?]? " -msgstr "diesen Patch-Block im Arbeitsverzeichnis verwerfen [y,n,q,a,d%s,?]? " +msgstr "Diesen Patch-Block im Arbeitsverzeichnis verwerfen [y,n,q,a,d%s,?]? " #: git-add--interactive.perl:1408 #, perl-format -- 2.17.0.484.g0c8726318
Re: [PATCH] l10n: de.po: translate 132 new messages
2018-04-02 20:09 GMT+02:00 Matthias Rüster : > Hi Ralf, > > thanks a lot for your translations! > > I've only found a small issue: > >> #: git-add--interactive.perl:1405 >> -#, fuzzy, perl-format >> +#, perl-format >> msgid "Discard this hunk from worktree [y,n,q,a,d%s,?]? " >> -msgstr "diesen Patch-Block im Arbeitsverzeichnis verwerfen >> [y,n,q,a,d,/%s,?]? " >> +msgstr "diesen Patch-Block im Arbeitsverzeichnis verwerfen >> [y,n,q,a,d%s,?]? " > > > "Diesen ..." > Hi Matthias, Thanks for the review! I'll send a fix for this since this version has already been merged. Ralf > > Kind regards, > Matthias
[PATCH] l10n: TEAMS: remove inactive de team members
Thanks for your contributions! Signed-off-by: Ralf Thielow --- po/TEAMS | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/po/TEAMS b/po/TEAMS index 065771cfe..762879550 100644 --- a/po/TEAMS +++ b/po/TEAMS @@ -13,12 +13,8 @@ Members: Alex Henrie Language: de (German) Repository:https://github.com/ralfth/git-po-de Leader:Ralf Thielow -Members: Thomas Rast - Jan Krüger - Christian Stimming +Members: Matthias Rüster Phillip Szelat - Matthias Rüster - Magnus Görlitz Language: es (Spanish) Repository:https://github.com/ChrisADR/git-po -- 2.17.0.484.g0c8726318
Re: [PATCH 0/6] Compute and consume generation numbers
On 4/3/2018 12:51 PM, Derrick Stolee wrote: This is the first of several "small" patches that follow the serialized Git commit graph patch (ds/commit-graph). As described in Documentation/technical/commit-graph.txt, the generation number of a commit is one more than the maximum generation number among its parents (trivially, a commit with no parents has generation number one). This series makes the computation of generation numbers part of the commit-graph write process. Finally, generation numbers are used to order commits in the priority queue in paint_down_to_common(). This allows a constant-time check in queue_has_nonstale() instead of the previous linear-time check. This does not have a significant performance benefit in repositories of normal size, but in the Windows repository, some merge-base calculations improve from 3.1s to 2.9s. A modest speedup, but provides an actual consumer of generation numbers as a starting point. A more substantial refactoring of revision.c is required before making 'git log --graph' use generation numbers effectively. This patch series depends on v7 of ds/commit-graph. Derrick Stolee (6): object.c: parse commit in graph first commit: add generation number to struct commmit commit-graph: compute generation numbers commit: sort by generation number in paint_down_to_common() commit.c: use generation number to stop merge-base walks commit-graph.txt: update design doc with generation numbers This patch is also available as a GitHub pull request [1] [1] https://github.com/derrickstolee/git/pull/5
[PATCH 2/6] commit: add generation number to struct commmit
The generation number of a commit is defined recursively as follows: * If a commit A has no parents, then the generation number of A is one. * If a commit A has parents, then the generation number of A is one more than the maximum generation number among the parents of A. Add a uint32_t generation field to struct commit so we can pass this information to revision walks. We use two special values to signal the generation number is invalid: GENERATION_NUMBER_UNDEF 0x GENERATION_NUMBER_NONE 0 The first (_UNDEF) means the generation number has not been loaded or computed. The second (_NONE) means the generation number was loaded from a commit graph file that was stored before generation numbers were computed. Signed-off-by: Derrick Stolee --- alloc.c| 1 + commit-graph.c | 2 ++ commit.h | 3 +++ 3 files changed, 6 insertions(+) diff --git a/alloc.c b/alloc.c index cf4f8b61e1..1a62e85ac3 100644 --- a/alloc.c +++ b/alloc.c @@ -94,6 +94,7 @@ void *alloc_commit_node(void) c->object.type = OBJ_COMMIT; c->index = alloc_commit_index(); c->graph_pos = COMMIT_NOT_FROM_GRAPH; + c->generation = GENERATION_NUMBER_UNDEF; return c; } diff --git a/commit-graph.c b/commit-graph.c index 1fc63d541b..d24b947525 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -264,6 +264,8 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin date_low = get_be32(commit_data + g->hash_len + 12); item->date = (timestamp_t)((date_high << 32) | date_low); + item->generation = get_be32(commit_data + g->hash_len + 8) >> 2; + pptr = &item->parents; edge_value = get_be32(commit_data + g->hash_len); diff --git a/commit.h b/commit.h index e57ae4b583..3cadd386f3 100644 --- a/commit.h +++ b/commit.h @@ -10,6 +10,8 @@ #include "pretty.h" #define COMMIT_NOT_FROM_GRAPH 0x +#define GENERATION_NUMBER_UNDEF 0x +#define GENERATION_NUMBER_NONE 0 struct commit_list { struct commit *item; @@ -24,6 +26,7 @@ struct commit { struct commit_list *parents; struct tree *tree; uint32_t graph_pos; + uint32_t generation; }; extern int save_commit_buffer; -- 2.17.0.rc0
[PATCH 3/6] commit-graph: compute generation numbers
While preparing commits to be written into a commit-graph file, compute the generation numbers using a depth-first strategy. The only commits that are walked in this depth-first search are those without a precomputed generation number. Thus, computation time will be relative to the number of new commits to the commit-graph file. Signed-off-by: Derrick Stolee --- commit-graph.c | 46 ++ commit.h | 1 + 2 files changed, 47 insertions(+) diff --git a/commit-graph.c b/commit-graph.c index d24b947525..b80c8ad80e 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -419,6 +419,13 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, else packedDate[0] = 0; + if ((*list)->generation != GENERATION_NUMBER_UNDEF) { + if ((*list)->generation > GENERATION_NUMBER_MAX) + die("generation number %u is too large to store in commit-graph", + (*list)->generation); + packedDate[0] |= htonl((*list)->generation << 2); + } + packedDate[1] = htonl((*list)->date); hashwrite(f, packedDate, 8); @@ -551,6 +558,43 @@ static void close_reachable(struct packed_oid_list *oids) } } +static void compute_generation_numbers(struct commit** commits, + int nr_commits) +{ + int i; + struct commit_list *list = NULL; + + for (i = 0; i < nr_commits; i++) { + if (commits[i]->generation != GENERATION_NUMBER_UNDEF && + commits[i]->generation != GENERATION_NUMBER_NONE) + continue; + + commit_list_insert(commits[i], &list); + while (list) { + struct commit *current = list->item; + struct commit_list *parent; + int all_parents_computed = 1; + uint32_t max_generation = 0; + + for (parent = current->parents; parent; parent = parent->next) { + if (parent->item->generation == GENERATION_NUMBER_UNDEF || + parent->item->generation == GENERATION_NUMBER_NONE) { + all_parents_computed = 0; + commit_list_insert(parent->item, &list); + break; + } else if (parent->item->generation > max_generation) { + max_generation = parent->item->generation; + } + } + + if (all_parents_computed) { + current->generation = max_generation + 1; + pop_commit(&list); + } + } + } +} + void write_commit_graph(const char *obj_dir, const char **pack_indexes, int nr_packs, @@ -674,6 +718,8 @@ void write_commit_graph(const char *obj_dir, if (commits.nr >= GRAPH_PARENT_MISSING) die(_("too many commits to write graph")); + compute_generation_numbers(commits.list, commits.nr); + graph_name = get_commit_graph_filename(obj_dir); fd = hold_lock_file_for_update(&lk, graph_name, 0); diff --git a/commit.h b/commit.h index 3cadd386f3..bc7a3186c5 100644 --- a/commit.h +++ b/commit.h @@ -11,6 +11,7 @@ #define COMMIT_NOT_FROM_GRAPH 0x #define GENERATION_NUMBER_UNDEF 0x +#define GENERATION_NUMBER_MAX 0x3FFF #define GENERATION_NUMBER_NONE 0 struct commit_list { -- 2.17.0.rc0
[PATCH 1/6] object.c: parse commit in graph first
Most code paths load commits using lookup_commit() and then parse_commit(). In some cases, including some branch lookups, the commit is parsed using parse_object_buffer() which side-steps parse_commit() in favor of parse_commit_buffer(). Before adding generation numbers to the commit-graph, we need to ensure that any commit that exists in the graph is loaded from the graph, so check parse_commit_in_graph() before calling parse_commit_buffer(). Signed-off-by: Derrick Stolee --- object.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/object.c b/object.c index e6ad3f61f0..4cd3e98e04 100644 --- a/object.c +++ b/object.c @@ -3,6 +3,7 @@ #include "blob.h" #include "tree.h" #include "commit.h" +#include "commit-graph.h" #include "tag.h" static struct object **obj_hash; @@ -207,7 +208,8 @@ struct object *parse_object_buffer(const struct object_id *oid, enum object_type } else if (type == OBJ_COMMIT) { struct commit *commit = lookup_commit(oid); if (commit) { - if (parse_commit_buffer(commit, buffer, size)) + if (!parse_commit_in_graph(commit) && + parse_commit_buffer(commit, buffer, size)) return NULL; if (!get_cached_commit_buffer(commit, NULL)) { set_commit_buffer(commit, buffer, size); -- 2.17.0.rc0
[PATCH 4/6] commit: use generations in paint_down_to_common()
Define compare_commits_by_gen_then_commit_date(), which uses generation numbers as a primary comparison and commit date to break ties (or as a comparison when both commits do not have computed generation numbers). Since the commit-graph file is closed under reachability, we know that all commits in the file have generation at most GENERATION_NUMBER_MAX which is less than GENERATION_NUMBER_UNDEF. This change does not affect the number of commits that are walked during the execution of paint_down_to_common(), only the order that those commits are inspected. In the case that commit dates violate topological order (i.e. a parent is "newer" than a child), the previous code could walk a commit twice: if a commit is reached with the PARENT1 bit, but later is re-visited with the PARENT2 bit, then that PARENT2 bit must be propagated to its parents. Using generation numbers avoids this extra effort, even if it is somewhat rare. Signed-off-by: Derrick Stolee --- commit.c | 19 ++- commit.h | 1 + 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/commit.c b/commit.c index 3e39c86abf..95ae7e13a3 100644 --- a/commit.c +++ b/commit.c @@ -624,6 +624,23 @@ static int compare_commits_by_author_date(const void *a_, const void *b_, return 0; } +int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused) +{ + const struct commit *a = a_, *b = b_; + + if (a->generation < b->generation) + return 1; + else if (a->generation > b->generation) + return -1; + + /* newer commits with larger date first */ + if (a->date < b->date) + return 1; + else if (a->date > b->date) + return -1; + return 0; +} + int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused) { const struct commit *a = a_, *b = b_; @@ -773,7 +790,7 @@ static int queue_has_nonstale(struct prio_queue *queue) /* all input commits in one and twos[] must have been parsed! */ static struct commit_list *paint_down_to_common(struct commit *one, int n, struct commit **twos) { - struct prio_queue queue = { compare_commits_by_commit_date }; + struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; struct commit_list *result = NULL; int i; diff --git a/commit.h b/commit.h index bc7a3186c5..cb97b7636a 100644 --- a/commit.h +++ b/commit.h @@ -332,6 +332,7 @@ extern int remove_signature(struct strbuf *buf); extern int check_commit_signature(const struct commit *commit, struct signature_check *sigc); int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused); +int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused); LAST_ARG_MUST_BE_NULL extern int run_commit_hook(int editor_is_used, const char *index_file, const char *name, ...); -- 2.17.0.rc0
[PATCH 5/6] commit.c: use generation to halt paint walk
In paint_down_to_common(), the walk is halted when the queue contains only stale commits. The queue_has_nonstale() method iterates over the entire queue looking for a nonstale commit. In a wide commit graph where the two sides share many commits in common, but have deep sets of different commits, this method may inspect many elements before finding a nonstale commit. In the worst case, this can give quadratic performance in paint_down_to_common(). Convert queue_has_nonstale() to use generation numbers for an O(1) termination condition. To properly take advantage of this condition, track the minimum generation number of a commit that enters the queue with nonstale status. Signed-off-by: Derrick Stolee --- commit.c | 37 ++--- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/commit.c b/commit.c index 95ae7e13a3..858f4fdbc9 100644 --- a/commit.c +++ b/commit.c @@ -776,14 +776,22 @@ void sort_in_topological_order(struct commit_list **list, enum rev_sort_order so static const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT); -static int queue_has_nonstale(struct prio_queue *queue) +static int queue_has_nonstale(struct prio_queue *queue, uint32_t min_gen) { - int i; - for (i = 0; i < queue->nr; i++) { - struct commit *commit = queue->array[i].data; - if (!(commit->object.flags & STALE)) - return 1; + if (min_gen != GENERATION_NUMBER_UNDEF) { + if (queue->nr > 0) { + struct commit *commit = queue->array[0].data; + return commit->generation >= min_gen; + } + } else { + int i; + for (i = 0; i < queue->nr; i++) { + struct commit *commit = queue->array[i].data; + if (!(commit->object.flags & STALE)) + return 1; + } } + return 0; } @@ -793,6 +801,8 @@ static struct commit_list *paint_down_to_common(struct commit *one, int n, struc struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; struct commit_list *result = NULL; int i; + uint32_t last_gen = GENERATION_NUMBER_UNDEF; + uint32_t min_nonstale_gen = GENERATION_NUMBER_UNDEF; one->object.flags |= PARENT1; if (!n) { @@ -800,17 +810,26 @@ static struct commit_list *paint_down_to_common(struct commit *one, int n, struc return result; } prio_queue_put(&queue, one); + if (one->generation < min_nonstale_gen) + min_nonstale_gen = one->generation; for (i = 0; i < n; i++) { twos[i]->object.flags |= PARENT2; prio_queue_put(&queue, twos[i]); + if (twos[i]->generation < min_nonstale_gen) + min_nonstale_gen = twos[i]->generation; } - while (queue_has_nonstale(&queue)) { + while (queue_has_nonstale(&queue, min_nonstale_gen)) { struct commit *commit = prio_queue_get(&queue); struct commit_list *parents; int flags; + if (commit->generation > last_gen) + BUG("bad generation skip"); + + last_gen = commit->generation; + flags = commit->object.flags & (PARENT1 | PARENT2 | STALE); if (flags == (PARENT1 | PARENT2)) { if (!(commit->object.flags & RESULT)) { @@ -830,6 +849,10 @@ static struct commit_list *paint_down_to_common(struct commit *one, int n, struc return NULL; p->object.flags |= flags; prio_queue_put(&queue, p); + + if (!(flags & STALE) && + p->generation < min_nonstale_gen) + min_nonstale_gen = p->generation; } } -- 2.17.0.rc0
[PATCH 6/6] commit-graph.txt: update future work
We now calculate generation numbers in the commit-graph file and use them in paint_down_to_common(). Signed-off-by: Derrick Stolee --- Documentation/technical/commit-graph.txt | 7 +-- 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index 0550c6d0dc..be68bee43d 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -98,17 +98,12 @@ Future Work - The 'commit-graph' subcommand does not have a "verify" mode that is necessary for integration with fsck. -- The file format includes room for precomputed generation numbers. These - are not currently computed, so all generation numbers will be marked as - 0 (or "uncomputed"). A later patch will include this calculation. - - After computing and storing generation numbers, we must make graph walks aware of generation numbers to gain the performance benefits they enable. This will mostly be accomplished by swapping a commit-date-ordered priority queue with one ordered by generation number. The following - operations are important candidates: + operation is an important candidate: -- paint_down_to_common() - 'log --topo-order' - Currently, parse_commit_gently() requires filling in the root tree -- 2.17.0.rc0
[PATCH 0/6] Compute and consume generation numbers
This is the first of several "small" patches that follow the serialized Git commit graph patch (ds/commit-graph). As described in Documentation/technical/commit-graph.txt, the generation number of a commit is one more than the maximum generation number among its parents (trivially, a commit with no parents has generation number one). This series makes the computation of generation numbers part of the commit-graph write process. Finally, generation numbers are used to order commits in the priority queue in paint_down_to_common(). This allows a constant-time check in queue_has_nonstale() instead of the previous linear-time check. This does not have a significant performance benefit in repositories of normal size, but in the Windows repository, some merge-base calculations improve from 3.1s to 2.9s. A modest speedup, but provides an actual consumer of generation numbers as a starting point. A more substantial refactoring of revision.c is required before making 'git log --graph' use generation numbers effectively. This patch series depends on v7 of ds/commit-graph. Derrick Stolee (6): object.c: parse commit in graph first commit: add generation number to struct commmit commit-graph: compute generation numbers commit: sort by generation number in paint_down_to_common() commit.c: use generation number to stop merge-base walks commit-graph.txt: update design doc with generation numbers Documentation/technical/commit-graph.txt | 7 +--- alloc.c | 1 + commit-graph.c | 48 + commit.c | 53 commit.h | 7 +++- object.c | 4 +- 6 files changed, 104 insertions(+), 16 deletions(-) -- 2.17.0.20.g9f30ba16e1
Re: [PATCH v2 00/15] Assorted fixes for `git config` (including the "empty sections" bug)
Hi team, On Tue, 3 Apr 2018, Johannes Schindelin wrote: > Johannes Schindelin (15): > git_config_set: fix off-by-two > t1300: rename it to reflect that `repo-config` was deprecated > t1300: demonstrate that --replace-all can "invent" newlines > config --replace-all: avoid extra line breaks > t1300: avoid relying on a bug > t1300: remove unreasonable expectation from TODO > t1300: `--unset-all` can leave an empty section behind (bug) > config: introduce an optional event stream while parsing > config: avoid using the global variable `store` > config_set_store: rename some fields for consistency > git_config_set: do not use a state machine > git_config_set: make use of the config parser's event stream > git config --unset: remove empty sections (in the common case) > git_config_set: reuse empty sections > TODOs Please note that the `TODOs` commit is a left-over of my internal book-keeping, and its diff is actually empty. Hence `format-patch` does not even generate a mail for it, so there is no [PATCH v2 15/15]. Thanks, Dscho
[PATCH v2 14/15] git_config_set: reuse empty sections
It can happen quite easily that the last setting in a config section is removed, and to avoid confusion when there are comments in the config about that section, we keep a lone section header, i.e. an empty section. Now that we use the `event_fn` callback, it is easy to add support for re-using empty sections, so let's do that. Note: t5512-ls-remote requires that this change is applied *after* the patch "git config --unset: remove empty sections (in the common case)": without that patch, there would be empty `transfer` and `uploadpack` sections ready for reuse, but in the *wrong* order (and sconsequently, t5512's "overrides work between mixed transfer/upload-pack hideRefs" would fail). Signed-off-by: Johannes Schindelin --- config.c | 14 +- t/t1300-config.sh | 2 +- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/config.c b/config.c index 271e9605ec1..ee7ea24123d 100644 --- a/config.c +++ b/config.c @@ -2345,6 +2345,12 @@ static int store_aux_event(enum config_event_t type, store->parsed[store->parsed_nr].is_keys_section = cf->var.len - 1 == store->baselen && !strncasecmp(cf->var.buf, store->key, store->baselen); + if (store->is_keys_section) { + store->section_seen = 1; + ALLOC_GROW(store->seen, store->seen_nr + 1, + store->seen_alloc); + store->seen[store->seen_nr] = store->parsed_nr; + } } store->parsed_nr++; @@ -2779,7 +2785,13 @@ int git_config_set_multivar_in_file_gently(const char *config_filename, new_line = 0; if (!store.key_seen) { - replace_end = copy_end = store.parsed[j].end; + copy_end = store.parsed[j].end; + /* include '\n' when copying section header */ + if (copy_end > 0 && copy_end < contents_sz && + contents[copy_end - 1] != '\n' && + contents[copy_end] == '\n') + copy_end++; + replace_end = copy_end; } else { replace_end = store.parsed[j].end; copy_end = store.parsed[j].begin; diff --git a/t/t1300-config.sh b/t/t1300-config.sh index 6d34513eedd..6d0e13020d1 100755 --- a/t/t1300-config.sh +++ b/t/t1300-config.sh @@ -1463,7 +1463,7 @@ test_expect_success '--unset-all removes section if empty & uncommented' ' test_line_count = 0 .git/config ' -test_expect_failure 'adding a key into an empty section reuses header' ' +test_expect_success 'adding a key into an empty section reuses header' ' cat >.git/config <<-\EOF && [section] EOF -- 2.16.2.windows.1.26.g2cc3565eb4b
[PATCH v2 12/15] git_config_set: make use of the config parser's event stream
In the recent commit with the title "config: introduce an optional event stream while parsing", we introduced an optional callback to keep track of the config parser's events "comment", "white-space", "section header" and "entry". One motivation for this feature was to make use of it in the code that edits the config. And this commit makes it so. Note: this patch changes the meaning of the `seen` array that records whether we saw the config entry that is to be edited: previously, it contained the end offset of the found entry. Now, we introduce a new array `parsed` that keeps a record of *all* config parser events (with begin/end offsets), and the items in the `seen` array now point into the `parsed` array. There are two reasons why we do it this way: 1. To keep the implementation simple, the config parser's event stream reports the event only after the config callback was called, so we would not receive the begin offset otherwise. 2. In the following patches, we will re-use the `parsed` array to fix two long-standing bugs related to empty sections. Note that this also makes the code more robust with respect to finding the begin offset of the part(s) of the config file to be edited, as we no longer back-track to find the beginning of the line. Signed-off-by: Johannes Schindelin --- config.c | 170 ++- 1 file changed, 81 insertions(+), 89 deletions(-) diff --git a/config.c b/config.c index 84e8f7ffeb8..345b1d2f140 100644 --- a/config.c +++ b/config.c @@ -2303,8 +2303,11 @@ struct config_set_store { int do_not_match; regex_t *value_regex; int multi_replace; - size_t *seen; - unsigned int seen_nr, seen_alloc; + struct { + size_t begin, end; + enum config_event_t type; + } *parsed; + unsigned int parsed_nr, parsed_alloc, *seen, seen_nr, seen_alloc; unsigned int key_seen:1, section_seen:1, is_keys_section:1; }; @@ -2322,10 +2325,31 @@ static int matches(const char *key, const char *value, (value && !regexec(store->value_regex, value, 0, NULL, 0)); } +static int store_aux_event(enum config_event_t type, + size_t begin, size_t end, void *data) +{ + struct config_set_store *store = data; + + ALLOC_GROW(store->parsed, store->parsed_nr + 1, store->parsed_alloc); + store->parsed[store->parsed_nr].begin = begin; + store->parsed[store->parsed_nr].end = end; + store->parsed[store->parsed_nr].type = type; + store->parsed_nr++; + + if (type == CONFIG_EVENT_SECTION) { + if (cf->var.len < 2 || cf->var.buf[cf->var.len - 1] != '.') + BUG("Invalid section name '%s'", cf->var.buf); + + /* Is this the section we were looking for? */ + store->is_keys_section = cf->var.len - 1 == store->baselen && + !strncasecmp(cf->var.buf, store->key, store->baselen); + } + + return 0; +} + static int store_aux(const char *key, const char *value, void *cb) { - const char *ep; - size_t section_len; struct config_set_store *store = cb; if (store->key_seen) { @@ -2337,55 +2361,21 @@ static int store_aux(const char *key, const char *value, void *cb) ALLOC_GROW(store->seen, store->seen_nr + 1, store->seen_alloc); - store->seen[store->seen_nr] = cf->do_ftell(cf); + store->seen[store->seen_nr] = store->parsed_nr; store->seen_nr++; } - return 0; } else if (store->is_keys_section) { /* -* What we are looking for is in store->key (both -* section and var), and its section part is baselen -* long. We found key (again, both section and var). -* We would want to know if this key is in the same -* section as what we are looking for. We already -* know we are in the same section as what should -* hold store->key. +* Do not increment matches yet: this may not be a match, but we +* are in the desired section. */ - ep = strrchr(key, '.'); - section_len = ep - key; - - if ((section_len != store->baselen) || - memcmp(key, store->key, section_len+1)) { - store->is_keys_section = 0; - return 0; - } - /* -* Do not increment matches: this is no match, but we -* just made sure we are in the desired section. -*/ - ALLOC_GROW(store->seen, store->seen_nr + 1, - store->seen_alloc); - store->seen[store->seen_nr] = cf->do_ftel
[PATCH v2 10/15] config_set_store: rename some fields for consistency
The `seen` field is the actual length of the `offset` array, and the `offset_alloc` field records what was allocated (to avoid resizing wherever `seen` has to be incremented). Elsewhere, we use the convention `name` for the array, where `name` is descriptive enough to guess its purpose, `name_nr` for the actual length and `name_alloc` to record the maximum length without needing to resize. Let's make the names of the fields in question consistent with that convention. This will also help with the next steps where we will let the git_config_set() machinery use the config event stream that we just introduced. Signed-off-by: Johannes Schindelin --- config.c | 63 +++ 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/config.c b/config.c index 90ae71cb905..b73b48b5650 100644 --- a/config.c +++ b/config.c @@ -2303,10 +2303,9 @@ struct config_set_store { int do_not_match; regex_t *value_regex; int multi_replace; - size_t *offset; - unsigned int offset_alloc; + size_t *seen; + unsigned int seen_nr, seen_alloc; enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state; - unsigned int seen; }; static int matches(const char *key, const char *value, @@ -2332,15 +2331,15 @@ static int store_aux(const char *key, const char *value, void *cb) switch (store->state) { case KEY_SEEN: if (matches(key, value, store)) { - if (store->seen == 1 && store->multi_replace == 0) { + if (store->seen_nr == 1 && store->multi_replace == 0) { warning(_("%s has multiple values"), key); } - ALLOC_GROW(store->offset, store->seen + 1, - store->offset_alloc); + ALLOC_GROW(store->seen, store->seen_nr + 1, + store->seen_alloc); - store->offset[store->seen] = cf->do_ftell(cf); - store->seen++; + store->seen[store->seen_nr] = cf->do_ftell(cf); + store->seen_nr++; } break; case SECTION_SEEN: @@ -2366,26 +2365,26 @@ static int store_aux(const char *key, const char *value, void *cb) * Do not increment matches: this is no match, but we * just made sure we are in the desired section. */ - ALLOC_GROW(store->offset, store->seen + 1, - store->offset_alloc); - store->offset[store->seen] = cf->do_ftell(cf); + ALLOC_GROW(store->seen, store->seen_nr + 1, + store->seen_alloc); + store->seen[store->seen_nr] = cf->do_ftell(cf); /* fallthru */ case SECTION_END_SEEN: case START: if (matches(key, value, store)) { - ALLOC_GROW(store->offset, store->seen + 1, - store->offset_alloc); - store->offset[store->seen] = cf->do_ftell(cf); + ALLOC_GROW(store->seen, store->seen_nr + 1, + store->seen_alloc); + store->seen[store->seen_nr] = cf->do_ftell(cf); store->state = KEY_SEEN; - store->seen++; + store->seen_nr++; } else { if (strrchr(key, '.') - key == store->baselen && !strncmp(key, store->key, store->baselen)) { store->state = SECTION_SEEN; - ALLOC_GROW(store->offset, - store->seen + 1, - store->offset_alloc); - store->offset[store->seen] = + ALLOC_GROW(store->seen, + store->seen_nr + 1, + store->seen_alloc); + store->seen[store->seen_nr] = cf->do_ftell(cf); } } @@ -2645,10 +2644,10 @@ int git_config_set_multivar_in_file_gently(const char *config_filename, } } - ALLOC_GROW(store.offset, 1, store.offset_alloc); - store.offset[0] = 0; + ALLOC_GROW(store.seen, 1, store.seen_alloc); + store.seen[0] = 0; store.state = START; - store.seen = 0; + store.seen_nr = 0; /* * After this, store.offset will contain the *end* offset @@ -2676,
[PATCH v2 13/15] git config --unset: remove empty sections (in the common case)
The original reasoning for not removing section headers upon removal of the last entry went like this: the user could have added comments about the section, or about the entries therein, and if there were other comments there, we would not know whether we should remove them. In particular, a concocted example was presented that looked like this (and was added to t1300): # some generic comment on the configuration file itself # a comment specific to this "section" section. [section] # some intervening lines # that should also be dropped key = value # please be careful when you update the above variable The ideal thing for `git config --unset section.key` in this case would be to leave only the first line behind, because all the other comments are now obsolete. However, this is unfeasible, short of adding a complete Natural Language Processing module to Git, which seems not only a lot of work, but a totally unreasonable feature (for little benefit to most users). Now, the real kicker about this problem is: most users do not edit their config files at all! In their use case, the config looks like this instead: [section] key = value ... and it is totally obvious what should happen if the entry is removed: the entire section should vanish. Let's generalize this observation to this conservative strategy: if we are removing the last entry from a section, and there are no comments inside that section nor surrounding it, then remove the entire section. Otherwise behave as before: leave the now-empty section (including those comments, even ones about the now-deleted entry). We have to be extra careful to handle the case where more than one entry is removed: any subset of them might be the last entries of their respective sections (and if there are no comments in or around that section, the section should be removed, too). Signed-off-by: Johannes Schindelin --- config.c | 93 +-- t/t1300-config.sh | 4 +-- 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/config.c b/config.c index 345b1d2f140..271e9605ec1 100644 --- a/config.c +++ b/config.c @@ -2306,6 +2306,7 @@ struct config_set_store { struct { size_t begin, end; enum config_event_t type; + int is_keys_section; } *parsed; unsigned int parsed_nr, parsed_alloc, *seen, seen_nr, seen_alloc; unsigned int key_seen:1, section_seen:1, is_keys_section:1; @@ -2334,17 +2335,20 @@ static int store_aux_event(enum config_event_t type, store->parsed[store->parsed_nr].begin = begin; store->parsed[store->parsed_nr].end = end; store->parsed[store->parsed_nr].type = type; - store->parsed_nr++; if (type == CONFIG_EVENT_SECTION) { if (cf->var.len < 2 || cf->var.buf[cf->var.len - 1] != '.') BUG("Invalid section name '%s'", cf->var.buf); /* Is this the section we were looking for? */ - store->is_keys_section = cf->var.len - 1 == store->baselen && + store->is_keys_section = + store->parsed[store->parsed_nr].is_keys_section = + cf->var.len - 1 == store->baselen && !strncasecmp(cf->var.buf, store->key, store->baselen); } + store->parsed_nr++; + return 0; } @@ -2476,6 +2480,87 @@ static ssize_t write_pair(int fd, const char *key, const char *value, return ret; } +/* + * If we are about to unset the last key(s) in a section, and if there are + * no comments surrounding (or included in) the section, we will want to + * extend begin/end to remove the entire section. + * + * Note: the parameter `seen_ptr` points to the index into the store.seen + * array. * This index may be incremented if a section has more than one + * entry (which all are to be removed). + */ +static void maybe_remove_section(struct config_set_store *store, +const char *contents, +size_t *begin_offset, size_t *end_offset, +int *seen_ptr) +{ + size_t begin; + int i, seen, section_seen = 0; + + /* +* First, ensure that this is the first key, and that there are no +* comments before the entry nor before the section header. +*/ + seen = *seen_ptr; + for (i = store->seen[seen]; i > 0; i--) { + enum config_event_t type = store->parsed[i - 1].type; + + if (type == CONFIG_EVENT_COMMENT) + /* There is a comment before this entry or section */ + return; + if (type == CONFIG_EVENT_ENTRY) { + if (!section_seen) + /* This is not the section's first entry. */ +
[PATCH v2 08/15] config: introduce an optional event stream while parsing
This extends our config parser so that it can optionally produce an event stream via callback function, where it reports e.g. when a comment was parsed, or a section header, etc. This parser will be used subsequently to handle the scenarios better where removing config entries would make sections empty, or where a new entry could be added to an already-existing, empty section. Signed-off-by: Johannes Schindelin --- config.c | 102 +++ config.h | 25 2 files changed, 115 insertions(+), 12 deletions(-) diff --git a/config.c b/config.c index f10f8c6f52f..4cd745f6628 100644 --- a/config.c +++ b/config.c @@ -653,7 +653,46 @@ static int get_base_var(struct strbuf *name) } } -static int git_parse_source(config_fn_t fn, void *data) +struct parse_event_data { + enum config_event_t previous_type; + size_t previous_offset; + const struct config_options *opts; +}; + +static inline int do_event(enum config_event_t type, + struct parse_event_data *data) +{ + size_t offset; + + if (!data->opts || !data->opts->event_fn) + return 0; + + if (type == CONFIG_EVENT_WHITESPACE && + data->previous_type == type) + return 0; + + offset = cf->do_ftell(cf); + /* +* At EOF, the parser always "inserts" an extra '\n', therefore +* the end offset of the event is the current file position, otherwise +* we will already have advanced to the next event. +*/ + if (type != CONFIG_EVENT_EOF) + offset--; + + if (data->previous_type != CONFIG_EVENT_EOF && + data->opts->event_fn(data->previous_type, data->previous_offset, +offset, data->opts->event_fn_data) < 0) + return -1; + + data->previous_type = type; + data->previous_offset = offset; + + return 0; +} + +static int git_parse_source(config_fn_t fn, void *data, + const struct config_options *opts) { int comment = 0; int baselen = 0; @@ -664,8 +703,15 @@ static int git_parse_source(config_fn_t fn, void *data) /* U+FEFF Byte Order Mark in UTF8 */ const char *bomptr = utf8_bom; + /* For the parser event callback */ + struct parse_event_data event_data = { + CONFIG_EVENT_EOF, 0, opts + }; + for (;;) { - int c = get_next_char(); + int c; + + c = get_next_char(); if (bomptr && *bomptr) { /* We are at the file beginning; skip UTF8-encoded BOM * if present. Sane editors won't put this in on their @@ -682,18 +728,33 @@ static int git_parse_source(config_fn_t fn, void *data) } } if (c == '\n') { - if (cf->eof) + if (cf->eof) { + if (do_event(CONFIG_EVENT_EOF, &event_data) < 0) + return -1; return 0; + } + if (do_event(CONFIG_EVENT_WHITESPACE, &event_data) < 0) + return -1; comment = 0; continue; } - if (comment || isspace(c)) + if (comment) continue; + if (isspace(c)) { + if (do_event(CONFIG_EVENT_WHITESPACE, &event_data) < 0) + return -1; + continue; + } if (c == '#' || c == ';') { + if (do_event(CONFIG_EVENT_COMMENT, &event_data) < 0) + return -1; comment = 1; continue; } if (c == '[') { + if (do_event(CONFIG_EVENT_SECTION, &event_data) < 0) + return -1; + /* Reset prior to determining a new stem */ strbuf_reset(var); if (get_base_var(var) < 0 || var->len < 1) @@ -704,6 +765,10 @@ static int git_parse_source(config_fn_t fn, void *data) } if (!isalpha(c)) break; + + if (do_event(CONFIG_EVENT_ENTRY, &event_data) < 0) + return -1; + /* * Truncate the var name back to the section header * stem prior to grabbing the suffix part of the name @@ -715,6 +780,9 @@ static int git_parse_source(config_fn_t fn, void *data) break; } + if (do_event(CONFIG_EVENT_ERROR, &event_data) < 0) + return -1; + switch (c
[PATCH v2 07/15] t1300: `--unset-all` can leave an empty section behind (bug)
We already have a test demonstrating that removing the last entry from a config section fails to remove the section header of the now-empty section. The same can happen, of course, if we remove the last entries in one fell swoop. This is *also* a bug, and should be fixed at the same time. Signed-off-by: Johannes Schindelin --- t/t1300-config.sh | 11 +++ 1 file changed, 11 insertions(+) diff --git a/t/t1300-config.sh b/t/t1300-config.sh index 187fc5b195f..10b9bf4b088 100755 --- a/t/t1300-config.sh +++ b/t/t1300-config.sh @@ -1452,6 +1452,17 @@ test_expect_failure '--unset last key removes section (except if commented)' ' test_cmp expect .git/config ' +test_expect_failure '--unset-all removes section if empty & uncommented' ' + cat >.git/config <<-\EOF && + [section] + key = value1 + key = value2 + EOF + + git config --unset-all section.key && + test_line_count = 0 .git/config +' + test_expect_failure 'adding a key into an empty section reuses header' ' cat >.git/config <<-\EOF && [section] -- 2.16.2.windows.1.26.g2cc3565eb4b
[PATCH v2 09/15] config: avoid using the global variable `store`
It is much easier to reason about, when the config code to set/unset variables or to remove/rename sections does not rely on a global (or file-local) variable. Signed-off-by: Johannes Schindelin --- config.c | 119 +++ 1 file changed, 66 insertions(+), 53 deletions(-) diff --git a/config.c b/config.c index 4cd745f6628..90ae71cb905 100644 --- a/config.c +++ b/config.c @@ -2297,7 +2297,7 @@ void git_die_config(const char *key, const char *err, ...) * Find all the stuff for git_config_set() below. */ -static struct { +struct config_set_store { int baselen; char *key; int do_not_match; @@ -2307,56 +2307,58 @@ static struct { unsigned int offset_alloc; enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state; unsigned int seen; -} store; +}; -static int matches(const char *key, const char *value) +static int matches(const char *key, const char *value, + const struct config_set_store *store) { - if (strcmp(key, store.key)) + if (strcmp(key, store->key)) return 0; /* not ours */ - if (!store.value_regex) + if (!store->value_regex) return 1; /* always matches */ - if (store.value_regex == CONFIG_REGEX_NONE) + if (store->value_regex == CONFIG_REGEX_NONE) return 0; /* never matches */ - return store.do_not_match ^ - (value && !regexec(store.value_regex, value, 0, NULL, 0)); + return store->do_not_match ^ + (value && !regexec(store->value_regex, value, 0, NULL, 0)); } static int store_aux(const char *key, const char *value, void *cb) { const char *ep; size_t section_len; + struct config_set_store *store = cb; - switch (store.state) { + switch (store->state) { case KEY_SEEN: - if (matches(key, value)) { - if (store.seen == 1 && store.multi_replace == 0) { + if (matches(key, value, store)) { + if (store->seen == 1 && store->multi_replace == 0) { warning(_("%s has multiple values"), key); } - ALLOC_GROW(store.offset, store.seen + 1, - store.offset_alloc); + ALLOC_GROW(store->offset, store->seen + 1, + store->offset_alloc); - store.offset[store.seen] = cf->do_ftell(cf); - store.seen++; + store->offset[store->seen] = cf->do_ftell(cf); + store->seen++; } break; case SECTION_SEEN: /* -* What we are looking for is in store.key (both +* What we are looking for is in store->key (both * section and var), and its section part is baselen * long. We found key (again, both section and var). * We would want to know if this key is in the same * section as what we are looking for. We already * know we are in the same section as what should -* hold store.key. +* hold store->key. */ ep = strrchr(key, '.'); section_len = ep - key; - if ((section_len != store.baselen) || - memcmp(key, store.key, section_len+1)) { - store.state = SECTION_END_SEEN; + if ((section_len != store->baselen) || + memcmp(key, store->key, section_len+1)) { + store->state = SECTION_END_SEEN; break; } @@ -2364,26 +2366,27 @@ static int store_aux(const char *key, const char *value, void *cb) * Do not increment matches: this is no match, but we * just made sure we are in the desired section. */ - ALLOC_GROW(store.offset, store.seen + 1, - store.offset_alloc); - store.offset[store.seen] = cf->do_ftell(cf); + ALLOC_GROW(store->offset, store->seen + 1, + store->offset_alloc); + store->offset[store->seen] = cf->do_ftell(cf); /* fallthru */ case SECTION_END_SEEN: case START: - if (matches(key, value)) { - ALLOC_GROW(store.offset, store.seen + 1, - store.offset_alloc); - store.offset[store.seen] = cf->do_ftell(cf); - store.state = KEY_SEEN; - store.seen++; + if (matches(key, value, store)) { + ALLOC_GROW(store->offset, store->seen + 1, + store->offset_
[PATCH v2 11/15] git_config_set: do not use a state machine
While a neat theoretical construct, state machines are hard to read. In this instance, it does not even make a whole lot of sense because we are more interested in flags, anyway: has the section been seen? Has the key been seen? Does the current section match the key we are looking for? Besides, the state `SECTION_SEEN` was named in a misleading way: it did not indicate that we saw the section matching the key we are looking for, but it instead indicated that we are *currently* in that section. Let's just replace the state machine logic by clear and obvious flags. This will also make it easier to review the upcoming patches to use the newly-introduced `event_fn` callback of the config parser. Signed-off-by: Johannes Schindelin --- config.c | 59 +-- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/config.c b/config.c index b73b48b5650..84e8f7ffeb8 100644 --- a/config.c +++ b/config.c @@ -2305,7 +2305,7 @@ struct config_set_store { int multi_replace; size_t *seen; unsigned int seen_nr, seen_alloc; - enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state; + unsigned int key_seen:1, section_seen:1, is_keys_section:1; }; static int matches(const char *key, const char *value, @@ -2328,8 +2328,7 @@ static int store_aux(const char *key, const char *value, void *cb) size_t section_len; struct config_set_store *store = cb; - switch (store->state) { - case KEY_SEEN: + if (store->key_seen) { if (matches(key, value, store)) { if (store->seen_nr == 1 && store->multi_replace == 0) { warning(_("%s has multiple values"), key); @@ -2341,8 +2340,8 @@ static int store_aux(const char *key, const char *value, void *cb) store->seen[store->seen_nr] = cf->do_ftell(cf); store->seen_nr++; } - break; - case SECTION_SEEN: + return 0; + } else if (store->is_keys_section) { /* * What we are looking for is in store->key (both * section and var), and its section part is baselen @@ -2357,10 +2356,9 @@ static int store_aux(const char *key, const char *value, void *cb) if ((section_len != store->baselen) || memcmp(key, store->key, section_len+1)) { - store->state = SECTION_END_SEEN; - break; + store->is_keys_section = 0; + return 0; } - /* * Do not increment matches: this is no match, but we * just made sure we are in the desired section. @@ -2368,27 +2366,29 @@ static int store_aux(const char *key, const char *value, void *cb) ALLOC_GROW(store->seen, store->seen_nr + 1, store->seen_alloc); store->seen[store->seen_nr] = cf->do_ftell(cf); - /* fallthru */ - case SECTION_END_SEEN: - case START: - if (matches(key, value, store)) { - ALLOC_GROW(store->seen, store->seen_nr + 1, - store->seen_alloc); - store->seen[store->seen_nr] = cf->do_ftell(cf); - store->state = KEY_SEEN; - store->seen_nr++; - } else { - if (strrchr(key, '.') - key == store->baselen && - !strncmp(key, store->key, store->baselen)) { - store->state = SECTION_SEEN; - ALLOC_GROW(store->seen, - store->seen_nr + 1, - store->seen_alloc); - store->seen[store->seen_nr] = - cf->do_ftell(cf); - } + } + + if (matches(key, value, store)) { + ALLOC_GROW(store->seen, store->seen_nr + 1, + store->seen_alloc); + store->seen[store->seen_nr] = cf->do_ftell(cf); + store->seen_nr++; + store->key_seen = 1; + store->section_seen = 1; + store->is_keys_section = 1; + } else { + if (strrchr(key, '.') - key == store->baselen && + !strncmp(key, store->key, store->baselen)) { + store->section_seen = 1; + store->is_keys_section = 1; + ALLOC_GROW(store->seen, + store->seen_nr + 1, + store->seen_alloc); + store->seen[store->se
[PATCH v2 05/15] t1300: avoid relying on a bug
The test case 'unset with cont. lines' relied on a bug that is about to be fixed: it tests *explicitly* that removing the last entry from a config section leaves an *empty* section behind. Let's fix this test case not to rely on that behavior, simply by preventing the section from becoming empty. Signed-off-by: Johannes Schindelin --- t/t1300-config.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/t/t1300-config.sh b/t/t1300-config.sh index aed12be492f..7c0ee208dea 100755 --- a/t/t1300-config.sh +++ b/t/t1300-config.sh @@ -108,6 +108,7 @@ bar = foo [beta] baz = multiple \ lines +foo = bar EOF test_expect_success 'unset with cont. lines' ' @@ -118,6 +119,7 @@ cat > expect <<\EOF [alpha] bar = foo [beta] +foo = bar EOF test_expect_success 'unset with cont. lines is correct' 'test_cmp expect .git/config' -- 2.16.2.windows.1.26.g2cc3565eb4b
[PATCH v2 04/15] config --replace-all: avoid extra line breaks
When replacing multiple config entries at once, we did not re-set the flag that indicates whether we need to insert a new-line before the new entry. As a consequence, an extra new-line was inserted under certain circumstances. Signed-off-by: Johannes Schindelin --- config.c | 1 + t/t1300-config.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/config.c b/config.c index 5cc049aaef0..f10f8c6f52f 100644 --- a/config.c +++ b/config.c @@ -2625,6 +2625,7 @@ int git_config_set_multivar_in_file_gently(const char *config_filename, store.seen = 1; for (i = 0, copy_begin = 0; i < store.seen; i++) { + new_line = 0; if (store.offset[i] == 0) { store.offset[i] = copy_end = contents_sz; } else if (store.state != KEY_SEEN) { diff --git a/t/t1300-config.sh b/t/t1300-config.sh index cc417687e8d..aed12be492f 100755 --- a/t/t1300-config.sh +++ b/t/t1300-config.sh @@ -1611,7 +1611,7 @@ test_expect_success '--local requires a repo' ' test_expect_code 128 nongit git config --local foo.bar ' -test_expect_failure '--replace-all does not invent newlines' ' +test_expect_success '--replace-all does not invent newlines' ' q_to_tab >.git/config <<-\EOF && [abc]key QkeepSection -- 2.16.2.windows.1.26.g2cc3565eb4b
[PATCH v2 03/15] t1300: demonstrate that --replace-all can "invent" newlines
Signed-off-by: Johannes Schindelin --- t/t1300-config.sh | 21 + 1 file changed, 21 insertions(+) diff --git a/t/t1300-config.sh b/t/t1300-config.sh index 4f8e6f5fde3..cc417687e8d 100755 --- a/t/t1300-config.sh +++ b/t/t1300-config.sh @@ -1611,4 +1611,25 @@ test_expect_success '--local requires a repo' ' test_expect_code 128 nongit git config --local foo.bar ' +test_expect_failure '--replace-all does not invent newlines' ' + q_to_tab >.git/config <<-\EOF && + [abc]key + QkeepSection + [xyz] + Qkey = 1 + [abc] + Qkey = a + EOF + q_to_tab >expect <<-\EOF && + [abc] + QkeepSection + [xyz] + Qkey = 1 + [abc] + Qkey = b + EOF + git config --replace-all abc.key b && + test_cmp .git/config expect +' + test_done -- 2.16.2.windows.1.26.g2cc3565eb4b
[PATCH v2 02/15] t1300: rename it to reflect that `repo-config` was deprecated
Signed-off-by: Johannes Schindelin --- t/{t1300-repo-config.sh => t1300-config.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename t/{t1300-repo-config.sh => t1300-config.sh} (100%) diff --git a/t/t1300-repo-config.sh b/t/t1300-config.sh similarity index 100% rename from t/t1300-repo-config.sh rename to t/t1300-config.sh -- 2.16.2.windows.1.26.g2cc3565eb4b
[PATCH v2 06/15] t1300: remove unreasonable expectation from TODO
In https://public-inbox.org/git/7vvc8alzat@alter.siamese.dyndns.org/ a reasonable patch was made quite a bit less so by changing a test case demonstrating a bug to a test case that demonstrates that we ask for too much: the test case 'unsetting the last key in a section removes header' now expects a future bug fix to be able to determine whether a free-form comment above a section header refers to said section or not. Rather than shooting for the stars (and not even getting off the ground), let's start shooting for something obtainable and be reasonably confident that we *can* get it. Signed-off-by: Johannes Schindelin --- t/t1300-config.sh | 21 - 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/t/t1300-config.sh b/t/t1300-config.sh index 7c0ee208dea..187fc5b195f 100755 --- a/t/t1300-config.sh +++ b/t/t1300-config.sh @@ -1413,7 +1413,7 @@ test_expect_success 'urlmatch with wildcard' ' ' # good section hygiene -test_expect_failure 'unsetting the last key in a section removes header' ' +test_expect_failure '--unset last key removes section (except if commented)' ' cat >.git/config <<-\EOF && # some generic comment on the configuration file itself # a comment specific to this "section" section. @@ -1427,6 +1427,25 @@ test_expect_failure 'unsetting the last key in a section removes header' ' cat >expect <<-\EOF && # some generic comment on the configuration file itself + # a comment specific to this "section" section. + [section] + # some intervening lines + # that should also be dropped + + # please be careful when you update the above variable + EOF + + git config --unset section.key && + test_cmp expect .git/config && + + cat >.git/config <<-\EOF && + [section] + key = value + [next-section] + EOF + + cat >expect <<-\EOF && + [next-section] EOF git config --unset section.key && -- 2.16.2.windows.1.26.g2cc3565eb4b
[PATCH v2 00/15] Assorted fixes for `git config` (including the "empty sections" bug)
This patch series originally only tried to help fixing that annoying bug that has been reported several times over the years, where `git config --unset` would leave empty sections behind, and `git config --add` would not reuse them. The first patch is somewhat of a "while at it" bug fix that I first thought would be a lot more critical than it actually is: It really only affects config files that start with a section followed immediately (i.e. without a newline) by a one-letter boolean setting (i.e. without a `= ` part). So while it is a real bug fix, I doubt anybody ever got bitten by it. The next swath of patches add and fix some tests, while also fixing the bug where --replace-all would sometimes insert extra line breaks. These fixes are pretty straight-forward, and I always try to keep my added tests as concise as possible, so please tell me if you find a way to make them smaller (without giving up readability and debuggability). Then, I introduce a couple of building blocks: a "config parser event stream", i.e. an optional callback that can be used to report events such as "comment", "white-space", etc together with the corresponding extents in the config file. Finally, the interesting part, where I do two things, essentially (with preparatory steps for each thing): 1. I add the ability for `git config --unset/--unset-all` to detect that it can remove a section that has just become empty (see below for some more discussion of what I consider "become empty"), and 2. I add the ability for `git config [--add] key value` to re-use empty sections. I am very, very grateful for the time Peff spent on reviewing the previous iteration, and hope that he realizes just how much the elegance of the event-stream-based version is due to his excellent review. To reiterate why does this patch series not conflict with my very early statements that we cannot simply remove empty sections because we may end up with stale comments? Well, the patch in question takes pains to determine *iff* there are any comments surrounding, or included in, the section. If any are found: previous behavior. Under the assumption that the user edited the file, we keep it as intact as possible (see below for some argument against this). If no comments are found, and let's face it, this is probably *the* common case, as few people edit their config files by hand these days (neither should they because it is too easy to end up with an unparseable one), the now-empty section *is* removed. So what is the argument against this extra care to detect comments? Well, if you have something like this: [section] ; Here we comment about the variable called snarf snarf = froop and we run `git config --unset section.snarf`, we end up with this config: [section] ; Here we comment about the variable called snarf which obviously does not make sense. However, that is already established behavior for quite a few years, and I do not even try to think of a way how this could be solved. Changes since v1: - a new feature was introduced where the config parser can be asked to report all "events" (like, section header or comment) via a callback function. - the patches to reuse empty sections, and to remove sections that just became empty (without any surrounding comments) were rewritten to make use of that config parser event stream (incidentally fixing a couple of problems with the backtracking version which were pointed out by Peff). - to make those changes easier to review, they have been split up into several tiny logical steps: the file-local `store` was replaced with callback data, some fields were renamed for consistency, the state machine when parsing the config was replaced by easier-to-understand flags, etc. - while pouring over the code, I managed to find another obscure bug: under certain circumstances, --replace-all could produce extra new-lines. This is now fixed as part of the preparatory patches. Johannes Schindelin (15): git_config_set: fix off-by-two t1300: rename it to reflect that `repo-config` was deprecated t1300: demonstrate that --replace-all can "invent" newlines config --replace-all: avoid extra line breaks t1300: avoid relying on a bug t1300: remove unreasonable expectation from TODO t1300: `--unset-all` can leave an empty section behind (bug) config: introduce an optional event stream while parsing config: avoid using the global variable `store` config_set_store: rename some fields for consistency git_config_set: do not use a state machine git_config_set: make use of the config parser's event stream git config --unset: remove empty sections (in the common case) git_config_set: reuse empty sections TODOs config.c| 449 config.h| 25 ++ t/{t1300-repo-config.sh => t1300-config.sh} | 57 +++- 3 files
[PATCH v2 01/15] git_config_set: fix off-by-two
Currently, we are slightly overzealous When removing an entry from a config file of this form: [abc]a [xyz] key = value When calling `git config --unset abc.a` on this file, it leaves this (invalid) config behind: [ [xyz] key = value The reason is that we try to search for the beginning of the line (or for the end of the preceding section header on the same line) that defines abc.a, but as an optimization, we subtract 2 from the offset pointing just after the definition before we call find_beginning_of_line(). That function, however, *also* performs that optimization and promptly fails to find the section header correctly. Signed-off-by: Johannes Schindelin --- config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.c b/config.c index b0c20e6cb8a..5cc049aaef0 100644 --- a/config.c +++ b/config.c @@ -2632,7 +2632,7 @@ int git_config_set_multivar_in_file_gently(const char *config_filename, } else copy_end = find_beginning_of_line( contents, contents_sz, - store.offset[i]-2, &new_line); + store.offset[i], &new_line); if (copy_end > 0 && contents[copy_end-1] != '\n') new_line = 1; -- 2.16.2.windows.1.26.g2cc3565eb4b
Re: A potential approach to making tests faster on Windows
Hi Peff, On Tue, 3 Apr 2018, Jeff King wrote: > On Tue, Apr 03, 2018 at 01:43:10PM +0200, Johannes Schindelin wrote: > > > > I don't have time or interest to work on this now, but thought it was > > > interesting to share. This assumes that something in shellscript like: > > > > > > while echo foo; do echo bar; done > > > > > > Is no slower on Windows than *nix, since it's purely using built-ins, as > > > opposed to something that would shell out. > > > > It is still interpreting stuff. And it still goes through the POSIX > > emulation layer. > > > > I did see reports on the Git for Windows bug tracker that gave me the > > impression that such loops in Unix shell scripts may not, in fact, be as > > performant in MSYS2's Bash as you would like to believe: > > > > https://github.com/git-for-windows/git/issues/1533#issuecomment-372025449 > > The main problem with `read` loops in shell is that the shell makes one > read() syscall per character. It has to, because doing otherwise is > user-visible in cases where the descriptor may get passed to a different > process. Thank you for the explanation. Makes tons of sense now. > There's unfortunately no portable way to say "please just read this > quickly, I promise nobody else is going to read the descriptor". And nor > do I know of any shell which is smart enough to know that it's going to > consume to EOF anyway (as you would for something like "cmd | while > read"). > > If you know you have bash, you can use "-N" to get a more efficient > read: > > $ echo foo | strace -e read bash -c 'read foo' > [...] > read(0, "f", 1) = 1 > read(0, "o", 1) = 1 > read(0, "o", 1) = 1 > read(0, "\n", 1)= 1 > > $ echo foo | strace -e read bash -c 'read -N 10 foo' > [...] > read(0, "foo\n", 10)= 4 > read(0, "", 6) = 0 > > but then you have another problem: how to split the resulting buffer > into lines in shell. ;) True. > But if we're at the point of creating custom C builtins for > busybox/dash/etc, you should be able to create a primitive for "read > this using buffered stdio, other processes be damned, and return one > line at a time". Well, you know, I do not think that papering over the root cause will make anything better. And the root cause is that we use a test framework written in Unix shell. I will have to set aside some time to dig into the bottlenecks there and figure out what parts I can safely convert into "test builtins", i.e. into the test-tool Duy introduced, to avoid having shell scripts do the heavy-lifting. Ciao, Dscho
Re: A potential approach to making tests faster on Windows
Hi Ævar, On Tue, 3 Apr 2018, Ævar Arnfjörð Bjarmason wrote: > [...] I think it would be really interesting to see the third > approach I suggested, i.e. hack the shell to make the test_cmp a builtin > and test that. Then you won't fork, but will get the advantage of your > fast C codepath. That should be relatively equivalent to running in BusyBox-w32's ash. BusyBox-w32 is a pure-Win32 version of BusyBox (i.e. it does not use any POSIX emulation layer, not Cygwin nor MSYS2). I did not notice any Earth-shaking performance improvement when running a test with BusyBox-w32's ash. It was a couple of percent, maybe even 20% faster, but nowhere near the orders of magnitude I had been expecting. > Also, even if test_cmp is much faster, Peff's results over at > https://public-inbox.org/git/20161020123111.qnbsainul2g54...@sigill.intra.peff.net/ > suggest that you may not notice anyway. Aside from the points raised > there about the bin wrappers it seems the easiest wins are having a > builtin version of "rm" and "cat". In BusyBox-w32, `rm` and `cat` *are* built-ins. > Are you able to compile dash on Windows with some modification of the > patch I sent upthread? In theory, yes. In practice, I lack the time (and I do not expect this to have any performance benefit over using BusyBox-w32 to run the test suite). Ciao, Dscho
Re: [PATCH 1/9] git_config_set: fix off-by-two
Hi Duy, On Tue, 3 Apr 2018, Duy Nguyen wrote: > On Tue, Apr 3, 2018 at 11:31 AM, Johannes Schindelin > wrote: > > It is very frustrating to spend that much time with only little gains > > here and there (and BusyBox-w32 is simply not robust enough yet, apart > > from also not showing a significant improvement in performance). > > You still use busybox-w32? Yes. > It's amazing that people still use it after the linux subsystem comes. I use WSL myself. But you need to realize that WSL is only available on Windows 10 (many users still use Windows 7), and it is a little tricky to get to work in Docker containers, I heard, so I did not even try. Also, many Windows users are unfamiliar with Linux, and forcing them to learn and install a Linux distribution on their machine when all they want is to use Git is a bit... much. > busybox has a lot of commands built in (i.e. no new processes) and > unless rmyorston did something more, the "fork" in ash shell should be > as cheap as it could be: it simply serializes data and sends to the new > process. Yes, I had the pleasure of reading that code. It might surprise you, but I had to come up with quite a bit of patches to make the test suite pass. And it does not really pass, as I randomly get hangs... > If performance does not improve, I guess the process creation cost > dominates. There's not much we could do except moving away from the > zillion processes test framework: either something C-based or another > scripting language (ok I don't want to bring this up again) There is no need to guess. I now have .pdb files, and once I have a good example of a shell script construct that is particularly slow, and once I find some time to work on it, I will dig into the bottlenecks. Ciao, Dscho
Re: [PATCH 1/9] git_config_set: fix off-by-two
On Tue, Apr 3, 2018 at 11:31 AM, Johannes Schindelin wrote: > It is very frustrating to spend that much time with only little gains here > and there (and BusyBox-w32 is simply not robust enough yet, apart from > also not showing a significant improvement in performance). You still use busybox-w32? It's amazing that people still use it after the linux subsystem comes. busybox has a lot of commands built in (i.e. no new processes) and unless rmyorston did something more, the "fork" in ash shell should be as cheap as it could be: it simply serializes data and sends to the new process. If performance does not improve, I guess the process creation cost dominates. There's not much we could do except moving away from the zillion processes test framework: either something C-based or another scripting language (ok I don't want to bring this up again) -- Duy
Re: [PATCH] t2028: tighten grep expression to make "move worktree" test more robust
On Tue, Apr 3, 2018 at 11:25 AM, Eric Sunshine wrote: > Following a rename of worktree "source" to "destination", the "move > worktree" test uses grep to verify that the output of "git worktree list > --porcelain" does not contain "source" (and does contain "destination"). > Unfortunately, the grep expression is too loose and can match > unexpectedly. For example, if component of the test trash directory path > matches "source" (e.g. "/home/me/sources/git/t/trash*"), then the test > will be fooled into thinking that "source" still exists. Tighten the > expression to avoid such accidental matches. > > While at it, drop an unused variable ("toplevel") from the test and > tighten a similarly too-loose expression in a related test. > > Reported-by: Jens Krüger > Signed-off-by: Eric Sunshine > --- > > t2028 in 2.17.0 can be fooled into failing depending upon the path of > the test's trash directory. The problem is with the test being too > loose, not with Git itself. Problem report and diagnosis here[1]. > > [1]: > https://public-inbox.org/git/26a00c2b-c588-68d5-7085-22310c20e...@frm2.tum.de/T/#m994cdb29f141656b0ab48dd0d152432c7e86fc20 Thanks both. It was great to scroll to the latest mails and saw that I didn't have to do anything else :) -- Duy
Re: [PATCH 4/3] Makefile: untangle DEVELOPER and -Werror
On Tue, Apr 03, 2018 at 11:19:46AM +0200, Ævar Arnfjörð Bjarmason wrote: > > On Sat, Mar 31 2018, Duy Nguyen wrote: > > > On Sat, Mar 31, 2018 at 6:40 PM, Ævar Arnfjörð Bjarmason > > wrote: > >> Change the DEVELOPER flag, and the newly added EAGER_DEVELOPER flag > >> which (approximately) enables -Wextra so that any combination of them > >> and -Werror can be set. > >> > >> I've long wanted to use DEVELOPER=1 in my production builds, but on > >> some old systems I still get warnings, and thus the build would > >> fail. However if the build/tests fail for some other reason, it would > >> still be useful to scroll up and see what the relevant code is warning > >> about. > >> > >> This change allows for that. Now setting DEVELOPER will set -Werror as > >> before, but if DEVELOPER_NONFATAL is set you'll get the same warnings, > >> but without -Werror. > >> > >> I've renamed the newly added EAGER_DEVELOPER flag to > >> DEVELOPER_EXTRA. The reason is that it approximately turns on -Wextra, > >> and it'll be more consistent to add e.g. DEVELOPER_PEDANTIC later than > >> inventing some new name of our own (VERY_EAGER_DEVELOPER?). > > > > Before we go with zillions of *DEVELOPER* maybe we can have something > > like DEVOPTS where you can give multiple keywords to a single variable > > to influence config.mak.dev. This is similar to COMPILER_FEATURES we > > already have in there, but now it's driven by the dev instead of the > > compiler. So you can have keywords like "gentle" (no -Werror) "extra" > > (-Wextra with no suppression) and something else. > > We could do that, but I don't think it's that bad. This patch is one > extra option on top of yours, And that eager this was marked experimental because I was not sure if it was the right way :) > and it's not going to result in some combinatorial explosion of > options, i.e. if we add DEVELOPER_PEDANTIC we'll just add one extra > flag. > > But sure, we could make this some string we'd need to parse out similar > to COMPILER_FEATURES, it just seems more complex to me for this task. It's not that complex. With the EAGER_DEVELOPER patch removed, we can have something like this where eager devs just need to put DEVOPTS = gentle no-suppression and you put DEVOPTS = gentle (bad naming, I didn't spend time thinking about names) -- 8< -- diff --git a/Makefile b/Makefile index e6680a8977..7b4e038e1d 100644 --- a/Makefile +++ b/Makefile @@ -435,6 +435,11 @@ all:: # Define DEVELOPER to enable more compiler warnings. Compiler version # and faimily are auto detected, but could be overridden by defining # COMPILER_FEATURES (see config.mak.dev) +# +# When DEVELOPER is set, DEVOPTS can be used to control compiler options. +# This variable contains keywords separated by whitespace. Two keywords +# are recognized: "gentle" removes -Werror and "no-suppression" +# removes all "-Wno-" options. GIT-VERSION-FILE: FORCE @$(SHELL_PATH) ./GIT-VERSION-GEN diff --git a/config.mak.dev b/config.mak.dev index 716a14ecc7..1d7aba6a6a 100644 --- a/config.mak.dev +++ b/config.mak.dev @@ -1,4 +1,6 @@ +ifeq ($(filter gentle,$(DEVOPTS)),) CFLAGS += -Werror +endif CFLAGS += -Wdeclaration-after-statement CFLAGS += -Wno-format-zero-length CFLAGS += -Wold-style-definition @@ -21,6 +23,7 @@ CFLAGS += -Wextra # if a function is public, there should be a prototype and the right # header file should be included. If not, it should be static. CFLAGS += -Wmissing-prototypes +ifeq ($(filter no-suppression,$(DEVOPTS)),) # These are disabled because we have these all over the place. CFLAGS += -Wno-empty-body CFLAGS += -Wno-missing-field-initializers @@ -28,6 +31,7 @@ CFLAGS += -Wno-sign-compare CFLAGS += -Wno-unused-function CFLAGS += -Wno-unused-parameter endif +endif # uninitialized warnings on gcc 4.9.2 in xdiff/xdiffi.c and config.c # not worth fixing since newer compilers correctly stop complaining -- 8< --
[PATCH v3 2/2] t3200: verify "branch --list" sanity when rebasing from detached HEAD
From: Eric Sunshine "git branch --list" shows an in-progress rebase as: * (no branch, rebasing ) master ... However, if the rebase is started from a detached HEAD, then there is no , and it would attempt to print a NULL pointer. The previous commit fixed this problem, so add a test to verify that the output is sane in this situation. Signed-off-by: Eric Sunshine Signed-off-by: Kaartic Sivaraam --- t/t3200-branch.sh | 24 1 file changed, 24 insertions(+) diff --git a/t/t3200-branch.sh b/t/t3200-branch.sh index 503a88d02..89fff3fa9 100755 --- a/t/t3200-branch.sh +++ b/t/t3200-branch.sh @@ -6,6 +6,7 @@ test_description='git branch assorted tests' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-rebase.sh test_expect_success 'prepare a trivial repository' ' echo Hello >A && @@ -1246,6 +1247,29 @@ test_expect_success '--merged is incompatible with --no-merged' ' test_must_fail git branch --merged HEAD --no-merged HEAD ' +test_expect_success '--list during rebase' ' + test_when_finished "reset_rebase" && + git checkout master && + FAKE_LINES="1 edit 2" && + export FAKE_LINES && + set_fake_editor && + git rebase -i HEAD~2 && + git branch --list >actual && + test_i18ngrep "rebasing master" actual +' + +test_expect_success '--list during rebase from detached HEAD' ' + test_when_finished "reset_rebase && git checkout master" && + git checkout master^0 && + oid=$(git rev-parse --short HEAD) && + FAKE_LINES="1 edit 2" && + export FAKE_LINES && + set_fake_editor && + git rebase -i HEAD~2 && + git branch --list >actual && + test_i18ngrep "rebasing detached HEAD $oid" actual +' + test_expect_success 'tracking with unexpected .fetch refspec' ' rm -rf a b c d && git init a && -- 2.17.0.484.g0c8726318
Re: Bad refspec messes up bundle.
Hi Luciano, On Sat, 31 Mar 2018, Luciano Joublanc wrote: > I've cloned the `maint` branch, built the project, and added the test > as you suggested - it's failing as expected. Excellent. > On 30 March 2018 at 11:20, Johannes Schindelin > wrote: > > > > However, this would be incorrect, as the flags are stored with the > > *commit*, not with the ref. So if two refs point to the same commit, > > that new code would skip the second one by mistake! > > Isn't that the point here? to deduplicate commits? My limited > understanding is that at a 'ref' is like an alias or pointer to a > commit. The point is to deduplicate refs, not commits ;-) Imagine that you have a git.git clone, and then you work on some topic, say, `bundle-refs` and then call `git checkout -b bundle-refs-wip` from there. If you now say git bundle create wip.bundle bundle-refs bundle-refs-wip you will want both bundle-refs and bundle-refs-wip to show up in that bundle, not just bundle-refs, even if both refs point at the same commit. > > By the way, this makes me think that there is another very real bug > > in the bundle code, in the part I showed above. Suppose you have a > > `master` and a `next` ref, and both point at the same commit, then > > you would want `git bundle create next.bundle master..next` to list > > `next`, don't you think? > > Doesn't this contradict what you just said, that we don't want to skip > refs with the same commit #? I would rather be able to generate such a wip.bundle as outlined above where calling `git ls-remote wip.bundle` would list *both* refs, with the same commit. > In fact, if you look in the calling function, there is a > `object_array_remove_duplicates(&revs.pending);` > Which to the best of my understanding removes duplicate refs (not > commits). However, I suspect this doesn't cover the `--all` case as > it's a switch rather than a revspec? Would that be right? Oh, I missed that! And I also missed that this is implemented with something *else* than a hashmap, so it won't have linear complexity but instead quadratic. Gross. But you got an interesting nugget there, as it indeed tries to deduplicate, but not by object ID, otherwise the bug you reported would not occur (but other bugs, as I outlined above). Instead, the object_array_remove_duplicates() code does this: -- snip -- void object_array_remove_duplicates(struct object_array *array) { unsigned nr = array->nr, src; struct object_array_entry *objects = array->objects; array->nr = 0; for (src = 0; src < nr; src++) { if (!contains_name(array, objects[src].name)) { if (src != array->nr) objects[array->nr] = objects[src]; array->nr++; } else { object_array_release_entry(&objects[src]); } } } -- snap -- And indeed, the `contains_name()` function iterates through all of the re-added entries and compares the *name*. Running this in a debugger shows the culprit, too: there is a `refs/heads/master`, a `HEAD` and a `master`. Note how the last entry (which was taken directly from the command-line arguments) lacks the `refs/heads/` prefix? *That* is the culprit... > > - most likely, the best way to avoid duplicate refs entries is to use the > > actual ref name and put it into a hash set. Luckily, we do have code > > for this, and examples how to use it, too. See e.g. fc65b00da7e > > (merge-recursive: change current file dir string_lists to hashmap, > > 2017-09-07). So you would define something like > > Separately, if I do end up including the hashmap code, it should be > refactored out into it's own file, right? I do not think that is necessary. Personally, I'd just add the hashmap as local variable to `write_bundle_refs()`, initialize it before the loop, add the struct for the hashmap entry and the _cmp function as file-local (i.e. `static`) function before `write_bundle_refs()`, then add all shown refs (as stored in the `display_ref` variable) to the hashmap, and add another conditional `goto skip_write_ref` after all the others, contingent on `display_ref` *not* being found in the hashmap via `hashmap_get_from_hash(&displayed_refs, strhash(display_ref), display_ref)`. In the same run, I would remove that `object_array_remove_duplicates()` function altogether, as its only caller is now no longer necessary. Ciao, Johannes
[PATCH 2/2] t5561: skip tests if curl is not available
It's possible to have libcurl installed but not the curl command-line utility. The latter is not generally needed for Git's http support, but we use it in t5561 for basic tests of http-backend's functionality. Let's detect when it's missing and skip this test. Note that we can't mark the individual tests with the CURL prerequisite. They're in a shared t556x_common that uses the GET and POST functions as a level of indirection, and it's only our implementations of those functions in t5561 that requires curl. It's not a problem, though, as literally every test in the script would depend on the prerequisite anyway. Reported-by: Jens Krüger Signed-off-by: Jeff King --- t/t5561-http-backend.sh | 6 ++ t/test-lib.sh | 4 2 files changed, 10 insertions(+) diff --git a/t/t5561-http-backend.sh b/t/t5561-http-backend.sh index 6167563986..84a955770a 100755 --- a/t/t5561-http-backend.sh +++ b/t/t5561-http-backend.sh @@ -3,6 +3,12 @@ test_description='test git-http-backend' . ./test-lib.sh . "$TEST_DIRECTORY"/lib-httpd.sh + +if ! test_have_prereq CURL; then + skip_all='skipping raw http-backend tests, curl not available' + test_done +fi + start_httpd GET() { diff --git a/t/test-lib.sh b/t/test-lib.sh index 7740d511d2..1aa39fe889 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1208,3 +1208,7 @@ test_lazy_prereq LONG_IS_64BIT ' test_lazy_prereq TIME_IS_64BIT 'test-date is64bit' test_lazy_prereq TIME_T_IS_64BIT 'test-date time_t-is64bit' + +test_lazy_prereq CURL ' + curl --version +' -- 2.17.0.686.g08b0810b04
[PATCH 1/2] t5561: drop curl stderr redirects
For a normal test run, stderr is already redirected to /dev/null by the test suite. When used with "-v", suppressing stderr is actively harmful, as it may hide the reason for curl failing. Reported-by: Jens Krüger Signed-off-by: Jeff King --- t/t5561-http-backend.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/t5561-http-backend.sh b/t/t5561-http-backend.sh index 90e0d6f0fe..6167563986 100755 --- a/t/t5561-http-backend.sh +++ b/t/t5561-http-backend.sh @@ -6,7 +6,7 @@ test_description='test git-http-backend' start_httpd GET() { - curl --include "$HTTPD_URL/$SMART/repo.git/$1" >out 2>/dev/null && + curl --include "$HTTPD_URL/$SMART/repo.git/$1" >out && tr '\015' Q out 2>/dev/null && + "$HTTPD_URL/smart/repo.git/$1" >out && tr '\015' Q
[PATCH 0/2] t5561 fails without curl installed
On Tue, Apr 03, 2018 at 09:14:47AM -0400, Jeff King wrote: > As far as code changes in Git, perhaps (assuming my guess is right): > > - drop the redirect of stderr here; the test suite already handles > hiding stderr from the user (without "-v"), and in "-v" mode you > probably would have gotten a useful error like "curl: not found" > > - it's rare but possible to have libcurl installed (which is needed > for the server side, and what we key on for running the httpd tests) > but not the curl binary. This test probably should check for the > existence of the curl binary as a prerequisite. So let's do this before we forget. [1/2]: t5561: drop curl stderr redirects [2/2]: t5561: skip tests if curl is not available t/t5561-http-backend.sh | 10 -- t/test-lib.sh | 4 2 files changed, 12 insertions(+), 2 deletions(-) -Peff
Re: [PATCH v2 1/5] core.aheadbehind: add new config setting
On 4/3/2018 7:39 AM, Derrick Stolee wrote: On 4/3/2018 6:18 AM, Ævar Arnfjörð Bjarmason wrote: On Tue, Apr 03 2018, Lars Schneider wrote: What is the state of this series? I can't find it in git/git nor in git-for-windows/git. I think Stolee mentioned the config in his Git Merge talk [1] and I was about to test it/roll it out :-) It's in the gvfs branch of g...@github.com:Microsoft/git.git, i.e. it's not in Git for Windows, but used in Microsoft's own in-house version used for Windows.git. Thanks for adding me to CC. I mentioned it in my talk because that was one thing we shipped internally as a "quick fix" until we could do the right thing. If I remember correctly, Jeff abandoned shipping this upstream because it did have the feel of a hack and we wanted to see if users used the config setting or really cared about the output values. We saw fast adoption of the feature and even turned the config setting on automatically in the following version of GVFS. I may be misunderstanding this feature, but my impression was that it was a kludge as a workaround until the commit graph code landed, because once we have that then surely we can just cheaply report the actual (or approximate?) number in the common case, but of course it may still be slow if your commit graph file is out of date. Right, the only thing in master are the changes to take the new command line option and to alter the output of status. We did not reach consensus on the need for the config setting and/or whether it should be in "core." or "status." or another namespace and/or how it should work. And yes, it was also seen as a hack (just turn it off) until the client-side commit graph was ready (at least for interactive use). Because there are callers that don't need the answer (regardless of whether it is cheap to compute) and so the explicit command line arg limitation is sufficient for them. This part is in upstream master: commit 4094e47fd2c49fcdbd0152d20ed4d610d72680d7 Merge: c710d182ea f39a757dd9 Author: Junio C Hamano Date: Thu Mar 8 12:36:24 2018 -0800 Merge branch 'jh/status-no-ahead-behind' These parts are in the 'gvfs' branch in the g...@github.com:Microsoft/git.git repo: commit 039f65946968fa654a9c3bca27a4f4e93c1c9381 Author: Jeff Hostetler Date: Wed Jan 10 13:50:24 2018 -0500 status: add warning when a/b calculation takes too long for long/normal format commit 0d6756f06d0ad6f1fdc8dba0ead7911e411c9704 Author: Jeff Hostetler Date: Mon Feb 5 09:44:04 2018 -0500 status: ignore status.aheadbehind in porcelain formats Teach porcelain V[12] formats to ignore the status.aheadbehind config setting. They only respect the --[no-]ahead-behind command line argument. This is for backwards compatibility with existing scripts. commit 0dd122d6cd43106a5928587d768a7381cfe9e7a3 Author: Jeff Hostetler Date: Tue Jan 9 14:16:07 2018 -0500 status: add status.aheadbehind setting Add "status.aheadbehind" config setting to change the default behavior of ALL git status formats. Hope this helps, Jeff
Re: Test 5561 failed
Am 03.04.2018 um 15:14 schrieb Jeff King: On Tue, Apr 03, 2018 at 01:43:37PM +0200, Jens Krüger wrote: expecting success: GET refs/heads/master "404 Not Found" not ok 2 - direct refs/heads/master not found That GET function is: GET() { curl --include "$HTTPD_URL/$SMART/repo.git/$1" >out 2>/dev/null && tr '\015' Q act && echo "HTTP/1.1 $2" >exp && test_cmp exp act } The tarball you sent shows "out" as empty, and "act" is missing. So "curl" produced no output, and we did not make it as far as the tr/sed pipe. Just a guess, but are you missing the "curl" command-line tool on your system? If so, "apt install curl" should fix the failure. Yes, the missing curl program was the reason, but I didn't find any hint about the missed program in the log output. The output will be suppressed by the '2>/dev/null' in lines: 9 and 22 of the t5561-http-backend.sh script. Nevertheless many thanks for helping. As far as code changes in Git, perhaps (assuming my guess is right): - drop the redirect of stderr here; the test suite already handles hiding stderr from the user (without "-v"), and in "-v" mode you probably would have gotten a useful error like "curl: not found" - it's rare but possible to have libcurl installed (which is needed for the server side, and what we key on for running the httpd tests) but not the curl binary. This test probably should check for the existence of the curl binary as a prerequisite. -Peff
Re: A potential approach to making tests faster on Windows
On Tue, Apr 03, 2018 at 01:43:10PM +0200, Johannes Schindelin wrote: > > I don't have time or interest to work on this now, but thought it was > > interesting to share. This assumes that something in shellscript like: > > > > while echo foo; do echo bar; done > > > > Is no slower on Windows than *nix, since it's purely using built-ins, as > > opposed to something that would shell out. > > It is still interpreting stuff. And it still goes through the POSIX > emulation layer. > > I did see reports on the Git for Windows bug tracker that gave me the > impression that such loops in Unix shell scripts may not, in fact, be as > performant in MSYS2's Bash as you would like to believe: > > https://github.com/git-for-windows/git/issues/1533#issuecomment-372025449 The main problem with `read` loops in shell is that the shell makes one read() syscall per character. It has to, because doing otherwise is user-visible in cases where the descriptor may get passed to a different process. There's unfortunately no portable way to say "please just read this quickly, I promise nobody else is going to read the descriptor". And nor do I know of any shell which is smart enough to know that it's going to consume to EOF anyway (as you would for something like "cmd | while read"). If you know you have bash, you can use "-N" to get a more efficient read: $ echo foo | strace -e read bash -c 'read foo' [...] read(0, "f", 1) = 1 read(0, "o", 1) = 1 read(0, "o", 1) = 1 read(0, "\n", 1)= 1 $ echo foo | strace -e read bash -c 'read -N 10 foo' [...] read(0, "foo\n", 10)= 4 read(0, "", 6) = 0 but then you have another problem: how to split the resulting buffer into lines in shell. ;) But if we're at the point of creating custom C builtins for busybox/dash/etc, you should be able to create a primitive for "read this using buffered stdio, other processes be damned, and return one line at a time". -Peff
Re: Test 5561 failed
On Tue, Apr 03, 2018 at 01:43:37PM +0200, Jens Krüger wrote: > expecting success: > GET refs/heads/master "404 Not Found" > > not ok 2 - direct refs/heads/master not found That GET function is: GET() { curl --include "$HTTPD_URL/$SMART/repo.git/$1" >out 2>/dev/null && tr '\015' Q act && echo "HTTP/1.1 $2" >exp && test_cmp exp act } The tarball you sent shows "out" as empty, and "act" is missing. So "curl" produced no output, and we did not make it as far as the tr/sed pipe. Just a guess, but are you missing the "curl" command-line tool on your system? If so, "apt install curl" should fix the failure. As far as code changes in Git, perhaps (assuming my guess is right): - drop the redirect of stderr here; the test suite already handles hiding stderr from the user (without "-v"), and in "-v" mode you probably would have gotten a useful error like "curl: not found" - it's rare but possible to have libcurl installed (which is needed for the server side, and what we key on for running the httpd tests) but not the curl binary. This test probably should check for the existence of the curl binary as a prerequisite. -Peff
Re: [PATCH 0/3] Lazy-load trees when reading commit-graph
On 4/3/2018 9:06 AM, Jeff King wrote: On Tue, Apr 03, 2018 at 08:00:54AM -0400, Derrick Stolee wrote: There are several commit-graph walks that require loading many commits but never walk the trees reachable from those commits. However, the current logic in parse_commit() requires the root tree to be loaded. This only uses lookup_tree(), but when reading commits from the commit- graph file, the hashcpy() to load the root tree hash and the time spent checking the object cache take more time than parsing the rest of the commit. In this patch series, all direct references to accessing the 'tree' member of struct commit are replaced instead by one of the following methods: struct tree *get_commit_tree(struct commit *) struct object_id *get_commit_tree_oid(struct commit *) This seems like a pretty sane thing to do. There may still be a few parts of the code, notably fsck, that are reliant on a "struct object" having been instantiated to determine whether an object is "used". I tried to clean that up around the time of c2d17b3b6e (fsck: check HAS_OBJ more consistently, 2017-01-16), but I won't be surprised if there's still some hidden assumptions. I peeked at the fsck.c parts of patch 2, and it looks like we may be OK, since you use get_commit_tree() during the walk. This replacement was assisted by a Coccinelle script, but the 'tree' member is overloaded in other types, so the script gave false-positives that were removed from the diff. That catches any existing in-tree callers, but not any topics in flight. We could add the Coccinelle script and re-run it to catch any future ones. But perhaps simpler still: can we rename the "tree" member to "maybe_tree" or something, since nobody should be accessing it directly? That will give us a compile error if an older topic is merged. That's a good idea. I can reorg in v2 to rename 'tree' to 'maybe_tree' (and add an explicit comment that 'maybe_tree' could be NULL, so don't reference it directly). The check that the rename patch is correct is simply "does it compile?" Then Coccinelle could do the transfer of "c->maybe_tree" to "get_commit_tree(c)" safely, and can be added to the scripts. I guess one caveat is to not include the mutators (c->maybe_tree = ...), but that's probably something Coccinelle can do. On the Linux repository, performance tests were run for the following command: git log --graph --oneline -1000 Before: 0.83s After: 0.65s Rel %: -21.6% Neat. Not strictly related, but I happened across another thing today that might be worth investigating here. We allocate "struct commit" in these nice big allocation blocks. But then for every commit we allocate at least one "struct commit_list" to store the parent pointer. I was looking at this from a memory consumption angle (I found a pathological repository full of single-parent commits, and this wastes an extra 16 bytes plus malloc overhead for every 64-byte "struct commit"). But I wonder if it could also have non-negligible overhead in calling malloc() for your case, since you've optimized out so much of the rest of the work. That is a pattern I'm seeing: we strip out one bit and something else shows up as a hot spot. This game of whack-a-mole is quite productive. I'm not sure what the exact solution would be, but I imagine something like variable-sized "struct commit"s with the parent pointers embedded, with some kind of flag to indicate the number of parents (and probably some fallback to break out to a linked list for extreme cases of more than 2 parents). It may end up pretty invasive, though, as there's a lot of open-coded traversals of that parent list. Anyway, not anything to do with this patch, but food for thought as you micro-optimize these traversals. One other thing that I've been thinking about is that 'struct commit' is so much bigger than the other structs in 'union any_object'. This means that the object cache, which I think creates blocks of 'union any_object' for memory-alignment reasons, is overly bloated. This would be especially true when walking many more trees than commits. Perhaps there are other memory concerns in that case (such as cached buffers) that the 'union any_object' is not a concern, but it is worth thinking about as we brainstorm how to reduce the parent-list memory. Thanks, -Stolee
js/runtime-prefix-windows, was Re: What's cooking in git.git (Mar 2018, #06; Fri, 30)
Hi Junio, On Fri, 30 Mar 2018, Junio C Hamano wrote: > * js/runtime-prefix-windows (2018-03-27) 2 commits > - mingw/msvc: use the new-style RUNTIME_PREFIX helper > - exec_cmd: provide a new-style RUNTIME_PREFIX helper for Windows > (this branch uses dj/runtime-prefix.) > > The Windows port was the first that allowed Git to be installed > anywhere by having its components refer to each other with relative > pathnames. The recent dj/runtime-prefix topic extends the idea to > other platforms, and its approach has been adopted back in the > Windows port. > > Is this, together with the dj/runtime-prefix topic, ready for > 'next'? As far as I am concerned: yes! Ciao, Dscho
Re: [PATCH 0/3] Lazy-load trees when reading commit-graph
On Tue, Apr 03, 2018 at 08:00:54AM -0400, Derrick Stolee wrote: > There are several commit-graph walks that require loading many commits > but never walk the trees reachable from those commits. However, the > current logic in parse_commit() requires the root tree to be loaded. > This only uses lookup_tree(), but when reading commits from the commit- > graph file, the hashcpy() to load the root tree hash and the time spent > checking the object cache take more time than parsing the rest of the > commit. > > In this patch series, all direct references to accessing the 'tree' > member of struct commit are replaced instead by one of the following > methods: > > struct tree *get_commit_tree(struct commit *) > struct object_id *get_commit_tree_oid(struct commit *) This seems like a pretty sane thing to do. There may still be a few parts of the code, notably fsck, that are reliant on a "struct object" having been instantiated to determine whether an object is "used". I tried to clean that up around the time of c2d17b3b6e (fsck: check HAS_OBJ more consistently, 2017-01-16), but I won't be surprised if there's still some hidden assumptions. I peeked at the fsck.c parts of patch 2, and it looks like we may be OK, since you use get_commit_tree() during the walk. > This replacement was assisted by a Coccinelle script, but the 'tree' > member is overloaded in other types, so the script gave false-positives > that were removed from the diff. That catches any existing in-tree callers, but not any topics in flight. We could add the Coccinelle script and re-run it to catch any future ones. But perhaps simpler still: can we rename the "tree" member to "maybe_tree" or something, since nobody should be accessing it directly? That will give us a compile error if an older topic is merged. > On the Linux repository, performance tests were run for the following > command: > > git log --graph --oneline -1000 > > Before: 0.83s > After: 0.65s > Rel %: -21.6% Neat. Not strictly related, but I happened across another thing today that might be worth investigating here. We allocate "struct commit" in these nice big allocation blocks. But then for every commit we allocate at least one "struct commit_list" to store the parent pointer. I was looking at this from a memory consumption angle (I found a pathological repository full of single-parent commits, and this wastes an extra 16 bytes plus malloc overhead for every 64-byte "struct commit"). But I wonder if it could also have non-negligible overhead in calling malloc() for your case, since you've optimized out so much of the rest of the work. I'm not sure what the exact solution would be, but I imagine something like variable-sized "struct commit"s with the parent pointers embedded, with some kind of flag to indicate the number of parents (and probably some fallback to break out to a linked list for extreme cases of more than 2 parents). It may end up pretty invasive, though, as there's a lot of open-coded traversals of that parent list. Anyway, not anything to do with this patch, but food for thought as you micro-optimize these traversals. -Peff
Re: [PATCH v2 2/2] t3200: verify "branch --list" sanity when rebasing from detached HEAD
On Tuesday 03 April 2018 01:30 PM, Eric Sunshine wrote: >> Note that the "detached HEAD" test case might actually fail in some cases >> as the actual output of "git branch --list" might contain remote branch >> names which is not considered by the test case as it is rare to happen >> in the test environment. > > This paragraph was not in the original patch[1]. I _think_ what you > are saying (which took a while to decipher) is that if a command such > as "git checkout origin/next" ever gets inserted into the script > before the test, the test will be fooled since "git branch --list" > will show "detached HEAD origin/next" rather than "detached HEAD > d3adb33f", the latter of which is what the test is expecting. > Yeah, you're right. To know the reason for the unclear paragraph, see below. > Unfortunately, this paragraph makes it sound as if the test can fail > randomly (which, I believe, is not the case), and nobody would want a > test added which is unreliable, thus this paragraph is not helping to > sell this patch (in fact, it's actively hurting it). Ideally, the test > should be entirely deterministic so that it can't be fooled like this. > Rather than including this (harmful) paragraph in the commit message, > let's ensure that the test is deterministic (see below). > Sorry for the harmful and not so clear paragraph! I actually kept that paragraph there to **remind me** that I have to fix the issue which it describes before sending out the patch but I somehow forgot about it after I added it initially :-( >> diff --git a/t/t3200-branch.sh b/t/t3200-branch.sh >> @@ -1246,6 +1247,29 @@ test_expect_success '--merged is incompatible with >> --no-merged' ' >> +test_expect_success '--list during rebase from detached HEAD' ' >> + test_when_finished "reset_rebase && git checkout master" && >> + git checkout HEAD^0 && > > This is the line which I think is causing concern for you. If someone > inserted a new test before this one which invoked "git checkout > origin/next" (or something), then even after "git checkout HEAD^0", > "git branch --list" would still report the unexpected "detached HEAD > origin/next". Let's fix this, and make the test deterministic, by > doing this instead: > > git checkout master^0 && > Nice idea, will re-send a v3 with this fix and the harmful paragraph removed. Thanks, Kaartic signature.asc Description: OpenPGP digital signature
[ANNOUNCE] Git for Windows 2.17.0
Dear Git users, It is my pleasure to announce that Git for Windows 2.17.0 is available from: https://gitforwindows.org/ Changes since Git for Windows v2.16.3 (March 23rd 2018) New Features * Comes with Git v2.17.0. * Comes with OpenSSL v1.0.2o. * Comes with Git Credential Manager v1.15.2. * Comes with OpenSSH v7.7p1. Bug Fixes * When git.exe is called with an invalid subcommand, it no longer complains about file handles. Filename | SHA-256 | --- Git-2.17.0-64-bit.exe | 39b3da8be4f1cf396663dc892cbf818cb4cfddb5bf08c13f13f5b784f6654496 Git-2.17.0-32-bit.exe | 65b710e39db3d83b04a8a4bd56f54e929fb0abbab728c0a9abbc0dace8e361d2 PortableGit-2.17.0-64-bit.7z.exe | 9625365ccb67d1c7c52f14824c5dd68af4cca2a1b83a2ba998ba9ba45b708551 PortableGit-2.17.0-32-bit.7z.exe | f2344ec1bb2d87a1ab7b05bd2e7bcf4de6ae8a6b1dc034b9de4b3a1f0ec9 MinGit-2.17.0-64-bit.zip | 14c780bfc7af2bb85f6860fd1927402c87393201b7639e5bc3ce0fdc5688931e MinGit-2.17.0-32-bit.zip | b1896b23d0d7ab7c1ad6705ce760763ae9802cff2d2e0aaee141806a2a319c4c MinGit-2.17.0-busybox-64-bit.zip | 6f4599f367f784087e96f5c7689b2807718d7f75296e2beb4280a0fc2fdfdc1c MinGit-2.17.0-busybox-32-bit.zip | 3ded3fc7245fd026cab3ef05e9cc867c138e3e858aea42f68369a0e0aad1067f Git-2.17.0-64-bit.tar.bz2 | e5faf8e26de8dcc57b3848793e754fa1d16253c0ce66b12bc3afa1091284e6a3 Git-2.17.0-32-bit.tar.bz2 | d589200952debd3848602ee1287075428c096a8d94776090641645a858d020ec pdbs-for-git-64-bit-2.17.0.1.e7621d891d-1.zip | dc387ca23b94170832a9894d5812e4ad31c56552e4f68de154491dfe3d471a3c pdbs-for-git-32-bit-2.17.0.1.e7621d891d-1.zip | e1c4478b2874560df3a669f15b36360fffe556e8de3885d3cdb96202947dba7b Ciao, Johannes
Re: [PATCH 0/3] Lazy-load trees when reading commit-graph
On 4/3/2018 8:00 AM, Derrick Stolee wrote: There are several commit-graph walks that require loading many commits but never walk the trees reachable from those commits. However, the current logic in parse_commit() requires the root tree to be loaded. This only uses lookup_tree(), but when reading commits from the commit- graph file, the hashcpy() to load the root tree hash and the time spent checking the object cache take more time than parsing the rest of the commit. In this patch series, all direct references to accessing the 'tree' member of struct commit are replaced instead by one of the following methods: struct tree *get_commit_tree(struct commit *) struct object_id *get_commit_tree_oid(struct commit *) This replacement was assisted by a Coccinelle script, but the 'tree' member is overloaded in other types, so the script gave false-positives that were removed from the diff. After all access is restricted to use these methods, we can then change the postcondition of parse_commit_in_graph() to allow 'tree' to be NULL. If the tree is accessed later, we can load the tree's OID from the commit-graph in constant time and perform the lookup_tree(). On the Linux repository, performance tests were run for the following command: git log --graph --oneline -1000 Before: 0.83s After: 0.65s Rel %: -21.6% Adding '-- kernel/' to the command requires loading the root tree for every commit that is walked. There was no measureable performance change as a result of this patch. This patch series depends on v7 of ds/commit-graph. Derrick Stolee (3): commit: create get_commit_tree() method treewide: use get_commit_tree() for tree access commit-graph: lazy-load trees This patch series is also available as a GitHub pull request [1] [1] https://github.com/derrickstolee/git/pull/4
[PATCH 3/3] commit-graph: lazy-load trees
The commit-graph file provides quick access to commit data, including the OID of the root tree for each commit in the graph. When performing a deep commit-graph walk, we may not need to load most of the trees for these commits. Delay loading the tree object for a commit loaded from the graph until requested via get_commit_tree(). Do not lazy-load trees for commits not in the graph, since that requires duplicate parsing and the relative peformance improvement when trees are not needed is small. On the Linux repository, performance tests were run for the following command: git log --graph --oneline -1000 Before: 0.83s After: 0.65s Rel %: -21.6% Adding '-- kernel/' to the command requires loading the root tree for every commit that is walked. There was no measureable performance change as a result of this patch. Signed-off-by: Derrick Stolee --- commit-graph.c | 25 ++--- commit-graph.h | 7 +++ commit.c | 10 -- 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 3080a87940..a3eeb25f22 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -247,7 +247,6 @@ static struct commit_list **insert_parent_or_die(struct commit_graph *g, static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos) { - struct object_id oid; uint32_t edge_value; uint32_t *parent_data_ptr; uint64_t date_low, date_high; @@ -257,8 +256,7 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin item->object.parsed = 1; item->graph_pos = pos; - hashcpy(oid.hash, commit_data); - item->tree = lookup_tree(&oid); + item->tree = NULL; date_high = get_be32(commit_data + g->hash_len + 8) & 0x3; date_low = get_be32(commit_data + g->hash_len + 12); @@ -317,6 +315,27 @@ int parse_commit_in_graph(struct commit *item) return 0; } +static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit *c) +{ + struct object_id oid; + const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len + 16) * (c->graph_pos); + + hashcpy(oid.hash, commit_data); + c->tree = lookup_tree(&oid); + + return c->tree; +} + +struct tree *get_commit_tree_in_graph(const struct commit *c) +{ + if (c->tree) + return c->tree; + if (c->graph_pos == COMMIT_NOT_FROM_GRAPH) + BUG("get_commit_tree_in_graph called from non-commit-graph commit"); + + return load_tree_for_commit(commit_graph, (struct commit *)c); +} + static void write_graph_chunk_fanout(struct hashfile *f, struct commit **commits, int nr_commits) diff --git a/commit-graph.h b/commit-graph.h index e1d8580c98..3ab45818e2 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -17,6 +17,13 @@ char *get_commit_graph_filename(const char *obj_dir); */ int parse_commit_in_graph(struct commit *item); +/* + * For performance reasons, a commit loaded from the graph does not + * have a tree loaded until trying to consume it for the first time. + * Load that tree into the commit and return the object. + */ +struct tree *get_commit_tree_in_graph(const struct commit *c); + struct commit_graph { int graph_fd; diff --git a/commit.c b/commit.c index d65c7b3b47..d4293ae8f6 100644 --- a/commit.c +++ b/commit.c @@ -298,12 +298,18 @@ void free_commit_buffer(struct commit *commit) struct tree *get_commit_tree(const struct commit *commit) { - return commit->tree; + if (commit->tree || !commit->object.parsed) + return commit->tree; + + if (commit->graph_pos == COMMIT_NOT_FROM_GRAPH) + BUG("commit has NULL tree, but was not loaded from commit-graph"); + + return get_commit_tree_in_graph(commit); } struct object_id *get_commit_tree_oid(const struct commit *commit) { - return &commit->tree->object.oid; + return &get_commit_tree(commit)->object.oid; } const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep) -- 2.17.0.20.g9f30ba16e1
[PATCH 1/3] commit: create get_commit_tree() method
While walking the commit graph, we load struct commit objects into the object cache. During this process, we also load struct tree objects for the root tree of each of these commits. We load these objects even if we are only computing commit reachability information, such as a merge base or ahead/behind information. Create get_commit_tree() as a first step to removing direct references to the 'tree' member of struct commit. Create get_commit_tree_oid() as a shortcut for several references to "&commit->tree->object.oid" in the codebase. Signed-off-by: Derrick Stolee --- commit.c | 10 ++ commit.h | 3 +++ 2 files changed, 13 insertions(+) diff --git a/commit.c b/commit.c index 3e39c86abf..d65c7b3b47 100644 --- a/commit.c +++ b/commit.c @@ -296,6 +296,16 @@ void free_commit_buffer(struct commit *commit) } } +struct tree *get_commit_tree(const struct commit *commit) +{ + return commit->tree; +} + +struct object_id *get_commit_tree_oid(const struct commit *commit) +{ + return &commit->tree->object.oid; +} + const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep) { struct commit_buffer *v = buffer_slab_peek(&buffer_slab, commit); diff --git a/commit.h b/commit.h index e57ae4b583..fa79cc4d1f 100644 --- a/commit.h +++ b/commit.h @@ -102,6 +102,9 @@ void unuse_commit_buffer(const struct commit *, const void *buffer); */ void free_commit_buffer(struct commit *); +struct tree *get_commit_tree(const struct commit *); +struct object_id *get_commit_tree_oid(const struct commit *); + /* * Disassociate any cached object buffer from the commit, but do not free it. * The buffer (or NULL, if none) is returned. -- 2.17.0.20.g9f30ba16e1
[PATCH 2/3] treewide: use get_commit_tree() for tree access
Replace all direct accesses of the 'tree' member in 'struct commit' with calls to get_commit_tree() or get_commit_tree_oid(). This patch was constructed starting with the following Coccinelle script, then removing false-positives: @@ expression c; @@ - &c->tree->object.oid + get_commit_tree_oid(c) @@ expression c; symbol m; @@ - c->tree->object.oid.m + get_commit_tree_oid(c)->m @@ expression c; @@ - c->tree + get_commit_tree(c) To ensure all references were removed, the 'tree' member was renamed to 'tree_renamed' along with the few allowed accessors. A successful compilation demonstrated a correct transformation. Signed-off-by: Derrick Stolee --- blame.c | 18 +- builtin/checkout.c| 17 + builtin/diff.c| 2 +- builtin/fast-export.c | 6 +++--- builtin/log.c | 4 ++-- builtin/reflog.c | 2 +- commit-graph.c| 2 +- fsck.c| 8 +--- http-push.c | 2 +- line-log.c| 4 ++-- list-objects.c| 10 +- log-tree.c| 6 +++--- merge-recursive.c | 3 ++- notes-merge.c | 8 packfile.c| 2 +- pretty.c | 5 +++-- ref-filter.c | 2 +- revision.c| 8 sequencer.c | 12 ++-- sha1_name.c | 2 +- tree.c| 4 ++-- walker.c | 2 +- 22 files changed, 67 insertions(+), 62 deletions(-) diff --git a/blame.c b/blame.c index 200e0ad9a2..7f5700b324 100644 --- a/blame.c +++ b/blame.c @@ -553,10 +553,10 @@ static struct blame_origin *find_origin(struct commit *parent, diff_setup_done(&diff_opts); if (is_null_oid(&origin->commit->object.oid)) - do_diff_cache(&parent->tree->object.oid, &diff_opts); + do_diff_cache(get_commit_tree_oid(parent), &diff_opts); else - diff_tree_oid(&parent->tree->object.oid, - &origin->commit->tree->object.oid, + diff_tree_oid(get_commit_tree_oid(parent), + get_commit_tree_oid(origin->commit), "", &diff_opts); diffcore_std(&diff_opts); @@ -622,10 +622,10 @@ static struct blame_origin *find_rename(struct commit *parent, diff_setup_done(&diff_opts); if (is_null_oid(&origin->commit->object.oid)) - do_diff_cache(&parent->tree->object.oid, &diff_opts); + do_diff_cache(get_commit_tree_oid(parent), &diff_opts); else - diff_tree_oid(&parent->tree->object.oid, - &origin->commit->tree->object.oid, + diff_tree_oid(get_commit_tree_oid(parent), + get_commit_tree_oid(origin->commit), "", &diff_opts); diffcore_std(&diff_opts); @@ -1257,10 +1257,10 @@ static void find_copy_in_parent(struct blame_scoreboard *sb, diff_opts.flags.find_copies_harder = 1; if (is_null_oid(&target->commit->object.oid)) - do_diff_cache(&parent->tree->object.oid, &diff_opts); + do_diff_cache(get_commit_tree_oid(parent), &diff_opts); else - diff_tree_oid(&parent->tree->object.oid, - &target->commit->tree->object.oid, + diff_tree_oid(get_commit_tree_oid(parent), + get_commit_tree_oid(target->commit), "", &diff_opts); if (!diff_opts.flags.find_copies_harder) diff --git a/builtin/checkout.c b/builtin/checkout.c index d76e13c852..0b448fd179 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -484,7 +484,8 @@ static int merge_working_tree(const struct checkout_opts *opts, resolve_undo_clear(); if (opts->force) { - ret = reset_tree(new_branch_info->commit->tree, opts, 1, writeout_error); + ret = reset_tree(get_commit_tree(new_branch_info->commit), +opts, 1, writeout_error); if (ret) return ret; } else { @@ -570,19 +571,19 @@ static int merge_working_tree(const struct checkout_opts *opts, o.verbosity = 0; work = write_tree_from_memory(&o); - ret = reset_tree(new_branch_info->commit->tree, opts, 1, -writeout_error); + ret = reset_tree(get_commit_tree(new_branch_info->commit), +opts, 1, writeout_error); if (ret) return ret; o.ancestor = old_branch_info->name; o.branch1 = new_branch_info->name; o.branch2 = "local"; - ret = merge_trees(&o, new_branch_info->commit->tree, work, -