[RFC/PATCH 0/2] Introduce safe-include config feature

2014-10-02 Thread Rasmus Villemoes
[trimming Ccs]

This is an attempt at implementing the suggested safe-include config
feature. It mostly has the semantics Junio suggested in the parent
post, but it does not directly extend the current include directive;
instead, it uses a separate safe-include directive. This is done so
that if a repository is used with both old and new versions of git,
the older versions will just silently ignore the safe-include, instead
of ignoring include.safe and then proceeding to processing path =
../project.gitconfig.

Config variables are whitelisted using safe-include.whitelist; the
value is interpreted as a whitespace-separated list of, possibly
negated, patterns. Later patterns override earlier ones.

If the feature is deemed worthwhile and my approach is acceptable,
I'll go ahead and try to write some documentation. For now, there is
just a small test script.


Rasmus Villemoes (2):
  config: Add safe-include directive
  config: Add test of safe-include feature

 config.c   | 91 +--
 t/t1309-config-safe-include.sh | 96 ++
 2 files changed, 184 insertions(+), 3 deletions(-)
 create mode 100755 t/t1309-config-safe-include.sh

-- 
2.0.4

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 1/2] config: Add safe-include directive

2014-10-02 Thread Rasmus Villemoes
This adds a variant of the include directive, where only certain
config variables in the included files are honoured. The set of
honoured variables consists of those the user has mentioned in a
safe-include.whitelist directive, along with a small set of git.git
blessed ones.

This can, for example, be used by a project to supply a set of
suggested configuration variables, such as diff.renames = true. The
project would provide these in e.g project.gitconfig, and the user then
has to explicitly opt-in by putting

[safe-include]
path = ../project.gitconfig

into .git/config, possibly preceding the path directive with a
whitelist directive.

The problem with simply using the ordinary include directive for this
purpose is that certain configuration variables (e.g. diff.external)
can allow arbitrary programs to be run.

Older versions of git do not understand the safe-include directives,
so they will effectively just ignore them.

Obviously, we must ignore safe-include.whitelist directives when we
are processing a safe-included file.

Signed-off-by: Rasmus Villemoes r...@rasmusvillemoes.dk
---
 config.c | 91 +---
 1 file changed, 88 insertions(+), 3 deletions(-)

diff --git a/config.c b/config.c
index a677eb6..764cda1 100644
--- a/config.c
+++ b/config.c
@@ -11,6 +11,7 @@
 #include quote.h
 #include hashmap.h
 #include string-list.h
+#include wildmatch.h
 
 struct config_source {
struct config_source *prev;
@@ -39,6 +40,79 @@ static struct config_source *cf;
 
 static int zlib_compression_seen;
 
+struct safe_var {
+   struct safe_var *next;
+   const char *pattern;
+   int blacklisted;
+};
+
+static int safe_include_depth;
+static struct safe_var *safe_var_head;
+
+static const char *builtin_safe_patterns[] = {
+   diff.renames,
+};
+
+static int config_name_is_safe(const char *var)
+{
+   struct safe_var *sv;
+   unsigned i;
+
+   for (sv = safe_var_head; sv; sv = sv-next) {
+   /* Handle malformed patterns? */
+   if (wildmatch(sv-pattern, var, WM_CASEFOLD, NULL) == WM_MATCH)
+   return !sv-blacklisted;
+   }
+   for (i = 0; i  ARRAY_SIZE(builtin_safe_patterns); ++i) {
+   if (wildmatch(builtin_safe_patterns[i], var, WM_CASEFOLD, NULL) 
== WM_MATCH)
+   return 1;
+   }
+
+   return 0;
+}
+
+static void config_add_safe_pattern(const char *p)
+{
+   struct safe_var *sv;
+   int blacklist = 0;
+
+   if (*p == '!') {
+   blacklist = 1;
+   ++p;
+   }
+   if (!*p)
+   return;
+   sv = xmalloc(sizeof(*sv));
+   sv-pattern = xstrdup(p);
+   sv-blacklisted = blacklist;
+   sv-next = safe_var_head;
+   safe_var_head = sv;
+}
+
+static void config_add_safe_names(const char *value)
+{
+   char *patterns = xstrdup(value);
+   char *p, *save;
+
+   /*
+* This allows giving multiple patterns in a single line, e.g.
+*
+* whitelist = !* foo.bar squirrel.*
+*
+* to override the builtin list of safe vars and only declare
+* foo.bar and the squirrel section safe. But it has the
+* obvious drawback that one cannot match subsection names
+* containing whitespace. The alternative is that the above
+* would have to be written on three separate whitelist lines.
+*/
+   for (p = strtok_r(patterns,  \t, save); p; p = strtok_r(NULL,  \t, 
save)) {
+   config_add_safe_pattern(p);
+   }
+
+   free(patterns);
+}
+
+
 /*
  * Default config_set that contains key-value pairs from the usual set of 
config
  * config files (i.e repo specific .git/config, user wide ~/.gitconfig, XDG
@@ -142,12 +216,23 @@ int git_config_include(const char *var, const char 
*value, void *data)
 * Pass along all values, including include directives; this makes it
 * possible to query information on the includes themselves.
 */
-   ret = inc-fn(var, value, inc-data);
-   if (ret  0)
-   return ret;
+   if (safe_include_depth == 0 || config_name_is_safe(var)) {
+   ret = inc-fn(var, value, inc-data);
+   if (ret  0)
+   return ret;
+   }
 
if (!strcmp(var, include.path))
ret = handle_path_include(value, inc);
+   else if (safe_include_depth == 0
+ !strcmp(var, safe-include.whitelist)) {
+   config_add_safe_names(value);
+   }
+   else if (!strcmp(var, safe-include.path)) {
+   safe_include_depth++;
+   ret = handle_path_include(value, inc);
+   safe_include_depth--;
+   }
return ret;
 }
 
-- 
2.0.4

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 2/2] config: Add test of safe-include feature

2014-10-02 Thread Rasmus Villemoes
This adds a script for testing various aspects of the safe-include feature.

Signed-off-by: Rasmus Villemoes r...@rasmusvillemoes.dk
---
 t/t1309-config-safe-include.sh | 96 ++
 1 file changed, 96 insertions(+)
 create mode 100755 t/t1309-config-safe-include.sh

diff --git a/t/t1309-config-safe-include.sh b/t/t1309-config-safe-include.sh
new file mode 100755
index 000..b8ccc94
--- /dev/null
+++ b/t/t1309-config-safe-include.sh
@@ -0,0 +1,96 @@
+#!/bin/sh
+
+test_description='test config file safe-include directives'
+. ./test-lib.sh
+
+
+test_expect_success 'blacklist by default' '
+   echo [diff]external = badprog project 
+   echo [safe-include]path = project .gitconfig 
+   test_must_fail git config diff.external
+'
+
+
+test_expect_success 'builtin safe rules' '
+   echo [diff]renames = true project 
+   echo [safe-include]path = project .gitconfig 
+   echo true expect 
+   git config diff.renames actual 
+   test_cmp expect actual
+'
+
+test_expect_success 'user blacklist taking precedence' '
+   echo [diff]renames = true project 
+   cat .gitconfig -\EOF 
+   [diff]renames = false
+   [safe-include]whitelist = !diff.renames
+   [safe-include]path = project
+   EOF
+   echo false expect 
+   git config diff.renames actual 
+   test_cmp expect actual
+'
+
+test_expect_success 'wildcard matching' '
+   cat project -\EOF 
+   [test]beer = true
+   [test]bar = true
+   [test]foo = true
+   EOF
+   cat .gitconfig -\EOF 
+   [safe-include]whitelist = test.b*r
+   [safe-include]path = project
+   EOF
+   printf test.bar true\ntest.beer true\n | sort expect 
+   git config --get-regexp ^test | sort actual 
+   test_cmp expect actual
+'
+
+test_expect_success 'ignore whitelist directives in safe-included files' '
+   cat project -\EOF 
+   [safe-include]whitelist = *
+   [diff]external = badprog
+   EOF
+   echo [safe-include]path = project .gitconfig 
+   test_must_fail git config diff.external
+'
+
+test_expect_success 'multiple whitelist/blacklist patterns in one line' '
+   cat .gitconfig -\EOF 
+   [safe-include]whitelist = !* foo.bar squirrel.* !squirrel.xyz
+   [safe-include]path = project
+   EOF
+   cat project -\EOF 
+   [diff]renames = true
+   [foo]bar = bar
+   [squirrel]abc = abc
+   [squirrel]xyz = xyz
+   EOF
+   test_must_fail git config diff.renames 
+   test_must_fail git config squirrel.xyz 
+   echo bar expect 
+   git config foo.bar actual 
+   test_cmp expect actual
+   echo abc expect 
+   git config squirrel.abc actual 
+   test_cmp expect actual
+'
+
+test_expect_success 'case insensitivity' '
+   cat .gitconfig -\EOF 
+   [safe-include]whitelist = Test.Abc test.xyz
+   [safe-include]path = project
+   EOF
+   cat project -\EOF 
+   [test]abc = abc
+   [TeST]XyZ = XyZ
+   EOF
+   echo abc expect 
+   git config test.abc actual 
+   test_cmp expect actual 
+   echo XyZ expect 
+   git config test.xyz actual 
+   test_cmp expect actual
+'
+
+test_done
-- 
2.0.4

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC/PATCH 1/2] config: Add safe-include directive

2014-10-06 Thread Rasmus Villemoes
Junio C Hamano gits...@pobox.com wrote:

 (by the way, we do not do dashes in names for configuration by
 convention)

OK. Actually, I now think I'd prefer a subsection [include safe], but
I don't have any strong preferences regarding the names.

 That syntax _could_ be just a relative path (e.g. project.gitconfig names
 the file with that name at the top-level of the working tree), and if we are
 to do so, we should forbid any relative path that escapes from the working
 tree (e.g. ../project.gitconfig is forbidden, but down/down/../../.gitconfig
 could be OK as it is the same as .gitconfig). For that matter, anything with
 /./ and /../ in it can safely be forbidden without losing functionality.

I agree that it would be most useful to interpret relative paths as
being relative to the working tree. I'm not sure what would be gained by
checking for ./ and ../ components, a symlink could easily be used to
circumvent that.

 And we can allow absolute path, e.g. /etc/gitconfig, of course, but I'd
 prefer to at least initially forbid an absolute path, due to the same worries
 I have against the unset some variables defined in /etc/gitconfig topic
 we discussed earlier today in a separate thread.

One might (ab)use the feature to only use some settings from a global
file, e.g.

[include safe]
whitelist = !foo.*
path = ~/extra.gitconfig

But I'm fine with forbidding absolute paths until someone actually comes
with such a use case.

 Another design decision we would need to make is if it should be
 allowed for a safe-included file to use safe-include directive to
 include other files. Offhand I do not think of a reason we absolutely
 need to support it, but there may be an interesting workflow enabled
 if we did so. I dunno.

After one level of safe-include, any safe-include can also be done as a
normal include (but one may need to spell the path differently if the
two included files are not both at the top of the working tree). One
could imagine a project supplying lots of defaults and splitting those
into separate files, each included from a single project.gitconfig.

Anyway, my proposal allows nesting includes and safe-includes inside
safe-includes; forbidding it would just be a matter of adding a
safe_include_depth == 0 check in two places. (Then safe_include_depth
probably could/should be renamed in_safe_include.) I think I have a
slight preference to allowing nested includes, but if absolute paths are
forbidden for safe-includes, they should also be forbidden for
include-inside-safe-include.

Rasmus

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC] git-send-email: Cache generated message-ids, use them when prompting

2013-08-16 Thread Rasmus Villemoes
This is mostly a proof of concept/RFC patch. The idea is for
git-send-email to store the message-ids it generates, along with the
Subject and Date headers of the message. When prompting for which
Message-ID should be used in In-Reply-To, display a list of recent
emails (identifed using the Date/Subject pairs; the message-ids
themselves are not for human consumption). Choosing from that list
will then use the corresponding message-id; otherwise, the behaviour
is as usual.

When composing v2 or v3 of a patch or patch series, this avoids the
need to get one's MUA to display the Message-ID of the earlier email
(which is cumbersome in some MUAs) and then copy-paste that.

If this idea is accepted, I'm certain I'll get to use the feature
immediately, since the patch is not quite ready for inclusion :-)

Signed-off-by: Rasmus Villemoes r...@rasmusvillemoes.dk
---
 git-send-email.perl | 101 +---
 1 file changed, 96 insertions(+), 5 deletions(-)

diff --git a/git-send-email.perl b/git-send-email.perl
index f608d9b..2e3685c 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -26,6 +26,7 @@ use Data::Dumper;
 use Term::ANSIColor;
 use File::Temp qw/ tempdir tempfile /;
 use File::Spec::Functions qw(catfile);
+use Date::Parse;
 use Error qw(:try);
 use Git;
 
@@ -203,6 +204,7 @@ my ($validate, $confirm);
 my (@suppress_cc);
 my ($auto_8bit_encoding);
 my ($compose_encoding);
+my ($msgid_cache_file, $msgid_maxprompt);
 
 my ($debug_net_smtp) = 0;  # Net::SMTP, see send_message()
 
@@ -214,7 +216,7 @@ my %config_bool_settings = (
 signedoffcc = [\$signed_off_by_cc, undef],  # Deprecated
 validate = [\$validate, 1],
 multiedit = [\$multiedit, undef],
-annotate = [\$annotate, undef]
+annotate = [\$annotate, undef],
 );
 
 my %config_settings = (
@@ -237,6 +239,7 @@ my %config_settings = (
 from = \$sender,
 assume8bitencoding = \$auto_8bit_encoding,
 composeencoding = \$compose_encoding,
+msgidcachefile = \$msgid_cache_file,
 );
 
 my %config_path_settings = (
@@ -311,6 +314,7 @@ my $rc = GetOptions(h = \$help,
8bit-encoding=s = \$auto_8bit_encoding,
compose-encoding=s = \$compose_encoding,
force = \$force,
+   msgid-cache-file=s = \$msgid_cache_file,
 );
 
 usage() if $help;
@@ -784,10 +788,31 @@ sub expand_one_alias {
 @bcclist = validate_address_list(sanitize_address_list(@bcclist));
 
 if ($thread  !defined $initial_reply_to) {
-   $initial_reply_to = ask(
-   Message-ID to be used as In-Reply-To for the first email (if 
any)? ,
-   default = ,
-   valid_re = qr/\@.*\./, confirm_only = 1);
+   my @choices;
+   if ($msgid_cache_file) {
+   @choices = msgid_cache_getmatches();
+   }
+   if (@choices) {
+   my $prompt = '';
+   my $i = 0;
+   $prompt .= sprintf (%d) [%s] %s\n, $i++, $_-{date}, 
$_-{subject}
+   for (@choices);
+   $prompt .= sprintf Answer 0-%d to use the Message-ID of one of 
the above\n, $#choices;
+   $prompt .= Message-ID to be used as In-Reply-To for the first 
email (if any)? ;
+   $initial_reply_to = 
+   ask($prompt,
+   default = ,
+   valid_re = qr/\@.*\.|^[0-9]+$/, confirm_only = 1);
+   if ($initial_reply_to =~ /^[0-9]+$/  $initial_reply_to  
@choices) {
+   $initial_reply_to = $choices[$initial_reply_to]{id};
+   }
+   }
+   else {
+   $initial_reply_to = 
+   ask(Message-ID to be used as In-Reply-To for the first 
email (if any)? ,
+   default = ,
+   valid_re = qr/\@.*\./, confirm_only = 1);
+   }
 }
 if (defined $initial_reply_to) {
$initial_reply_to =~ s/^\s*?//;
@@ -1282,6 +1307,8 @@ X-Mailer: git-send-email $gitversion
}
}
 
+   msgid_cache_this($message_id, $subject, $date) if ($msgid_cache_file  
!$dry_run);
+
return 1;
 }
 
@@ -1508,6 +1535,8 @@ sub cleanup_compose_files {
 
 $smtp-quit if $smtp;
 
+msgid_cache_write() if $msgid_cache_file;
+
 sub unique_email_list {
my %seen;
my @emails;
@@ -1556,3 +1585,65 @@ sub body_or_subject_has_nonascii {
}
return 0;
 }
+
+my @msgid_new_entries;
+
+# For now, use a simple tab-separated format:
+#
+#$id\t$date\t$subject\n
+sub msgid_cache_read {
+   my $fh;
+   my $line;
+   my @entries;
+   if (not open ($fh, '', $msgid_cache_file)) {
+   # A non-existing cache file is ok, but should we warn if errno 
!= ENOENT?
+   return ();
+   }
+   while ($line = $fh) {
+   chomp($line);
+   my ($id, $date, $subject) = split /\t/, $line;
+   my $epoch = str2time($date

[PATCH v2 0/2] git-send-email: Message-ID caching

2013-08-21 Thread Rasmus Villemoes
This is a more polished attempt at implementing the message-id
caching. I apologize for the sloppiness (unrelated changes, unused
variables etc.) in the first version.

I have split the patch in two. The first half moves the choice-logic
inside ask(): If ask() decides to return the default immediately
(because the $term-{IN,OUT} checks fail), there's no reason to print
all the choices. In my first attempt, I handled this by passing a huge
prompt-string, but that made everything underlined (the prompt may be
emphasized in some other way on other terminals). Passing a different
valid_re and handling a numeric return is also slightly more
cumbersome for the caller than making ask() take care of it. There
might be other future uses of this 'choices' ability, but if the
message-id-caching is ultimately rejected, [1/2] can of course also be
dropped.

[2/2] is the new implementation. The two main changes are that old
entries are expunged when the file grows larger than, by default, 100
kB, and the old emails are scored according to a simple scheme (which
might need improvement). The input to the scoring is {first subject in
new batch, old subject, was the old email first in a batch?}. [*]

I also now simply store the unix time stamp instead of storing the
contents of the date header. This reduces processing time (no need to
parse the date header when reading the file), and eliminates the
Date::Parse dependency. The minor downside is that if the user has
changed time zone since the old email was sent, the date in the prompt
will not entirely match the Date:-header in the email that was sent.

I had to rearrange the existing code a little to make certain global
variables ($time, @files) contain the right thing at the right time,
but hopefully I haven't broken anything in the process.

[*] Suggestions for improving that heuristic are welcome. One thing
that might be worthwhile is to give words ending in a colon higher
weight.

Rasmus Villemoes (2):
  git-send-email: add optional 'choices' parameter to the ask sub
  git-send-email: Cache generated message-ids, use them when prompting

 git-send-email.perl |  144 ---
 1 file changed, 138 insertions(+), 6 deletions(-)


Thanks, Junio, for the comments. A few replies:

Junio C Hamano gits...@pobox.com writes:

 Rasmus Villemoes r...@rasmusvillemoes.dk writes:
  my %config_path_settings = (
 @@ -311,6 +314,7 @@ my $rc = GetOptions(h = \$help,
  8bit-encoding=s = \$auto_8bit_encoding,
  compose-encoding=s = \$compose_encoding,
  force = \$force,
 +msgid-cache-file=s = \$msgid_cache_file,
   );

 Is there a standard, recommended location we suggest users to store
 this?  

I don't know. It is obviously a local, per-repository, thing. I don't
know enough about git's internals to know if something breaks if one
puts it in .git (say, .git/msgid.cache). If that's a bad idea, then
I think it should be in the root of the repository, and one would then
probably want to add it to .git/info/exclude.

If storing it under .git is possible, one could consider making the
option a boolean ('msgid-use-cache' ?) and always use
.git/msgid.cache.

 +my @choices;
 +if ($msgid_cache_file) {
 +@choices = msgid_cache_getmatches();

 It is a bit strange that filename is specified = we will find the
 latest 10 before seeing if we even check to see if that file
 exists.  I would have expected that a two-step filename is given =
 try to read it and if we did read something = give choices
 process would be used.

I'm not sure I understand how this is different from what I was or am
doing. Sure, I don't do the test for existence before calling
msgid_cache_getmatches(), but that will just return an empty list if
the file does not exist. That will end up doing the same thing as if
no cache file was given.

 Also getmatches that does not take any clue from what the caller
 knows (the title of the series, for example) seems much less useful
 than ideal.

Fixed in the new version.

 +msgid_cache_this($message_id, $subject, $date) if ($msgid_cache_file  
 !$dry_run);

 Is this caching each and every one, even for things like [PATCH 23/47]?

Yes, but now I remember whether it was the first or not. 

 +msgid_cache_write() if $msgid_cache_file;

 Is this done under --dry-run?

Well, it was, but the msgid_cache_this() was not, so there was an
empty list of new entries. Of course, better to be explicit and safe,
so fixed.

 +if (not open ($fh, '', $msgid_cache_file)) {
 +# A non-existing cache file is ok, but should we warn if errno 
 != ENOENT?

 It should not be a warning but an informational message, creating a
 new cachefile, when bootstrapping, no?

If so, it should probably be when writing the new file. What I'm
trying to say is that errno == ENOENT is ok (and expected), but errno
== EPERM or anything else might be a condition we should warn or even

[PATCH 1/2] git-send-email: add optional 'choices' parameter to the ask sub

2013-08-21 Thread Rasmus Villemoes
Make it possible for callers of ask() to provide a list of
choices. Entering an appropriate integer chooses from that list,
otherwise the input is treated as usual.

Each choice can either be a single string, which is used both for the
prompt and for the return value, or a two-element array ref, where the
zeroth element is the choice and the first is used for the prompt.

Signed-off-by: Rasmus Villemoes r...@rasmusvillemoes.dk
---
 git-send-email.perl |   11 +++
 1 file changed, 11 insertions(+)

diff --git a/git-send-email.perl b/git-send-email.perl
index 2162478..ac3b02d 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -680,11 +680,18 @@ sub ask {
my $valid_re = $arg{valid_re};
my $default = $arg{default};
my $confirm_only = $arg{confirm_only};
+   my $choices = $arg{choices} || [];
my $resp;
my $i = 0;
return defined $default ? $default : undef
unless defined $term-IN and defined fileno($term-IN) and
   defined $term-OUT and defined fileno($term-OUT);
+   for (@$choices) {
+   printf (%d) %s\n, $i++, ref($_) eq 'ARRAY' ? $_-[1] : $_;
+   }
+   printf Enter 0-%d to choose from the above list\n, $i-1
+   if (@$choices);
+   $i = 0;
while ($i++  10) {
$resp = $term-readline($prompt);
if (!defined $resp) { # EOF
@@ -694,6 +701,10 @@ sub ask {
if ($resp eq '' and defined $default) {
return $default;
}
+   if (@$choices  $resp =~ m/^[0-9]+$/  $resp  @$choices) {
+   my $c = $choices-[$resp];
+   return ref($c) eq 'ARRAY' ? $c-[0] : $c;
+   }
if (!defined $valid_re or $resp =~ /$valid_re/) {
return $resp;
}
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] git-send-email: Cache generated message-ids, use them when prompting

2013-08-21 Thread Rasmus Villemoes
Allow the user to specify a file (sendemail.msgidcachefile) in which
to store the message-ids generated by git-send-email, along with time
and subject information. When prompting for a Message-ID to be used in
In-Reply-To, that file can be used to generate a list of options.

When composing v2 or v3 of a patch or patch series, this avoids the
need to get one's MUA to display the Message-ID of the earlier email
(which is cumbersome in some MUAs) and then copy-paste that.

Listing all previously sent emails is useless, so currently only the
10 most relevant emails. Relevant is based on a simple scoring,
which might need to be revised: Count the words in the old subject
which also appear in the subject of the first email to be sent; add a
bonus if the old email was first in a batch (that is, [00/74] is more
likely to be relevant than [43/74]). Resort to comparing timestamps
(newer is more relevant) when the scores tie.

To limit disk usage, the oldest half of the cached entries are
expunged when the cache file exceeds sendemail.msgidcachemaxsize
(default 100kB). This also ensures that we will never have to read,
score, and sort 1000s of entries on each invocation of git-send-email.

Signed-off-by: Rasmus Villemoes r...@rasmusvillemoes.dk
---
 git-send-email.perl |  133 ---
 1 file changed, 127 insertions(+), 6 deletions(-)

diff --git a/git-send-email.perl b/git-send-email.perl
index ac3b02d..5094267 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -203,6 +203,7 @@ my ($validate, $confirm);
 my (@suppress_cc);
 my ($auto_8bit_encoding);
 my ($compose_encoding);
+my ($msgid_cache_file, $msgid_cache_maxsize);
 
 my ($debug_net_smtp) = 0;  # Net::SMTP, see send_message()
 
@@ -237,6 +238,8 @@ my %config_settings = (
 from = \$sender,
 assume8bitencoding = \$auto_8bit_encoding,
 composeencoding = \$compose_encoding,
+msgidcachefile = \$msgid_cache_file,
+msgidcachemaxsize = \$msgid_cache_maxsize,
 );
 
 my %config_path_settings = (
@@ -796,11 +799,23 @@ sub expand_one_alias {
 @bcclist = expand_aliases(@bcclist);
 @bcclist = validate_address_list(sanitize_address_list(@bcclist));
 
+if ($compose  $compose  0) {
+   @files = ($compose_filename . .final, @files);
+}
+
 if ($thread  !defined $initial_reply_to  $prompting) {
+   my @choices = ();
+   if ($msgid_cache_file) {
+   my $first_subject = get_patch_subject($files[0]);
+   $first_subject =~ s/^GIT: //;
+   @choices = msgid_cache_getmatches($first_subject, 10);
+   @choices = map {[$_-{id}, sprintf [%s] %s, 
format_2822_time($_-{epoch}), $_-{subject}]} @choices;
+   }
$initial_reply_to = ask(
Message-ID to be used as In-Reply-To for the first email (if 
any)? ,
default = ,
-   valid_re = qr/\@.*\./, confirm_only = 1);
+   valid_re = qr/\@.*\./, confirm_only = 1,
+   choices = \@choices);
 }
 if (defined $initial_reply_to) {
$initial_reply_to =~ s/^\s*?//;
@@ -818,10 +833,6 @@ if (!defined $smtp_server) {
$smtp_server ||= 'localhost'; # could be 127.0.0.1, too... *shrug*
 }
 
-if ($compose  $compose  0) {
-   @files = ($compose_filename . .final, @files);
-}
-
 # Variables we set as part of the loop over files
 our ($message_id, %mail, $subject, $reply_to, $references, $message,
$needs_confirm, $message_num, $ask_default);
@@ -1136,7 +1147,7 @@ sub send_message {
my $to = join (,\n\t, @recipients);
@recipients = unique_email_list(@recipients,@cc,@bcclist);
@recipients = (map { extract_valid_address_or_die($_) } @recipients);
-   my $date = format_2822_time($time++);
+   my $date = format_2822_time($time);
my $gitversion = '@@GIT_VERSION@@';
if ($gitversion =~ m/..GIT_VERSION../) {
$gitversion = Git::version();
@@ -1477,6 +1488,11 @@ foreach my $t (@files) {
 
my $message_was_sent = send_message();
 
+   if ($message_was_sent  $msgid_cache_file  !$dry_run) {
+   msgid_cache_this($message_id, $message_num == 1 ? 1 : 0, , 
$time, $subject);
+   }
+   $time++;
+
# set up for the next message
if ($thread  $message_was_sent 
($chain_reply_to || !defined $reply_to || length($reply_to) == 
0 ||
@@ -1521,6 +1537,8 @@ sub cleanup_compose_files {
 
 $smtp-quit if $smtp;
 
+msgid_cache_write() if $msgid_cache_file  !$dry_run;
+
 sub unique_email_list {
my %seen;
my @emails;
@@ -1569,3 +1587,106 @@ sub body_or_subject_has_nonascii {
}
return 0;
 }
+
+my @msgid_new_entries;
+sub msgid_cache_this {
+   my $msgid = shift;
+   my $first = shift;
+   my $epoch = shift;
+   my $subject = shift;
+   # Make sure there are no tabs which will confuse us, and save
+   # some valuable horizontal real-estate by removing redundant
+   # whitespace

Re: [PATCH 3/6] strbuf_getwholeline: use getc_unlocked

2015-04-07 Thread Rasmus Villemoes
On Tue, Apr 07 2015, Jeff King p...@peff.net wrote:

 On Tue, Apr 07, 2015 at 03:48:33PM +0200, Rasmus Villemoes wrote:

 Implementation-wise, I think strbuf_getwholeline could be implemented
 mostly as a simple wrapper for getdelim. If I'm reading the current code
 and the posix spec for getdelim correctly, something like this should do
 it (though obviously not meant to be included as-is):

 I think it's close to what we want. strbuf_grow calls xrealloc, which
 will call try_to_free_routine() and possibly die() for us. So we would
 probably want to check errno on failure and respond similarly if we get
 ENOMEM.

Hm, I'm afraid it's not that simple. It seems that data may be lost from
the stream if getdelim encounters ENOMEM: Looking at the glibc
implementation (libio/iogetdelim.c), if reallocating the user buffer
fails, -1 is returned (presumably with errno==ENOMEM set by realloc), and
there's no way of knowing how many bytes were already copied to the
buffer (cur_len).

For regular files, I suppose one could do a ftell/fseek dance. For
other cases, I don't see a reliable way to retry upon ENOMEM.

Related thread on the posix mailing list:
http://thread.gmane.org/gmane.comp.standards.posix.austin.general/10091

 I wonder if it is even worth doing the getc_unlocked dance at all. It
 would potentially speed up the fallback code, but my hope that would be
 that most systems would simply use the getdelim() version.

Well, it's not really an intrusive patch, and 42% speedup is rather
significant. And, of course, the above ENOMEM issue may mean that
getdelim isn't usable after all.

Rasmus
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 60/68] prefer memcpy to strcpy

2015-09-28 Thread Rasmus Villemoes
On Thu, Sep 24 2015, Jeff King  wrote:

> This also eliminates calls to strcpy, which make auditing
> the code base harder.

Maybe may English parser is broken, but this doesn't immediately sound
like what you meant to say. Also, in 29/68 you say "We drop calls to
strcpy, which makes auditing the code base easier." Maybe it's all ok,
since on second reading the first "make" probably refers to the plural
"calls for strcpy", while in the second case "makes" refers to "[the
dropping of] calls to strcpy".

Rasmus

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] git-send-email: Add auto-cc to all body signatures

2015-12-02 Thread Rasmus Villemoes
On Thu, Dec 08 2011, Joe Perches  wrote:

> On Thu, 2011-12-08 at 11:37 -0800, Junio C Hamano wrote:
>> Joe Perches  writes:
>> > Many types of signatures are used by various projects.
>> > The most common type is formatted:
>> >"[some_signature_type]-by: First Last "
>> > e.g:
>> >"Reported-by: First Last " (no quotes are used)
>> This is just a phrasing issue, but I am a bit reluctant about the name
>> "signature".
>
> I've called all these markings signatures.
> Maybe email-address-tags or another name could be used.
> I'm not bothered one way or another by any chosen name.

It's been four years, but I recently ran into this. I mistakenly thought
that git would actually pick up cc addresses also from Reported-by, so
the reporter ended up not being cc'ed. Is there any chance this could be
revisited, or should I use a --cc-cmd to do what I want?

Rasmus
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] shallow.c: bit manipulation tweaks

2016-12-02 Thread Rasmus Villemoes
First of all, 1 << 31 is technically undefined behaviour, so let's just
use an unsigned literal.

If i is 'signed int' and gcc doesn't know that i is positive, gcc
generates code to compute the C99-mandated values of "i / 32" and "i %
32", which is a lot more complicated than simple a simple shifts/mask.

The only caller of paint_down actually passes an "unsigned int" value,
but the prototype of paint_down causes (completely well-defined)
conversion to signed int, and gcc has no way of knowing that the
converted value is non-negative. Just make the id parameter unsigned.

In update_refstatus, the change in generated code is much smaller,
presumably because gcc is smart enough to see that i starts as 0 and is
only incremented, so it is allowed (per the UD of signed overflow) to
assume that i is always non-negative. But let's just help less smart
compilers generate good code anyway.

Signed-off-by: Rasmus Villemoes <r...@rasmusvillemoes.dk>
---
 shallow.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/shallow.c b/shallow.c
index 8b1c35d..5aec5a5 100644
--- a/shallow.c
+++ b/shallow.c
@@ -464,7 +464,7 @@ static uint32_t *paint_alloc(struct paint_info *info)
  * all walked commits.
  */
 static void paint_down(struct paint_info *info, const unsigned char *sha1,
-  int id)
+  unsigned int id)
 {
unsigned int i, nr;
struct commit_list *head = NULL;
@@ -476,7 +476,7 @@ static void paint_down(struct paint_info *info, const 
unsigned char *sha1,
if (!c)
return;
memset(bitmap, 0, bitmap_size);
-   bitmap[id / 32] |= (1 << (id % 32));
+   bitmap[id / 32] |= (1U << (id % 32));
commit_list_insert(c, );
while (head) {
struct commit_list *p;
@@ -650,11 +650,11 @@ static int add_ref(const char *refname, const struct 
object_id *oid,
 
 static void update_refstatus(int *ref_status, int nr, uint32_t *bitmap)
 {
-   int i;
+   unsigned int i;
if (!ref_status)
return;
for (i = 0; i < nr; i++)
-   if (bitmap[i / 32] & (1 << (i % 32)))
+   if (bitmap[i / 32] & (1U << (i % 32)))
ref_status[i]++;
 }
 
-- 
2.1.4



[PATCH 4/4] shallow.c: remove useless test

2016-12-02 Thread Rasmus Villemoes
It seems to be odd to do x=y if x==y. Maybe there's a bug somewhere near
this, but as is this is somewhat confusing.

Signed-off-by: Rasmus Villemoes <r...@rasmusvillemoes.dk>
---
 shallow.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/shallow.c b/shallow.c
index 5aec5a5..7c28239 100644
--- a/shallow.c
+++ b/shallow.c
@@ -513,7 +513,7 @@ static void paint_down(struct paint_info *info, const 
unsigned char *sha1,
  p->item);
if (p->item->object.flags & SEEN)
continue;
-   if (*p_refs == NULL || *p_refs == *refs)
+   if (*p_refs == NULL)
*p_refs = *refs;
commit_list_insert(p->item, );
}
-- 
2.1.4



[PATCH 1/4] shallow.c: make paint_alloc slightly more robust

2016-12-02 Thread Rasmus Villemoes
I have no idea if this is a real issue, but it's not obvious to me that
paint_alloc cannot be called with info->nr_bits greater than about
4M (\approx 8*COMMIT_SLAB_SIZE). In that case the new slab would be too
small. So just round up the allocation to the maximum of
COMMIT_SLAB_SIZE and size.

Signed-off-by: Rasmus Villemoes <r...@rasmusvillemoes.dk>
---
 shallow.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/shallow.c b/shallow.c
index 4d0b005..e21534a 100644
--- a/shallow.c
+++ b/shallow.c
@@ -445,11 +445,13 @@ static uint32_t *paint_alloc(struct paint_info *info)
unsigned size = nr * sizeof(uint32_t);
void *p;
if (!info->slab_count || info->free + size > info->end) {
+   unsigned alloc_size = size < COMMIT_SLAB_SIZE ?
+   COMMIT_SLAB_SIZE : size;
info->slab_count++;
REALLOC_ARRAY(info->slab, info->slab_count);
-   info->free = xmalloc(COMMIT_SLAB_SIZE);
+   info->free = xmalloc(alloc_size);
info->slab[info->slab_count - 1] = info->free;
-   info->end = info->free + COMMIT_SLAB_SIZE;
+   info->end = info->free + alloc_size;
}
p = info->free;
info->free += size;
-- 
2.1.4



[PATCH 2/4] shallow.c: avoid theoretical pointer wrap-around

2016-12-02 Thread Rasmus Villemoes
The expression info->free+size is technically undefined behaviour in
exactly the case we want to test for. Moreover, the compiler is likely
to translate the expression to

  (unsigned long)info->free + size > (unsigned long)info->end

where there's at least a theoretical chance that the LHS could wrap
around 0, giving a false negative.

This might as well be written using pointer subtraction avoiding these
issues.

Signed-off-by: Rasmus Villemoes <r...@rasmusvillemoes.dk>
---
 shallow.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/shallow.c b/shallow.c
index e21534a..8b1c35d 100644
--- a/shallow.c
+++ b/shallow.c
@@ -444,7 +444,7 @@ static uint32_t *paint_alloc(struct paint_info *info)
unsigned nr = (info->nr_bits + 31) / 32;
unsigned size = nr * sizeof(uint32_t);
void *p;
-   if (!info->slab_count || info->free + size > info->end) {
+   if (!info->slab_count || size > info->end - info->free) {
unsigned alloc_size = size < COMMIT_SLAB_SIZE ?
COMMIT_SLAB_SIZE : size;
info->slab_count++;
-- 
2.1.4



[PATCH 1/2] Documentation/config: add sendemail.tocmd to list preceding "See git-send-email(1)"

2017-11-13 Thread Rasmus Villemoes
Signed-off-by: Rasmus Villemoes <r...@rasmusvillemoes.dk>
---
 Documentation/config.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 671fcbaa0..d88fc9f63 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -3000,6 +3000,7 @@ sendemail.smtpPass::
 sendemail.suppresscc::
 sendemail.suppressFrom::
 sendemail.to::
+sendemail.tocmd::
 sendemail.smtpDomain::
 sendemail.smtpServer::
 sendemail.smtpServerPort::
-- 
2.11.0



[PATCH 2/2] completion: add git config sendemail.tocmd

2017-11-13 Thread Rasmus Villemoes
Signed-off-by: Rasmus Villemoes <r...@rasmusvillemoes.dk>
---
 contrib/completion/git-completion.bash | 1 +
 1 file changed, 1 insertion(+)

diff --git a/contrib/completion/git-completion.bash 
b/contrib/completion/git-completion.bash
index fdd984d34..10607cdf2 100644
--- a/contrib/completion/git-completion.bash
+++ b/contrib/completion/git-completion.bash
@@ -2642,6 +2642,7 @@ _git_config ()
sendemail.suppressfrom
sendemail.thread
sendemail.to
+   sendemail.tocmd
sendemail.validate
sendemail.smtpbatchsize
sendemail.smtprelogindelay
-- 
2.11.0



Documentation for update-index

2018-01-06 Thread Rasmus Villemoes
The man page for update-index says

   -q
   Quiet. If --refresh finds that the index needs an update,
the default behavior is to error out. This
   option makes git update-index continue anyway.

   --ignore-submodules
   Do not try to update submodules. This option is only
respected when passed before --refresh.


However, it seems that the "This option is only respected when passed
before --refresh." also applies to -q (and --unmerged); at least I get
different results from

  git update-index -q --refresh
  git update-index --refresh -q

>From the documentation, that doesn't seem to be intentional, but the
code in update-index.c seems to handle -q, --ignore-submodules,
--ignore-missing and --unmerged the same way.

Rasmus


[PATCH 1/3] grep: move grep_source_init outside critical section

2018-02-15 Thread Rasmus Villemoes
grep_source_init typically does three strdup()s, and in the threaded
case, the call from add_work() happens while holding grep_mutex.

We can thus reduce the time we hold grep_mutex by moving the
grep_source_init() call out of add_work(), and simply have add_work()
copy the initialized structure to the available slot in the todo
array.

This also simplifies the prototype of add_work(), since it no longer
needs to duplicate all the parameters of grep_source_init(). In the
callers of add_work(), we get to reduce the amount of code duplicated in
the threaded and non-threaded cases slightly (avoiding repeating the
"GREP_SOURCE_OID, pathbuf.buf, path, oid" argument list); a subsequent
cleanup patch will make that even more so.

Signed-off-by: Rasmus Villemoes <r...@rasmusvillemoes.dk>
---
 builtin/grep.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 3ca4ac80d..4a4f15172 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -92,8 +92,7 @@ static pthread_cond_t cond_result;
 
 static int skip_first_line;
 
-static void add_work(struct grep_opt *opt, enum grep_source_type type,
-const char *name, const char *path, const void *id)
+static void add_work(struct grep_opt *opt, const struct grep_source *gs)
 {
grep_lock();
 
@@ -101,7 +100,7 @@ static void add_work(struct grep_opt *opt, enum 
grep_source_type type,
pthread_cond_wait(_write, _mutex);
}
 
-   grep_source_init([todo_end].source, type, name, path, id);
+   todo[todo_end].source = *gs;
if (opt->binary != GREP_BINARY_TEXT)
grep_source_load_driver([todo_end].source);
todo[todo_end].done = 0;
@@ -317,6 +316,7 @@ static int grep_oid(struct grep_opt *opt, const struct 
object_id *oid,
 const char *path)
 {
struct strbuf pathbuf = STRBUF_INIT;
+   struct grep_source gs;
 
if (opt->relative && opt->prefix_length) {
quote_path_relative(filename + tree_name_len, opt->prefix, 
);
@@ -325,18 +325,18 @@ static int grep_oid(struct grep_opt *opt, const struct 
object_id *oid,
strbuf_addstr(, filename);
}
 
+   grep_source_init(, GREP_SOURCE_OID, pathbuf.buf, path, oid);
+
 #ifndef NO_PTHREADS
if (num_threads) {
-   add_work(opt, GREP_SOURCE_OID, pathbuf.buf, path, oid);
+   add_work(opt, );
strbuf_release();
return 0;
} else
 #endif
{
-   struct grep_source gs;
int hit;
 
-   grep_source_init(, GREP_SOURCE_OID, pathbuf.buf, path, oid);
strbuf_release();
hit = grep_source(opt, );
 
@@ -348,24 +348,25 @@ static int grep_oid(struct grep_opt *opt, const struct 
object_id *oid,
 static int grep_file(struct grep_opt *opt, const char *filename)
 {
struct strbuf buf = STRBUF_INIT;
+   struct grep_source gs;
 
if (opt->relative && opt->prefix_length)
quote_path_relative(filename, opt->prefix, );
else
strbuf_addstr(, filename);
 
+   grep_source_init(, GREP_SOURCE_FILE, buf.buf, filename, filename);
+
 #ifndef NO_PTHREADS
if (num_threads) {
-   add_work(opt, GREP_SOURCE_FILE, buf.buf, filename, filename);
+   add_work(opt, );
strbuf_release();
return 0;
} else
 #endif
{
-   struct grep_source gs;
int hit;
 
-   grep_source_init(, GREP_SOURCE_FILE, buf.buf, filename, 
filename);
strbuf_release();
hit = grep_source(opt, );
 
-- 
2.15.1



[PATCH 0/3] a few grep patches

2018-02-15 Thread Rasmus Villemoes
I believe the first two should be ok, but I'm not sure what I myself
think of the third one. Perhaps the saving is not worth the
complexity, but it does annoy my optimization nerve to see all the
unnecessary duplicated work being done.

Rasmus Villemoes (3):
  grep: move grep_source_init outside critical section
  grep: simplify grep_oid and grep_file
  grep: avoid one strdup() per file

 builtin/grep.c | 25 -
 grep.c |  8 ++--
 2 files changed, 18 insertions(+), 15 deletions(-)

-- 
2.15.1



[PATCH 2/3] grep: simplify grep_oid and grep_file

2018-02-15 Thread Rasmus Villemoes
In the NO_PTHREADS or !num_threads case, this doesn't change
anything. In the threaded case, note that grep_source_init duplicates
its third argument, so there is no need to keep [path]buf.buf alive
across the call of add_work().

Signed-off-by: Rasmus Villemoes <r...@rasmusvillemoes.dk>
---
 builtin/grep.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 4a4f15172..593f48d59 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -326,18 +326,17 @@ static int grep_oid(struct grep_opt *opt, const struct 
object_id *oid,
}
 
grep_source_init(, GREP_SOURCE_OID, pathbuf.buf, path, oid);
+   strbuf_release();
 
 #ifndef NO_PTHREADS
if (num_threads) {
add_work(opt, );
-   strbuf_release();
return 0;
} else
 #endif
{
int hit;
 
-   strbuf_release();
hit = grep_source(opt, );
 
grep_source_clear();
@@ -356,18 +355,17 @@ static int grep_file(struct grep_opt *opt, const char 
*filename)
strbuf_addstr(, filename);
 
grep_source_init(, GREP_SOURCE_FILE, buf.buf, filename, filename);
+   strbuf_release();
 
 #ifndef NO_PTHREADS
if (num_threads) {
add_work(opt, );
-   strbuf_release();
return 0;
} else
 #endif
{
int hit;
 
-   strbuf_release();
hit = grep_source(opt, );
 
grep_source_clear();
-- 
2.15.1



[PATCH 3/3] grep: avoid one strdup() per file

2018-02-15 Thread Rasmus Villemoes
There is only one instance of grep_source_init(GREP_SOURCE_FILE), and in
that case the path and identifier arguments are equal - not just as
strings, but the same pointer is passed. So we can save some time and
memory by reusing the gs->path = xstrdup_or_null(path) we have already
done as gs->identifier, and changing grep_source_clear accordingly
to avoid a double free.

Signed-off-by: Rasmus Villemoes <r...@rasmusvillemoes.dk>
---
 grep.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/grep.c b/grep.c
index 3d7cd0e96..b1532b1b6 100644
--- a/grep.c
+++ b/grep.c
@@ -1972,7 +1972,8 @@ void grep_source_init(struct grep_source *gs, enum 
grep_source_type type,
 
switch (type) {
case GREP_SOURCE_FILE:
-   gs->identifier = xstrdup(identifier);
+   gs->identifier = identifier == path ?
+   gs->path : xstrdup(identifier);
break;
case GREP_SOURCE_OID:
gs->identifier = oiddup(identifier);
@@ -1986,7 +1987,10 @@ void grep_source_init(struct grep_source *gs, enum 
grep_source_type type,
 void grep_source_clear(struct grep_source *gs)
 {
FREE_AND_NULL(gs->name);
-   FREE_AND_NULL(gs->path);
+   if (gs->path == gs->identifier)
+   gs->path = NULL;
+   else
+   FREE_AND_NULL(gs->path);
FREE_AND_NULL(gs->identifier);
grep_source_clear_data(gs);
 }
-- 
2.15.1



[RFC PATCH] cherry: teach git cherry to respect pathspec

2018-02-15 Thread Rasmus Villemoes
This is a very naive attempt at teaching git cherry to restrict
attention to a given pathspec. Very much RFC, hence no documentation
update or test cases for now.

The first problem is the existing signature, with between 0 and 3
arguments. In order to preserve having defaults for limit, head,
upstream, I decided to make use of -- mandatory. The alternative seems
to be to only take a pathspec as arguments 4+, forcing the users to
write out the default values for head and/or limit (and how does one
actually write the default "no limit"?).

At first I just tried parse_pathspec directly to _data, but
that didn't seem to have any effect, and from staring at revision.c for
a while, it seems that one has to go through setup_revisions(). But that
fits well with insisting on the -- above, since we then have an argv[]
that ensures we never hit the handle_revision_arg() or
handle_revision_pseudo_opt() calls.

The motivation for this is that I'm in the process of switching a BSP
from a vendor kernel to one much closer to mainline's master branch. I
do need to apply/port some out-of-tree patches from the vendor kernel,
but it's much more managable if one can do a per-subsystem survey of
what might need porting. So I naively started by doing

  git cherry -v linus/master LSDK-17.12-V4.9 v4.9.62 -- drivers/mtd/spi-nor/

assuming that would Just Work. I was somewhat surprised that this didn't
give any output, but digging into the source revealed that (1) there
isn't actually any pathspec handling and (2) any argc other than 1..3 is
treated as 0 - something which is probably also worth fixing.

With this patch, the command above does give something reasonable, and
t3500-cherry.sh also seems to pass.

Signed-off-by: Rasmus Villemoes <r...@rasmusvillemoes.dk>
---
 builtin/log.c | 33 +++--
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/builtin/log.c b/builtin/log.c
index 46b4ca13e..2d4534b5c 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -1890,6 +1890,8 @@ int cmd_cherry(int argc, const char **argv, const char 
*prefix)
const char *head = "HEAD";
const char *limit = NULL;
int verbose = 0, abbrev = 0;
+   int pathspec_argc = 0;
+   int i;
 
struct option options[] = {
OPT__ABBREV(),
@@ -1897,17 +1899,27 @@ int cmd_cherry(int argc, const char **argv, const char 
*prefix)
OPT_END()
};
 
-   argc = parse_options(argc, argv, prefix, options, cherry_usage, 0);
+   argc = parse_options(argc, argv, prefix, options, cherry_usage,
+PARSE_OPT_KEEP_ARGV0 | PARSE_OPT_KEEP_UNKNOWN |
+PARSE_OPT_KEEP_DASHDASH);
+
+   for (i = 1; i < argc; ++i) {
+   if (!strcmp(argv[i], "--")) {
+   pathspec_argc = 1 + argc - i;
+   argc = i;
+   break;
+   }
+   }
 
switch (argc) {
+   case 4:
+   limit = argv[3];
+   /* FALLTHROUGH */
case 3:
-   limit = argv[2];
+   head = argv[2];
/* FALLTHROUGH */
case 2:
-   head = argv[1];
-   /* FALLTHROUGH */
-   case 1:
-   upstream = argv[0];
+   upstream = argv[1];
break;
default:
current_branch = branch_get(NULL);
@@ -1921,6 +1933,15 @@ int cmd_cherry(int argc, const char **argv, const char 
*prefix)
}
 
init_revisions(, prefix);
+   if (pathspec_argc) {
+   /*
+* hack: this reuses the element of argv before "--"
+* as argv[0] - setup_revisions assumes argv[0] is
+* present and ignores it, and starts its search for
+* "--" at argv[1].
+*/
+   setup_revisions(pathspec_argc, [argc-1], , NULL);
+   }
revs.max_parents = 1;
 
if (add_pending_commit(head, , 0))
-- 
2.15.1



[PATCH v2 0/2] two small grep patches

2018-02-23 Thread Rasmus Villemoes
Changes in v2:

- Drop patch 3 with dubious gain/complexity ratio
- Add comments regarding ownership of grep_source

I was a little torn between copy-pasting the comment or just saying
"see above" in the second case. I think a memset would be confusing,
at least unless one extends the comment to explain why one then does
the memset despite the first half of the comment.

Rasmus Villemoes (2):
  grep: move grep_source_init outside critical section
  grep: simplify grep_oid and grep_file

 builtin/grep.c | 33 -
 1 file changed, 20 insertions(+), 13 deletions(-)

-- 
2.15.1



[PATCH v2 1/2] grep: move grep_source_init outside critical section

2018-02-23 Thread Rasmus Villemoes
grep_source_init typically does three strdup()s, and in the threaded
case, the call from add_work() happens while holding grep_mutex.

We can thus reduce the time we hold grep_mutex by moving the
grep_source_init() call out of add_work(), and simply have add_work()
copy the initialized structure to the available slot in the todo
array.

This also simplifies the prototype of add_work(), since it no longer
needs to duplicate all the parameters of grep_source_init(). In the
callers of add_work(), we get to reduce the amount of code duplicated in
the threaded and non-threaded cases slightly (avoiding repeating the
long "GREP_SOURCE_OID, pathbuf.buf, path, oid" argument list); a
subsequent cleanup patch will make that even more so.

Signed-off-by: Rasmus Villemoes <r...@rasmusvillemoes.dk>
---
 builtin/grep.c | 27 ++-
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 3ca4ac80d..aad422bb6 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -92,8 +92,7 @@ static pthread_cond_t cond_result;
 
 static int skip_first_line;
 
-static void add_work(struct grep_opt *opt, enum grep_source_type type,
-const char *name, const char *path, const void *id)
+static void add_work(struct grep_opt *opt, const struct grep_source *gs)
 {
grep_lock();
 
@@ -101,7 +100,7 @@ static void add_work(struct grep_opt *opt, enum 
grep_source_type type,
pthread_cond_wait(_write, _mutex);
}
 
-   grep_source_init([todo_end].source, type, name, path, id);
+   todo[todo_end].source = *gs;
if (opt->binary != GREP_BINARY_TEXT)
grep_source_load_driver([todo_end].source);
todo[todo_end].done = 0;
@@ -317,6 +316,7 @@ static int grep_oid(struct grep_opt *opt, const struct 
object_id *oid,
 const char *path)
 {
struct strbuf pathbuf = STRBUF_INIT;
+   struct grep_source gs;
 
if (opt->relative && opt->prefix_length) {
quote_path_relative(filename + tree_name_len, opt->prefix, 
);
@@ -325,18 +325,22 @@ static int grep_oid(struct grep_opt *opt, const struct 
object_id *oid,
strbuf_addstr(, filename);
}
 
+   grep_source_init(, GREP_SOURCE_OID, pathbuf.buf, path, oid);
+
 #ifndef NO_PTHREADS
if (num_threads) {
-   add_work(opt, GREP_SOURCE_OID, pathbuf.buf, path, oid);
+   /*
+* add_work() copies gs and thus assumes ownership of
+* its fields, so do not call grep_source_clear()
+*/
+   add_work(opt, );
strbuf_release();
return 0;
} else
 #endif
{
-   struct grep_source gs;
int hit;
 
-   grep_source_init(, GREP_SOURCE_OID, pathbuf.buf, path, oid);
strbuf_release();
hit = grep_source(opt, );
 
@@ -348,24 +352,29 @@ static int grep_oid(struct grep_opt *opt, const struct 
object_id *oid,
 static int grep_file(struct grep_opt *opt, const char *filename)
 {
struct strbuf buf = STRBUF_INIT;
+   struct grep_source gs;
 
if (opt->relative && opt->prefix_length)
quote_path_relative(filename, opt->prefix, );
else
strbuf_addstr(, filename);
 
+   grep_source_init(, GREP_SOURCE_FILE, buf.buf, filename, filename);
+
 #ifndef NO_PTHREADS
if (num_threads) {
-   add_work(opt, GREP_SOURCE_FILE, buf.buf, filename, filename);
+   /*
+* add_work() copies gs and thus assumes ownership of
+* its fields, so do not call grep_source_clear()
+*/
+   add_work(opt, );
strbuf_release();
return 0;
} else
 #endif
{
-   struct grep_source gs;
int hit;
 
-   grep_source_init(, GREP_SOURCE_FILE, buf.buf, filename, 
filename);
strbuf_release();
hit = grep_source(opt, );
 
-- 
2.15.1



[PATCH v2 2/2] grep: simplify grep_oid and grep_file

2018-02-23 Thread Rasmus Villemoes
In the NO_PTHREADS or !num_threads case, this doesn't change
anything. In the threaded case, note that grep_source_init duplicates
its third argument, so there is no need to keep [path]buf.buf alive
across the call of add_work().

Signed-off-by: Rasmus Villemoes <r...@rasmusvillemoes.dk>
---
 builtin/grep.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index aad422bb6..9a8e4fada 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -326,6 +326,7 @@ static int grep_oid(struct grep_opt *opt, const struct 
object_id *oid,
}
 
grep_source_init(, GREP_SOURCE_OID, pathbuf.buf, path, oid);
+   strbuf_release();
 
 #ifndef NO_PTHREADS
if (num_threads) {
@@ -334,14 +335,12 @@ static int grep_oid(struct grep_opt *opt, const struct 
object_id *oid,
 * its fields, so do not call grep_source_clear()
 */
add_work(opt, );
-   strbuf_release();
return 0;
} else
 #endif
{
int hit;
 
-   strbuf_release();
hit = grep_source(opt, );
 
grep_source_clear();
@@ -360,6 +359,7 @@ static int grep_file(struct grep_opt *opt, const char 
*filename)
strbuf_addstr(, filename);
 
grep_source_init(, GREP_SOURCE_FILE, buf.buf, filename, filename);
+   strbuf_release();
 
 #ifndef NO_PTHREADS
if (num_threads) {
@@ -368,14 +368,12 @@ static int grep_file(struct grep_opt *opt, const char 
*filename)
 * its fields, so do not call grep_source_clear()
 */
add_work(opt, );
-   strbuf_release();
return 0;
} else
 #endif
{
int hit;
 
-   strbuf_release();
hit = grep_source(opt, );
 
grep_source_clear();
-- 
2.15.1



Re: feature request: allow commit.email config setting

2018-08-30 Thread Rasmus Villemoes
On 2018-08-30 20:13, Eric Sunshine wrote:
> On Thu, Aug 30, 2018 at 7:26 AM Rasmus Villemoes  
> wrote:
>> I can set GIT_COMMITTER_EMAIL in the environment, but that is
>> rather inconvenient, since that means I have to remember to do that in
>> the shell I'm using for that particular project, and I can't use that
>> shell for other projects. So it would be really nice if I could set
>> commit.email = $private-email in the local .git/config for that
>> particular project.
> 
> Aside from modifying Git itself to support such a use-case, another
> (perhaps more pragmatic) approach would be to use a tool, such as
> direnv[1], which automatically sets environment variables for you
> depending upon your current working directory, or just use some ad-hoc
> shell programming to achieve the same (for instance, [2]).

Thanks for the hint! I've actually had "git" as a function in my .bashrc
for a long time, for implementing a ~/.githistory to help remember the
sometimes rather complex git invocations, and keeping track of the
context ($cwd, current branch, etc.) they were used in. It should be
trivial to hook the environment settings based on $cwd into that. The
only problem is that that gives me much less incentive to work on
implementing the config support in git, but if I'm the only one with a
use case, that's probably just as well.

Rasmus



feature request: allow commit.email config setting

2018-08-30 Thread Rasmus Villemoes
As part of my dayjob, I did and still do some work on an upstream
project. A while ago, I was granted commit access to that project.
However, upstream asked that I would register with their system using a
private email, or at least one that wouldn't change if I changed jobs,
rather than my work email.

Now, I (and my employer) would like that the work I do as part of my
current job on that project has my work email in the Author field, but
since the commit access was granted to me privately/personally, it would
be nice if the Committer email was the one I used to register with their
system. I can set GIT_COMMITTER_EMAIL in the environment, but that is
rather inconvenient, since that means I have to remember to do that in
the shell I'm using for that particular project, and I can't use that
shell for other projects. So it would be really nice if I could set
commit.email = $private-email in the local .git/config for that
particular project. I don't personally have a use for commit.name (when
missing, that should just use user.name as usual), but it would probably
be most consistent to allow that too.

I tried looking into ident.c, but it doesn't seem like it is
straight-forward to implement. Probably fmt_ident, ident_default_email
etc. would need to be passed information about what purpose the ident is
to be used for. So before trying to implement this, I want to hear if
this is a reasonable thing to support. Also, I'm sure there are some
subtle semantics that would need to be decided and gotchas to watch out for.

Rasmus


Re: [RFC PATCH] cherry: teach git cherry to respect pathspec

2018-03-10 Thread Rasmus Villemoes
On 16 February 2018 at 02:15, Rasmus Villemoes <r...@rasmusvillemoes.dk> wrote:
> This is a very naive attempt at teaching git cherry to restrict
> attention to a given pathspec. Very much RFC, hence no documentation
> update or test cases for now.
>

Ping, any comments on this?


[PATCH v2 3/3] send-email: also pick up cc addresses from -by trailers

2018-10-16 Thread Rasmus Villemoes
When rerolling a patch series, including various Reviewed-by etc. that
may have come in, it is quite convenient to have git-send-email
automatically cc those people.

So pick up any *-by lines, with a new suppression category 'misc-by',
but special-case Signed-off-by, since that already has its own
suppression category. It seems natural to make 'misc-by' implied by
'body'.

Based-on-patch-by: Joe Perches 
Signed-off-by: Rasmus Villemoes 
---
 Documentation/git-send-email.txt |  5 -
 git-send-email.perl  | 19 ---
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt
index ea6ea512fe..f6010ac68b 100644
--- a/Documentation/git-send-email.txt
+++ b/Documentation/git-send-email.txt
@@ -329,8 +329,11 @@ Automating
   patch body (commit message) except for self (use 'self' for that).
 - 'sob' will avoid including anyone mentioned in Signed-off-by lines except
   for self (use 'self' for that).
+- 'misc-by' will avoid including anyone mentioned in Acked-by,
+  Reviewed-by, Tested-by and other "-by" lines in the patch body,
+  except Signed-off-by (use 'sob' for that).
 - 'cccmd' will avoid running the --cc-cmd.
-- 'body' is equivalent to 'sob' + 'bodycc'.
+- 'body' is equivalent to 'sob' + 'bodycc' + 'misc-by'.
 - 'all' will suppress all auto cc values.
 --
 +
diff --git a/git-send-email.perl b/git-send-email.perl
index 1916159d2a..58c6aa9d0e 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -94,7 +94,7 @@ sub usage {
 --identity* Use the sendemail. options.
 --to-cmd  * Email To: via ` \$patch_path`
 --cc-cmd  * Email Cc: via ` \$patch_path`
---suppress-cc * author, self, sob, cc, cccmd, body, 
bodycc, all.
+--suppress-cc * author, self, sob, cc, cccmd, body, 
bodycc, misc-by, all.
 --[no-]cc-cover* Email Cc: addresses in the cover letter.
 --[no-]to-cover* Email To: addresses in the cover letter.
 --[no-]signed-off-by-cc* Send to Signed-off-by: addresses. Default 
on.
@@ -454,13 +454,13 @@ sub read_config {
 if (@suppress_cc) {
foreach my $entry (@suppress_cc) {
die sprintf(__("Unknown --suppress-cc field: '%s'\n"), $entry)
-   unless $entry =~ 
/^(?:all|cccmd|cc|author|self|sob|body|bodycc)$/;
+   unless $entry =~ 
/^(?:all|cccmd|cc|author|self|sob|body|bodycc|misc-by)$/;
$suppress_cc{$entry} = 1;
}
 }
 
 if ($suppress_cc{'all'}) {
-   foreach my $entry (qw (cccmd cc author self sob body bodycc)) {
+   foreach my $entry (qw (cccmd cc author self sob body bodycc misc-by)) {
$suppress_cc{$entry} = 1;
}
delete $suppress_cc{'all'};
@@ -471,7 +471,7 @@ sub read_config {
 $suppress_cc{'sob'} = !$signed_off_by_cc if defined $signed_off_by_cc;
 
 if ($suppress_cc{'body'}) {
-   foreach my $entry (qw (sob bodycc)) {
+   foreach my $entry (qw (sob bodycc misc-by)) {
$suppress_cc{$entry} = 1;
}
delete $suppress_cc{'body'};
@@ -1681,7 +1681,7 @@ sub process_file {
# Now parse the message body
while(<$fh>) {
$message .=  $_;
-   if (/^(Signed-off-by|Cc): (.*)/i) {
+   if (/^([a-z-]*-by|Cc): (.*)/i) {
chomp;
my ($what, $c) = ($1, $2);
# strip garbage for the address we'll use:
@@ -1691,8 +1691,13 @@ sub process_file {
if ($sc eq $sender) {
next if ($suppress_cc{'self'});
} else {
-   next if $suppress_cc{'sob'} and $what =~ 
/Signed-off-by/i;
-   next if $suppress_cc{'bodycc'} and $what =~ 
/Cc/i;
+   if ($what =~ /^Signed-off-by$/i) {
+   next if $suppress_cc{'sob'};
+   } elsif ($what =~ /-by$/i) {
+   next if $suppress_cc{'misc-by'};
+   } elsif ($what =~ /Cc/i) {
+   next if $suppress_cc{'bodycc'};
+   }
}
if ($c !~ /.+@.+|<.+>/) {
printf("(body) Ignoring %s from line '%s'\n",
-- 
2.19.1.6.gbde171bbf5



[PATCH v2 2/3] send-email: only consider lines containing @ or <> for automatic Cc'ing

2018-10-16 Thread Rasmus Villemoes
While the address sanitizations routines do accept local addresses, that
is almost never what is meant in a Cc or Signed-off-by trailer.

Looking through all the signed-off-by lines in the linux kernel tree
without a @, there are mostly two patterns: Either just a full name, or
a full name followed by  (i.e., with the word at
instead of a @), and minor variations. For cc lines, the same patterns
appear, along with lots of "cc stable" variations that do not actually
name sta...@vger.kernel.org

  Cc: stable # introduced pre-git times
  cc: stable.kernel.org

In the  cases, one gets a chance to interactively
fix it. But when there is no <> pair, it seems we end up just using the
first word as a (local) address.

As the number of cases where a local address really was meant is
likely (and anecdotally) quite small compared to the number of cases
where we end up cc'ing a garbage address, insist on at least a @ or a <>
pair being present.

This is also preparation for the next patch, where we are likely to
encounter even more non-addresses in -by lines, such as

  Reported-by: Coverity
  Patch-generated-by: Coccinelle

Signed-off-by: Rasmus Villemoes 
---
 git-send-email.perl | 5 +
 1 file changed, 5 insertions(+)

diff --git a/git-send-email.perl b/git-send-email.perl
index 2be5dac337..1916159d2a 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -1694,6 +1694,11 @@ sub process_file {
next if $suppress_cc{'sob'} and $what =~ 
/Signed-off-by/i;
next if $suppress_cc{'bodycc'} and $what =~ 
/Cc/i;
}
+   if ($c !~ /.+@.+|<.+>/) {
+   printf("(body) Ignoring %s from line '%s'\n",
+   $what, $_) unless $quiet;
+   next;
+   }
push @cc, $c;
printf(__("(body) Adding cc: %s from line '%s'\n"),
$c, $_) unless $quiet;
-- 
2.19.1.6.gbde171bbf5



[PATCH v2 0/3] send-email: Also pick up cc addresses from -by trailers

2018-10-16 Thread Rasmus Villemoes
This series extends the logic in git-send-email so that addresses
appearing in various *-by: trailers (e.g. Reviewed-by, Acked-by,
Tested-by) are picked up and added to the Cc list, in addition to the
current logic that picks up Cc: and Signed-off-by: lines.

This deliberately only picks up -by trailers (as opposed to any
trailer), based on the heuristic that the -by suffix strongly suggests
there's a (name +) email address after the colon. This avoids having
to deal with BugID:, Link:, or other such tags. Still, widening to any
-by trailer does increase the risk that we will pick up stuff that is
not an email address, such as

  Reported-by: Coverity
  Patch-generated-by: Coccinelle

where send-email then ends up cc'ing the local 'coverity' user. Patch
2 tries to weed out the obvious no-email-address-here cases, which
should also help avoid cc'ing garbage (local) addresses for malformed
cc and signed-off-by lines. Patch 3 is then mostly mechanical,
introducing the misc-by suppression category and changing the regexp
for matching trailer lines to include .*-by.

Changes in v2: Rework logic in patch 3 as suggested by Junio.

v1 cover letter:

This has been attempted multiple times before, but I hope that this
can make it in this time around. That *-by addresses are not
automatically Cc'ed certainly still surprises people from time to
time.

I hope that this addresses all the concerns Junio had in
https://lkml.org/lkml/2016/8/31/768 .

For the name, I chose 'misc-by', since that has -by in its name. I am
fine with absolutely any other name (bodyby, body-by, by-trailers,
...). I doubt we can find a short token that is completely
self-explanatory, and note that one has to look in the man page anyway
to know what 'sob' means in this line from 'git send-email -h':

--suppress-cc * author, self, sob, cc, cccmd, body, 
bodycc, all.


Rasmus Villemoes (3):
  Documentation/git-send-email.txt: style fixes
  send-email: only consider lines containing @ or <> for automatic
Cc'ing
  send-email: also pick up cc addresses from -by trailers

 Documentation/git-send-email.txt | 11 +++
 git-send-email.perl  | 24 +---
 2 files changed, 24 insertions(+), 11 deletions(-)

-- 
2.19.1.6.gbde171bbf5



[PATCH v2 1/3] Documentation/git-send-email.txt: style fixes

2018-10-16 Thread Rasmus Villemoes
For consistency, add full stops in a few places and outdent a line by
one space.

Signed-off-by: Rasmus Villemoes 
---
 Documentation/git-send-email.txt | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt
index 465a4ecbed..ea6ea512fe 100644
--- a/Documentation/git-send-email.txt
+++ b/Documentation/git-send-email.txt
@@ -321,16 +321,16 @@ Automating
auto-cc of:
 +
 --
-- 'author' will avoid including the patch author
-- 'self' will avoid including the sender
+- 'author' will avoid including the patch author.
+- 'self' will avoid including the sender.
 - 'cc' will avoid including anyone mentioned in Cc lines in the patch header
   except for self (use 'self' for that).
 - 'bodycc' will avoid including anyone mentioned in Cc lines in the
   patch body (commit message) except for self (use 'self' for that).
 - 'sob' will avoid including anyone mentioned in Signed-off-by lines except
-   for self (use 'self' for that).
+  for self (use 'self' for that).
 - 'cccmd' will avoid running the --cc-cmd.
-- 'body' is equivalent to 'sob' + 'bodycc'
+- 'body' is equivalent to 'sob' + 'bodycc'.
 - 'all' will suppress all auto cc values.
 --
 +
-- 
2.19.1.6.gbde171bbf5



Re: [PATCH 3/3] send-email: also pick up cc addresses from -by trailers

2018-10-16 Thread Rasmus Villemoes
On 2018-10-16 07:57, Junio C Hamano wrote:
> Rasmus Villemoes  writes:
> 
>>> I wonder if it would make it easier to grok if we made the logic
>>> inside out, i.e.
>>>
>>> if ($sc eq $sender) {
>>> ...
>>> } else {
>>> if ($what =~ /^Signed-off-by$/i) {
>>> next if $suppress_cc{'sob'};
>>> } elsif ($what =~ /-by$/i) {
>>> next if $suppress_cc{'misc'};
>>> } elsif ($what =~ /^Cc$/i) {
>>> next if $suppress_cc{'bodycc'};>}
>>
>> ...yes, that's probably more readable.
> 
> OK, unless there is more comments and suggestions for improvements,
> can you send in a final version sometime not in so distant future so
> that we won't forget?

Will do, I was just waiting for more comments to come in.

 It may be surprising to existing users that
> the command now suddenly adds more addresses the user did not think
> would be added, but it would probably be easy enough for them to
> work around. 

Yeah, I thought about that, but unfortunately the whole auto-cc business
is not built around some config options where we can add a new and
default false. Also note that there are also cases currently where the
user sends off a patch series and is surprised that lots of intended
recipients were not cc'ed (that's how I picked this subject up again; I
had a long series where I had put specific Cc's in each patch, at v2,
some of those had given a Reviewed-by, so I changed the tags, and a
--dry-run told me they wouldn't get the new version).

I suppose one could make use of -by addresses dependent on a new opt-in
config option, but that's not very elegant. Another option is, for a
release or two, to make a little (more) noise when picking up a -by
address - something like setting a flag in the ($what =~ /-by/) branch,
and testing that flag somewhere in send_message(). But I suppose the
message printed when needs_confirm eq "inform" is generic enough.

Rasmus


[PATCH v3 2/3] git.c: handle_alias: prepend alias info when first argument is -h

2018-10-03 Thread Rasmus Villemoes
Most git commands respond to -h anywhere in the command line, or at
least as a first and lone argument, by printing the usage
information. For aliases, we can provide a little more information that
might be useful in interpreting/understanding the following output by
prepending a line telling that the command is an alias, and for what.

When one invokes a simple alias, such as "cp = cherry-pick"
with -h, this results in

$ git cp -h
'cp' is aliased to 'cherry-pick'
usage: git cherry-pick [] ...
...

When the alias consists of more than one word, this provides the
additional benefit of informing the user which options are implicit in
using the alias, e.g. with "cp = cherry-pick -n":

$ git cp -h
'cp' is aliased to 'cherry-pick -n'
usage: git cherry-pick [] ...
...

For shell commands, we cannot know how it responds to -h, but printing
this line to stderr should not hurt, and can help in figuring out what
is happening in a case like

$ git sc -h
'sc' is aliased to '!somecommand'
somecommand: invalid option '-h'

Suggested-by: Jeff King 
Signed-off-by: Rasmus Villemoes 
---
 git.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/git.c b/git.c
index a6f4b44af5..0211c2d4c0 100644
--- a/git.c
+++ b/git.c
@@ -318,6 +318,9 @@ static int handle_alias(int *argcp, const char ***argv)
alias_command = (*argv)[0];
alias_string = alias_lookup(alias_command);
if (alias_string) {
+   if (*argcp > 1 && !strcmp((*argv)[1], "-h"))
+   fprintf_ln(stderr, _("'%s' is aliased to '%s'"),
+  alias_command, alias_string);
if (alias_string[0] == '!') {
struct child_process child = CHILD_PROCESS_INIT;
int nongit_ok;
-- 
2.19.0



[PATCH v3 0/3] alias help tweaks

2018-10-03 Thread Rasmus Villemoes
v2: Added patches 2 and 3, made "git cmd --help" unconditionally (no
config option, no delay) redirect to the aliased command's help,
preserve pre-existing behaviour of the spelling "git help cmd".

v3: Add some additional comments in patch 1 and avoid triggering leak
checker reports. Use better wording in patch 3.

Rasmus Villemoes (3):
  help: redirect to aliased commands for "git cmd --help"
  git.c: handle_alias: prepend alias info when first argument is -h
  git-help.txt: document "git help cmd" vs "git cmd --help" for aliases

 Documentation/git-help.txt |  4 
 builtin/help.c | 34 +++---
 git.c  |  3 +++
 3 files changed, 38 insertions(+), 3 deletions(-)

-- 
2.19.0



[PATCH v3 3/3] git-help.txt: document "git help cmd" vs "git cmd --help" for aliases

2018-10-03 Thread Rasmus Villemoes
This documents the existing behaviour of "git help cmd" when cmd is an
alias, as well as providing a hint to use the "git cmd --help" form to
be taken directly to the man page for the aliased command.

Signed-off-by: Rasmus Villemoes 
---
 Documentation/git-help.txt | 4 
 1 file changed, 4 insertions(+)

diff --git a/Documentation/git-help.txt b/Documentation/git-help.txt
index 83d25d825a..86a6b42345 100644
--- a/Documentation/git-help.txt
+++ b/Documentation/git-help.txt
@@ -29,6 +29,10 @@ guide is brought up. The 'man' program is used by default 
for this
 purpose, but this can be overridden by other options or configuration
 variables.
 
+If an alias is given, git shows the definition of the alias on
+standard output. To get the manual page for the aliased command, use
+`git COMMAND --help`.
+
 Note that `git --help ...` is identical to `git help ...` because the
 former is internally converted into the latter.
 
-- 
2.19.0



[PATCH v3 1/3] help: redirect to aliased commands for "git cmd --help"

2018-10-03 Thread Rasmus Villemoes
As discussed in the thread for v1 of this patch [1] [2], this changes the
rules for "git foo --help" when foo is an alias.

(0) When invoked as "git help foo", we continue to print the "foo is
aliased to bar" message and nothing else.

(1) If foo is an alias for a shell command, print "foo is aliased to
!bar" as usual.

(2) Otherwise, break the alias string into words, and pretend that "git
word0 --help" was called.

At least for me, getting the man page for git-cherry-pick directly with
"git cp --help" is more useful (and how I expect an alias to behave)
than the short "is aliased to" notice. It is also consistent with
"--help" generally providing more comprehensive help than "-h".

I believe that printing the "is aliased to" message also in case (2) has
value: Depending on pager setup, or if the user has help.format=web, the
message is still present immediately above the prompt when the user
quits the pager/returns to the terminal. That serves as an explanation
for why one was redirected to "man git-cherry-pick" from "git cp
--help", and if cp is actually 'cherry-pick -n', it reminds the user
that using cp has some flag implicitly set before firing off the next
command.

It also provides some useful info in case we end up erroring out, either
in the "bad alias string" check, or in the "No manual entry for gitbar"
case.

[1] https://public-inbox.org/git/20180926102636.30691-1...@rasmusvillemoes.dk/
[2] https://public-inbox.org/git/20180926184914.gc30...@sigill.intra.peff.net/

Signed-off-by: Rasmus Villemoes 
---
 builtin/help.c | 34 +++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/builtin/help.c b/builtin/help.c
index 8d4f6dd301..e0e3fe62e9 100644
--- a/builtin/help.c
+++ b/builtin/help.c
@@ -415,9 +415,37 @@ static const char *check_git_cmd(const char* cmd)
 
alias = alias_lookup(cmd);
if (alias) {
-   printf_ln(_("'%s' is aliased to '%s'"), cmd, alias);
-   free(alias);
-   exit(0);
+   const char **argv;
+   int count;
+
+   /*
+* handle_builtin() in git.c rewrites "git cmd --help"
+* to "git help --exclude-guides cmd", so we can use
+* exclude_guides to distinguish "git cmd --help" from
+* "git help cmd". In the latter case, or if cmd is an
+* alias for a shell command, just print the alias
+* definition.
+*/
+   if (!exclude_guides || alias[0] == '!') {
+   printf_ln(_("'%s' is aliased to '%s'"), cmd, alias);
+   free(alias);
+   exit(0);
+   }
+   /*
+* Otherwise, we pretend that the command was "git
+* word0 --help". We use split_cmdline() to get the
+* first word of the alias, to ensure that we use the
+* same rules as when the alias is actually
+* used. split_cmdline() modifies alias in-place.
+*/
+   fprintf_ln(stderr, _("'%s' is aliased to '%s'"), cmd, alias);
+   count = split_cmdline(alias, );
+   if (count < 0)
+   die(_("bad alias.%s string: %s"), cmd,
+   split_cmdline_strerror(count));
+   free(argv);
+   UNLEAK(alias);
+   return alias;
}
 
if (exclude_guides)
-- 
2.19.0



Re: [PATCH v3 1/3] help: redirect to aliased commands for "git cmd --help"

2018-10-05 Thread Rasmus Villemoes
On 2018-10-05 10:19, Junio C Hamano wrote:
> Rasmus Villemoes  writes:
> 
>>
>> I believe that printing the "is aliased to" message also in case (2) has
>> value: Depending on pager setup, or if the user has help.format=web, the
>> message is still present immediately above the prompt when the user
>> quits the pager/returns to the terminal. That serves as an explanation
>> for why one was redirected to "man git-cherry-pick" from "git cp
>> --help", and if cp is actually 'cherry-pick -n', it reminds the user
>> that using cp has some flag implicitly set before firing off the next
>> command.
>>
>> It also provides some useful info in case we end up erroring out, either
>> in the "bad alias string" check, or in the "No manual entry for gitbar"
>> case.
> 
> These two paragraphs were misleading, because they sounded as if you
> were lamenting that you were somehow forbidden from doing so even
> though you believe doing it is the right thing.
> 
> But that is not what is happening.  I think we should update the (2)
> above to mention what you actually do in the code, perhaps like so:

Yes, what I wrote was probably better placed below ---.

> and hopefully remain visible when help.format=web is used,
>"git bar --help" errors out, or the manpage of "git bar" is
>short enough. It may not help if the help shows manpage on

or, as in my case, the pager does not clear the terminal. I even think
that's the default behaviour (due to X in $LESS) - at least, I don't
have any magic in the environment or .gitconfig to achieve this. So it's
not visible while the man page is shown in the pager, but upon exit from
the pager.

> It also is strange to count from (0); if the patchset is rerolled
> again, I'd prefer to see these start counting from (1), in which
> case this item will become (3).

If you prefer, I can send a v4.

Rasmus


[PATCH v4 0/3] alias help tweaks

2018-10-09 Thread Rasmus Villemoes
v2: Added patches 2 and 3, made "git cmd --help" unconditionally (no
config option, no delay) redirect to the aliased command's help,
preserve pre-existing behaviour of the spelling "git help cmd".

v3: Add some additional comments in patch 1 and avoid triggering leak
checker reports. Use better wording in patch 3.

v4: Reword commit log in patch 1.

Rasmus Villemoes (3):
  help: redirect to aliased commands for "git cmd --help"
  git.c: handle_alias: prepend alias info when first argument is -h
  git-help.txt: document "git help cmd" vs "git cmd --help" for aliases

 Documentation/git-help.txt |  4 
 builtin/help.c | 34 +++---
 git.c  |  3 +++
 3 files changed, 38 insertions(+), 3 deletions(-)

-- 
2.19.1.4.g721af0fda3



[PATCH v4 1/3] help: redirect to aliased commands for "git cmd --help"

2018-10-09 Thread Rasmus Villemoes
As discussed in the thread for v1 of this patch [1] [2], this changes the
rules for "git foo --help" when foo is an alias.

(1) When invoked as "git help foo", we continue to print the "foo is
aliased to bar" message and nothing else.

(2) If foo is an alias for a shell command, print "foo is aliased to
!bar" as usual.

(3) Otherwise, print "foo is aliased to bar" to the standard error
stream, and then break the alias string into words and pretend as if
"git word[0] --help" were called.

Getting the man page for git-cherry-pick directly with "git cp --help"
is consistent with "--help" generally providing more comprehensive help
than "-h". Printing the alias definition to stderr means that in certain
cases (e.g. if help.format=web or if the pager uses an alternate screen
and does not clear the terminal), one has

'cp' is aliased to 'cherry-pick -n'

above the prompt when one returns to the terminal/quits the pager, which
is a useful reminder that using 'cp' has some flag implicitly set. There
are cases where this information vanishes or gets scrolled
away, but being printed to stderr, it should never hurt.

[1] https://public-inbox.org/git/20180926102636.30691-1...@rasmusvillemoes.dk/
[2] https://public-inbox.org/git/20180926184914.gc30...@sigill.intra.peff.net/

Signed-off-by: Rasmus Villemoes 
---
 builtin/help.c | 34 +++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/builtin/help.c b/builtin/help.c
index 8d4f6dd301..e0e3fe62e9 100644
--- a/builtin/help.c
+++ b/builtin/help.c
@@ -415,9 +415,37 @@ static const char *check_git_cmd(const char* cmd)
 
alias = alias_lookup(cmd);
if (alias) {
-   printf_ln(_("'%s' is aliased to '%s'"), cmd, alias);
-   free(alias);
-   exit(0);
+   const char **argv;
+   int count;
+
+   /*
+* handle_builtin() in git.c rewrites "git cmd --help"
+* to "git help --exclude-guides cmd", so we can use
+* exclude_guides to distinguish "git cmd --help" from
+* "git help cmd". In the latter case, or if cmd is an
+* alias for a shell command, just print the alias
+* definition.
+*/
+   if (!exclude_guides || alias[0] == '!') {
+   printf_ln(_("'%s' is aliased to '%s'"), cmd, alias);
+   free(alias);
+   exit(0);
+   }
+   /*
+* Otherwise, we pretend that the command was "git
+* word0 --help". We use split_cmdline() to get the
+* first word of the alias, to ensure that we use the
+* same rules as when the alias is actually
+* used. split_cmdline() modifies alias in-place.
+*/
+   fprintf_ln(stderr, _("'%s' is aliased to '%s'"), cmd, alias);
+   count = split_cmdline(alias, );
+   if (count < 0)
+   die(_("bad alias.%s string: %s"), cmd,
+   split_cmdline_strerror(count));
+   free(argv);
+   UNLEAK(alias);
+   return alias;
}
 
if (exclude_guides)
-- 
2.19.1.4.g721af0fda3



[PATCH v4 2/3] git.c: handle_alias: prepend alias info when first argument is -h

2018-10-09 Thread Rasmus Villemoes
Most git commands respond to -h anywhere in the command line, or at
least as a first and lone argument, by printing the usage
information. For aliases, we can provide a little more information that
might be useful in interpreting/understanding the following output by
prepending a line telling that the command is an alias, and for what.

When one invokes a simple alias, such as "cp = cherry-pick"
with -h, this results in

$ git cp -h
'cp' is aliased to 'cherry-pick'
usage: git cherry-pick [] ...
...

When the alias consists of more than one word, this provides the
additional benefit of informing the user which options are implicit in
using the alias, e.g. with "cp = cherry-pick -n":

$ git cp -h
'cp' is aliased to 'cherry-pick -n'
usage: git cherry-pick [] ...
...

For shell commands, we cannot know how it responds to -h, but printing
this line to stderr should not hurt, and can help in figuring out what
is happening in a case like

$ git sc -h
'sc' is aliased to '!somecommand'
somecommand: invalid option '-h'

Suggested-by: Jeff King 
Signed-off-by: Rasmus Villemoes 
---
 git.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/git.c b/git.c
index a6f4b44af5..0211c2d4c0 100644
--- a/git.c
+++ b/git.c
@@ -318,6 +318,9 @@ static int handle_alias(int *argcp, const char ***argv)
alias_command = (*argv)[0];
alias_string = alias_lookup(alias_command);
if (alias_string) {
+   if (*argcp > 1 && !strcmp((*argv)[1], "-h"))
+   fprintf_ln(stderr, _("'%s' is aliased to '%s'"),
+  alias_command, alias_string);
if (alias_string[0] == '!') {
struct child_process child = CHILD_PROCESS_INIT;
int nongit_ok;
-- 
2.19.1.4.g721af0fda3



[PATCH v4 3/3] git-help.txt: document "git help cmd" vs "git cmd --help" for aliases

2018-10-09 Thread Rasmus Villemoes
This documents the existing behaviour of "git help cmd" when cmd is an
alias, as well as providing a hint to use the "git cmd --help" form to
be taken directly to the man page for the aliased command.

Signed-off-by: Rasmus Villemoes 
---
 Documentation/git-help.txt | 4 
 1 file changed, 4 insertions(+)

diff --git a/Documentation/git-help.txt b/Documentation/git-help.txt
index 83d25d825a..86a6b42345 100644
--- a/Documentation/git-help.txt
+++ b/Documentation/git-help.txt
@@ -29,6 +29,10 @@ guide is brought up. The 'man' program is used by default 
for this
 purpose, but this can be overridden by other options or configuration
 variables.
 
+If an alias is given, git shows the definition of the alias on
+standard output. To get the manual page for the aliased command, use
+`git COMMAND --help`.
+
 Note that `git --help ...` is identical to `git help ...` because the
 former is internally converted into the latter.
 
-- 
2.19.1.4.g721af0fda3



Re: [PATCH v2 1/3] help: redirect to aliased commands for "git cmd --help"

2018-10-03 Thread Rasmus Villemoes
On 2018-10-03 04:13, Jeff King wrote:
>> +/*
>> + * If we were invoked as "git help cmd", or cmd is an
>> + * alias for a shell command, we inform the user what
>> + * cmd is an alias for and do nothing else.
>> + */
>> +if (!exclude_guides || alias[0] == '!') {
>> +printf_ln(_("'%s' is aliased to '%s'"), cmd, alias);
>> +free(alias);
>> +exit(0);
>> +}
> 
> I'm not sure I understand why exclude_guides is relevant. We check it
> below when we know that we _don't_ have an alias. Hrm. I guess you're
> using it here as a proxy for "git foo --help" being used instead of "git
> help foo".

Exactly. Perhaps it's abusing the existing machinery, but I didn't know
how else to distinguish the two cases, and didn't feel like introducing
another way of passing on the exact same information.

> The comment probably needs to spell out that exclude_guides
> is the same as your "we were invoked as...".

Will do. That will also make the string --exclude-guides (i.e., with a
dash) appear in the comment, making it more likely to be found should
anyone change when and how --exclude-guides is implied.

> I wonder if we could change the name of that option. It is an
> undocumented, hidden option that we use internally, so it should be OK
> to do so (or we could always add another one). That might prevent
> somebody in the future from using --exclude-guides in more places and
> breaking your assumption here.

Perhaps, but I think that's better left for a separate patch, if really
necessary even with the expanded comment.

>> +count = split_cmdline(alias, );
>> +if (count < 0)
>> +die(_("bad alias.%s string: %s"), cmd,
>> +split_cmdline_strerror(count));
>> +return alias;
> 
> So we split only to find argv[0] here. But then we don't return it. That
> works because the split is done in place, meaning we must have inserted
> a NUL in alias. That's sufficiently subtle that it might be worth
> spelling it out in a comment.

OK, I actually had precisely

+   /*
+* We use split_cmdline() to get the first word of the
+* alias, to ensure that we use the same rules as when
+* the alias is actually used. split_cmdline()
+* modifies alias in-place.
+*/

in v1, but thought it might be overly verbose. I'll put it back in.

> We don't need to free alias here as we do above, because we're passing
> it back. We should free argv, though, I think (not its elements, just
> the array itself).

Yeah, I thought about this, and removing free(argv) was the last thing I
did before sending v1 - because we were going to leak alias anyway. I'm
happy to put it back in, along with...

> Unfortunately the caller is going to leak our returned "alias", [...] I think 
> it may be OK to overlook
> that and just UNLEAK() it in cmd_help().

...this. Except I'd rather do the UNLEAK in check_git_cmd (the
documentation does say "only from cmd_* functions or their direct
helpers") to make it a more targeted annotation.

Thanks,
Rasmus


Re: [PATCH v2 3/3] git-help.txt: document "git help cmd" vs "git cmd --help" for aliases

2018-10-03 Thread Rasmus Villemoes
On 2018-10-03 04:18, Jeff King wrote:
> On Mon, Oct 01, 2018 at 01:21:07PM +0200, Rasmus Villemoes wrote:
> 
>>  
>> +If an alias is given, git prints a note explaining what it is an alias
>> +for on standard output. To get the manual page for the aliased
>> +command, use `git COMMAND --help`.
> 
> Funny English: "what it is an...". Maybe:
> 
>   If an alias is given, git shows the definition of the alias on
>   standard output. To get the manual page...

Much better, thanks.

Rasmus


Re: [PATCH] help: allow redirecting to help for aliased command

2018-09-28 Thread Rasmus Villemoes
On 2018-09-26 20:12, Taylor Blau wrote:
> 
> In the case where you are scripting (and want to know what 'git co'
> means for programmatic usage), I think that there are two options. One,
> which you note above, is the 'git -c help.followAlias=false ...'
> approach, which I don't think is so bad for callers, given the tradeoff.
> 
> Another way to go is 'git config alias.co', which should provide the
> same answer. I think that either would be fine.

The latter seems much more robust, since that will also tell you
precisely whether co is an alias at all, and you don't have to parse
-h/--help output (stripping out the 'is aliased to...' stuff, which
might be complicated by i18n etc. etc.). So I don't think we should
worry too much about scripted use of -h/--help.

Rasmus


Re: [PATCH] help: allow redirecting to help for aliased command

2018-09-28 Thread Rasmus Villemoes
On 2018-09-26 20:49, Jeff King wrote:
> On Wed, Sep 26, 2018 at 08:16:36AM -0700, Junio C Hamano wrote:
> 
>>
>> If we expect users to use "git cp --help" a lot more often than "git
>> help cp" (or the other way around), one way to give a nicer experience
>> may be to unconditionally make "git cp --help" to directly show the
>> manpage of cherry-pick, while keeping "git help cp" to never do
>> that.  Then those who want to remember what "co" is aliased to can
>> ask "git help co".
> 
> I like that direction much better. I also wondered if we could leverage
> the "-h" versus "--help" distinction. The problem with printing the
> alias definition along with "--help" is that the latter will start a
> pager that obliterates what we wrote before (and hence all of this delay
> trickery).
> 
> But for "-h" we generally expect the command to output a usage message.
> 
> So what if the rules were:
> 
>   - "git help cp" shows "cp is an alias for cherry-pick" (as it does
> now)

Sounds good.

>   - "git cp -h" shows "cp is an alias for cherry-pick", followed by
> actually running "cherry-pick -h", which will show the usage
> message. For a single-word command that does very little, since the
> usage message starts with "cherry-pick". But if your alias is
> actually "cp = cherry-pick -n", then it _is_ telling you extra
> information.

Funny, I never noticed this difference, and that '-h' for an alias would
actually give more information than '--help'. I sort-of knew that -h
would give the synopsis, so I guess I've just gotten used to always use
--help, and just noticed that for aliases that doesn't provide much help.

Adding the 'is an alias for' info to -h sounds quite sensible.

And this could even work with "!" aliases: we define
> it, and then it is up to the alias to handle "-h" sensibly.

I'd be nervous about doing this, though, especially if we introduce this
without a new opt-in config option (which seems to be the direction the
discussion is taking). There are lots of commands that don't respond
with a help message to -h, or that only recognize -h as the first word,
or... There are really too many ways this could cause headaches.

But, now that I test it, it seems that we already let the alias handle
-h (and any other following words, with --help as the first word
special-cased). So what you're suggesting is (correct me if I'm wrong)
to _also_ intercept -h as the first word, and then print the alias info,
in addition to spawning the alias with the entire argv as usual. The
alias info would probably need to go to stderr in this case.

>   - "git cp --help" opens the manpage for cherry-pick. We don't bother
> with the alias definition, as it's available through other means
> (and thus we skip the obliteration/timing thing totally).

It sounds like you suggest doing this unconditionally, and without any
opt-in via config option or a short wait? That would certainly work for
me. It is, in fact, how I expect 'git cp --help' to work, until I get
reminded that it does not... Also, as Junio noted, is consistent with
--help generally providing more information than -h - except that one
loses the 'is an alias for' part for --help.

> This really only works for non-! aliases. Those would continue to
> show the alias definition.

Yes.

Thanks,
Rasmus


Re: [PATCH] help: allow redirecting to help for aliased command

2018-09-28 Thread Rasmus Villemoes
On 2018-09-26 17:19, Duy Nguyen wrote:
> On Wed, Sep 26, 2018 at 4:42 PM Taylor Blau  wrote:
>>> +
>>> + /*
>>> +  * We use split_cmdline() to get the first word of the
>>> +  * alias, to ensure that we use the same rules as when
>>> +  * the alias is actually used. split_cmdline()
>>> +  * modifies alias in-place.
>>> +  */
>>> + count = split_cmdline(alias, );
>>> + if (count < 0)
>>> + die("Bad alias.%s string: %s", cmd,
>>> + split_cmdline_strerror(count));
>>
>> Please wrap this in _() so that translators can translate it.
> 
> Yes! And another nit. die(), error(), warning()... usually start the
> message with a lowercase letter because we already start the sentence
> with a prefix, like
> 
> fatal: bad alias.blah blah
> 

I'll keep these points in mind, but this was pure copy-paste from git.c.

Rasmus



Re: [PATCH] help: allow redirecting to help for aliased command

2018-09-28 Thread Rasmus Villemoes
On 2018-09-26 17:16, Junio C Hamano wrote:
> Rasmus Villemoes  writes:
> 
>> +/*
>> + * We use split_cmdline() to get the first word of the
>> + * alias, to ensure that we use the same rules as when
>> + * the alias is actually used. split_cmdline()
>> + * modifies alias in-place.
>> + */
>> +count = split_cmdline(alias, );
>> +if (count < 0)
>> +die("Bad alias.%s string: %s", cmd,
>> +split_cmdline_strerror(count));
>> +
>> +if (follow_alias > 0) {
>> +fprintf_ln(stderr,
>> +   _("Continuing to help for %s in %0.1f 
>> seconds."),
>> +   alias, follow_alias/10.0);
>> +sleep_millisec(follow_alias * 100);
>> +}
>> +return alias;
> 
> If you have "[alias] cp = cherry-pick -n", split_cmdline discards
> "-n" and the follow-alias prompt does not even tell you that it did
> so,

That's not really true, as I deliberately did the split_cmdline after
printing the "is an alias for", but before "continuing to help for", so
this would precisely tell you

  cp is an alias for 'cherry-pick -n'
  continuing to help for 'cherry-pick' in 1.5 seconds

> and you get "git help cherry-pick".  This code somehow expects
> you to know to jump to the section that describes the "--no-commit"
> option.  I do not think that is a reasonable expectation.

No, in that case I would not expect git cp --help to jump to that
section anymore than I would expect "git cherry-pick -n --help" to
magically do that (and that would be impossible in general, if more
options are bundled in the alias).

> When you have "[alias] cp = cherry-pick -n", "git cp --help" should
> not do "git help cherry-pick".  Only a single word that exactly
> matches a git command should get this treatment.

I considered that, and could certainly live with that. But it seems the
discussion took a different turn in another part of the thread, so I'll
continue there.

Rasmus


Re: [PATCH 2/3] send-email: only consider lines containing @ or <> for automatic Cc'ing

2018-10-10 Thread Rasmus Villemoes
On 2018-10-10 14:57, Ævar Arnfjörð Bjarmason wrote:
> 
> On Wed, Oct 10 2018, Rasmus Villemoes wrote:
> 
>> +if ($c !~ /.+@.+|<.+>/) {
>> +printf("(body) Ignoring %s from line '%s'\n",
>> +$what, $_) unless $quiet;
>> +next;
>> +}
>>  push @cc, $c;
>>  printf(__("(body) Adding cc: %s from line '%s'\n"),
>>  $c, $_) unless $quiet;
> 
> There's a extract_valid_address() function in git-send-email already,
> shouldn't this be:
> 
> if (!extract_valid_address($c)) {
> [...]
> 
> Or is there a good reason not to use that function in this case?
> 

I considered that (and also had a version where I simply insisted on a @
being present), but that means the user no longer would get prompted
about the cases where the address was just slightly obfuscated, e.g. the

Cc: John Doe 

cases, which would be a regression, I guess. So I do want to pass such
cases through, and have them be dealt with when process_address_list
gets called.

So this is just a rather minimal and simple heuristic, which should
still be able to handle the vast majority of cases correctly, and at
least almost never exclude anything that might have a chance of becoming
a real address.

Rasmus


[PATCH 1/3] Documentation/git-send-email.txt: style fixes

2018-10-10 Thread Rasmus Villemoes
For consistency, add full stops in a few places and outdent a line by
one space.

Signed-off-by: Rasmus Villemoes 
---
 Documentation/git-send-email.txt | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt
index 465a4ecbed..ea6ea512fe 100644
--- a/Documentation/git-send-email.txt
+++ b/Documentation/git-send-email.txt
@@ -321,16 +321,16 @@ Automating
auto-cc of:
 +
 --
-- 'author' will avoid including the patch author
-- 'self' will avoid including the sender
+- 'author' will avoid including the patch author.
+- 'self' will avoid including the sender.
 - 'cc' will avoid including anyone mentioned in Cc lines in the patch header
   except for self (use 'self' for that).
 - 'bodycc' will avoid including anyone mentioned in Cc lines in the
   patch body (commit message) except for self (use 'self' for that).
 - 'sob' will avoid including anyone mentioned in Signed-off-by lines except
-   for self (use 'self' for that).
+  for self (use 'self' for that).
 - 'cccmd' will avoid running the --cc-cmd.
-- 'body' is equivalent to 'sob' + 'bodycc'
+- 'body' is equivalent to 'sob' + 'bodycc'.
 - 'all' will suppress all auto cc values.
 --
 +
-- 
2.19.1.6.g084f1d7761



[PATCH 0/3] send-email: Also pick up cc addresses from -by trailers

2018-10-10 Thread Rasmus Villemoes
This has been attempted multiple times before, but I hope that this
can make it in this time around. That *-by addresses are not
automatically Cc'ed certainly still surprises people from time to
time.

I hope that this addresses all the concerns Junio had in
https://lkml.org/lkml/2016/8/31/768 .

For the name, I chose 'misc-by', since that has -by in its name. I am
fine with absolutely any other name (bodyby, body-by, by-trailers,
...). I doubt we can find a short token that is completely
self-explanatory, and note that one has to look in the man page anyway
to know what 'sob' means in this line from 'git send-email -h':

--suppress-cc * author, self, sob, cc, cccmd, body, 
bodycc, all.

Rasmus Villemoes (3):
  Documentation/git-send-email.txt: style fixes
  send-email: only consider lines containing @ or <> for automatic
Cc'ing
  send-email: also pick up cc addresses from -by trailers

 Documentation/git-send-email.txt | 11 +++
 git-send-email.perl  | 19 +--
 2 files changed, 20 insertions(+), 10 deletions(-)

-- 
2.19.1.6.g084f1d7761



[PATCH 3/3] send-email: also pick up cc addresses from -by trailers

2018-10-10 Thread Rasmus Villemoes
When rerolling a patch series, including various Reviewed-by etc. that
may have come in, it is quite convenient to have git-send-email
automatically cc those people.

So pick up any *-by lines, with a new suppression category 'misc-by',
but special-case Signed-off-by, since that already has its own
suppression category. It seems natural to make 'misc-by' implied by
'body'.

Based-on-patch-by: Joe Perches 
Signed-off-by: Rasmus Villemoes 
---
 Documentation/git-send-email.txt |  5 -
 git-send-email.perl  | 14 --
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/Documentation/git-send-email.txt b/Documentation/git-send-email.txt
index ea6ea512fe..f6010ac68b 100644
--- a/Documentation/git-send-email.txt
+++ b/Documentation/git-send-email.txt
@@ -329,8 +329,11 @@ Automating
   patch body (commit message) except for self (use 'self' for that).
 - 'sob' will avoid including anyone mentioned in Signed-off-by lines except
   for self (use 'self' for that).
+- 'misc-by' will avoid including anyone mentioned in Acked-by,
+  Reviewed-by, Tested-by and other "-by" lines in the patch body,
+  except Signed-off-by (use 'sob' for that).
 - 'cccmd' will avoid running the --cc-cmd.
-- 'body' is equivalent to 'sob' + 'bodycc'.
+- 'body' is equivalent to 'sob' + 'bodycc' + 'misc-by'.
 - 'all' will suppress all auto cc values.
 --
 +
diff --git a/git-send-email.perl b/git-send-email.perl
index 1916159d2a..7a6391e5d8 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -94,7 +94,7 @@ sub usage {
 --identity* Use the sendemail. options.
 --to-cmd  * Email To: via ` \$patch_path`
 --cc-cmd  * Email Cc: via ` \$patch_path`
---suppress-cc * author, self, sob, cc, cccmd, body, 
bodycc, all.
+--suppress-cc * author, self, sob, cc, cccmd, body, 
bodycc, misc-by, all.
 --[no-]cc-cover* Email Cc: addresses in the cover letter.
 --[no-]to-cover* Email To: addresses in the cover letter.
 --[no-]signed-off-by-cc* Send to Signed-off-by: addresses. Default 
on.
@@ -454,13 +454,13 @@ sub read_config {
 if (@suppress_cc) {
foreach my $entry (@suppress_cc) {
die sprintf(__("Unknown --suppress-cc field: '%s'\n"), $entry)
-   unless $entry =~ 
/^(?:all|cccmd|cc|author|self|sob|body|bodycc)$/;
+   unless $entry =~ 
/^(?:all|cccmd|cc|author|self|sob|body|bodycc|misc-by)$/;
$suppress_cc{$entry} = 1;
}
 }
 
 if ($suppress_cc{'all'}) {
-   foreach my $entry (qw (cccmd cc author self sob body bodycc)) {
+   foreach my $entry (qw (cccmd cc author self sob body bodycc misc-by)) {
$suppress_cc{$entry} = 1;
}
delete $suppress_cc{'all'};
@@ -471,7 +471,7 @@ sub read_config {
 $suppress_cc{'sob'} = !$signed_off_by_cc if defined $signed_off_by_cc;
 
 if ($suppress_cc{'body'}) {
-   foreach my $entry (qw (sob bodycc)) {
+   foreach my $entry (qw (sob bodycc misc-by)) {
$suppress_cc{$entry} = 1;
}
delete $suppress_cc{'body'};
@@ -1681,7 +1681,7 @@ sub process_file {
# Now parse the message body
while(<$fh>) {
$message .=  $_;
-   if (/^(Signed-off-by|Cc): (.*)/i) {
+   if (/^([a-z-]*-by|Cc): (.*)/i) {
chomp;
my ($what, $c) = ($1, $2);
# strip garbage for the address we'll use:
@@ -1691,7 +1691,9 @@ sub process_file {
if ($sc eq $sender) {
next if ($suppress_cc{'self'});
} else {
-   next if $suppress_cc{'sob'} and $what =~ 
/Signed-off-by/i;
+   next if $suppress_cc{'sob'} and $what =~ 
/^Signed-off-by$/i;
+   next if $suppress_cc{'misc-by'}
+   and $what =~ /-by$/i and $what !~ 
/^Signed-off-by$/i;
next if $suppress_cc{'bodycc'} and $what =~ 
/Cc/i;
}
if ($c !~ /.+@.+|<.+>/) {
-- 
2.19.1.6.g084f1d7761



[PATCH 2/3] send-email: only consider lines containing @ or <> for automatic Cc'ing

2018-10-10 Thread Rasmus Villemoes
While the address sanitizations routines do accept local addresses, that
is almost never what is meant in a Cc or Signed-off-by trailer.

Looking through all the signed-off-by lines in the linux kernel tree
without a @, there are mostly two patterns: Either just a full name, or
a full name followed by  (i.e., with the word at
instead of a @), and minor variations. For cc lines, the same patterns
appear, along with lots of "cc stable" variations that do not actually
name sta...@vger.kernel.org

  Cc: stable # introduced pre-git times
  cc: stable.kernel.org

In the  cases, one gets a chance to interactively
fix it. But when there is no <> pair, it seems we end up just using the
first word as a (local) address.

As the number of cases where a local address really was meant is
likely (and anecdotally) quite small compared to the number of cases
where we end up cc'ing a garbage address, insist on at least a @ or a <>
pair being present.

This is also preparation for the next patch, where we are likely to
encounter even more non-addresses in -by lines, such as

  Reported-by: Coverity
  Patch-generated-by: Coccinelle

Signed-off-by: Rasmus Villemoes 
---
 git-send-email.perl | 5 +
 1 file changed, 5 insertions(+)

diff --git a/git-send-email.perl b/git-send-email.perl
index 2be5dac337..1916159d2a 100755
--- a/git-send-email.perl
+++ b/git-send-email.perl
@@ -1694,6 +1694,11 @@ sub process_file {
next if $suppress_cc{'sob'} and $what =~ 
/Signed-off-by/i;
next if $suppress_cc{'bodycc'} and $what =~ 
/Cc/i;
}
+   if ($c !~ /.+@.+|<.+>/) {
+   printf("(body) Ignoring %s from line '%s'\n",
+   $what, $_) unless $quiet;
+   next;
+   }
push @cc, $c;
printf(__("(body) Adding cc: %s from line '%s'\n"),
$c, $_) unless $quiet;
-- 
2.19.1.6.g084f1d7761



Re: [PATCH 2/3] send-email: only consider lines containing @ or <> for automatic Cc'ing

2018-10-11 Thread Rasmus Villemoes
On 2018-10-11 08:06, Junio C Hamano wrote:
> Rasmus Villemoes  writes:
> 
>> I considered that (and also had a version where I simply insisted on a @
>> being present), but that means the user no longer would get prompted
>> about the cases where the address was just slightly obfuscated, e.g. the
>>
>> Cc: John Doe 
>>
>> cases, which would be a regression, I guess. So I do want to pass such
>> cases through, and have them be dealt with when process_address_list
>> gets called.
> 
> We are only tightening with this patch, and we were passing any
> random things through with the original code anyway, so without
> [PATCH 3/3], this step must be making it only better, but I have to
> wonder one thing.
> 
> You keep saying "get prompted" but are we sure we always stop and
> ask (and preferrably---fail and abort when the end user is not
> available at the terminal to interact) when we have such a
> questionable address?
> 

I dunno. I guess I've never considered non-interactive use of
send-email. But the ask() in validate_address does have default q[uit],
which I suppose gets used if stdin is /dev/null? I did do an experiment
adding a bunch of the random odd patterns found in kernel commit
messages to see how send-email reacted before/after this, and the only
things that got filtered away (i.e., no longer prompted about) were
things where the user probably couldn't easily fix it anyway. In the
cases where there was a "Cc: stable" that might be fixed to the proper
sta...@vger.kernel.org, the logic in extract_valid_address simply saw
that as a local address, so we didn't use to be prompted, but simply
sent to stable@localhost. Now we simply don't pass that through. So, for
non-interactive use, I guess the effect of this patch is to allow more
cases to complete succesfully, since we filter away (some) cases where
extract_valid_address would cause us to prompt (and thus quit).

So, it seems you're ok with this tightening, but some comment on the
non-interactive use case should be made in the commit log? Or am I
misunderstanding?

Thanks,
Rasmus


Re: [PATCH 3/3] send-email: also pick up cc addresses from -by trailers

2018-10-11 Thread Rasmus Villemoes
On 2018-10-11 08:18, Junio C Hamano wrote:
> Rasmus Villemoes  writes:

>  we now ...
> 
>> +next if $suppress_cc{'sob'} and $what =~ 
>> /^Signed-off-by$/i;
> 
> ... must make sure what we have is _exactly_ "signed-off-by" when
> 'sob' is suppressed.  Makes sense.
> 
>> +next if $suppress_cc{'misc-by'}
>> +and $what =~ /-by$/i and $what !~ 
>> /^Signed-off-by$/i;
> 
> And this is the opposite side of the same coin, which also makes sense.

Yup, I started by just adding the misc-by line, then remembered that
people sometimes use not-signed-off-by variants, and went back and
anchored the s-o-b case. So now it's no longer so minimal, and...

> I wonder if it would make it easier to grok if we made the logic
> inside out, i.e.
> 
>   if ($sc eq $sender) {
>   ...
>   } else {
>   if ($what =~ /^Signed-off-by$/i) {
>   next if $suppress_cc{'sob'};
>   } elsif ($what =~ /-by$/i) {
>   next if $suppress_cc{'misc'};
>   } elsif ($what =~ /^Cc$/i) {
>   next if $suppress_cc{'bodycc'};>}

...yes, that's probably more readable.

Thanks,
Rasmus


[PATCH v2 3/3] git-help.txt: document "git help cmd" vs "git cmd --help" for aliases

2018-10-01 Thread Rasmus Villemoes
This documents the existing behaviour of "git help cmd" when cmd is an
alias, as well as providing a hint to use the "git cmd --help" form to
be taken directly to the man page for the aliased command.

Signed-off-by: Rasmus Villemoes 
---
 Documentation/git-help.txt | 4 
 1 file changed, 4 insertions(+)

diff --git a/Documentation/git-help.txt b/Documentation/git-help.txt
index 83d25d825a..37e85868fd 100644
--- a/Documentation/git-help.txt
+++ b/Documentation/git-help.txt
@@ -29,6 +29,10 @@ guide is brought up. The 'man' program is used by default 
for this
 purpose, but this can be overridden by other options or configuration
 variables.
 
+If an alias is given, git prints a note explaining what it is an alias
+for on standard output. To get the manual page for the aliased
+command, use `git COMMAND --help`.
+
 Note that `git --help ...` is identical to `git help ...` because the
 former is internally converted into the latter.
 
-- 
2.19.0



[PATCH v2 2/3] git.c: handle_alias: prepend alias info when first argument is -h

2018-10-01 Thread Rasmus Villemoes
Most git commands respond to -h anywhere in the command line, or at
least as a first and lone argument, by printing the usage
information. For aliases, we can provide a little more information that
might be useful in interpreting/understanding the following output by
prepending a line telling that the command is an alias, and for what.

When one invokes a simple alias, such as "cp = cherry-pick"
with -h, this results in

$ git cp -h
'cp' is aliased to 'cherry-pick'
usage: git cherry-pick [] ...
...

When the alias consists of more than one word, this provides the
additional benefit of informing the user which options are implicit in
using the alias, e.g. with "cp = cherry-pick -n":

$ git cp -h
'cp' is aliased to 'cherry-pick -n'
usage: git cherry-pick [] ...
...

For shell commands, we cannot know how it responds to -h, but printing
this line to stderr should not hurt, and can help in figuring out what
is happening in a case like

$ git sc -h
'sc' is aliased to '!somecommand'
somecommand: invalid option '-h'

Suggested-by: Jeff King 
Signed-off-by: Rasmus Villemoes 
---
 git.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/git.c b/git.c
index a6f4b44af5..0211c2d4c0 100644
--- a/git.c
+++ b/git.c
@@ -318,6 +318,9 @@ static int handle_alias(int *argcp, const char ***argv)
alias_command = (*argv)[0];
alias_string = alias_lookup(alias_command);
if (alias_string) {
+   if (*argcp > 1 && !strcmp((*argv)[1], "-h"))
+   fprintf_ln(stderr, _("'%s' is aliased to '%s'"),
+  alias_command, alias_string);
if (alias_string[0] == '!') {
struct child_process child = CHILD_PROCESS_INIT;
int nongit_ok;
-- 
2.19.0



[PATCH v2 1/3] help: redirect to aliased commands for "git cmd --help"

2018-10-01 Thread Rasmus Villemoes
As discussed in the thread for v1 of this patch [1] [2], this changes the
rules for "git foo --help" when foo is an alias.

(0) When invoked as "git help foo", we continue to print the "foo is
aliased to bar" message and nothing else.

(1) If foo is an alias for a shell command, print "foo is aliased to
!bar" as usual.

(2) Otherwise, break the alias string into words, and pretend that "git
word0 --help" was called.

At least for me, getting the man page for git-cherry-pick directly with
"git cp --help" is more useful (and how I expect an alias to behave)
than the short "is aliased to" notice. It is also consistent with
"--help" generally providing more comprehensive help than "-h".

I believe that printing the "is aliased to" message also in case (2) has
value: Depending on pager setup, or if the user has help.format=web, the
message is still present immediately above the prompt when the user
quits the pager/returns to the terminal. That serves as an explanation
for why one was redirected to "man git-cherry-pick" from "git cp
--help", and if cp is actually 'cherry-pick -n', it reminds the user
that using cp has some flag implicitly set before firing off the next
command.

It also provides some useful info in case we end up erroring out, either
in the "bad alias string" check, or in the "No manual entry for gitbar"
case.

[1] https://public-inbox.org/git/20180926102636.30691-1...@rasmusvillemoes.dk/
[2] https://public-inbox.org/git/20180926184914.gc30...@sigill.intra.peff.net/

Signed-off-by: Rasmus Villemoes 
---
 builtin/help.c | 26 +++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/builtin/help.c b/builtin/help.c
index 8d4f6dd301..4802a06f37 100644
--- a/builtin/help.c
+++ b/builtin/help.c
@@ -415,9 +415,29 @@ static const char *check_git_cmd(const char* cmd)
 
alias = alias_lookup(cmd);
if (alias) {
-   printf_ln(_("'%s' is aliased to '%s'"), cmd, alias);
-   free(alias);
-   exit(0);
+   const char **argv;
+   int count;
+
+   /*
+* If we were invoked as "git help cmd", or cmd is an
+* alias for a shell command, we inform the user what
+* cmd is an alias for and do nothing else.
+*/
+   if (!exclude_guides || alias[0] == '!') {
+   printf_ln(_("'%s' is aliased to '%s'"), cmd, alias);
+   free(alias);
+   exit(0);
+   }
+   /*
+* Otherwise, we pretend that the command was "git
+* word0 --help.
+*/
+   fprintf_ln(stderr, _("'%s' is aliased to '%s'"), cmd, alias);
+   count = split_cmdline(alias, );
+   if (count < 0)
+   die(_("bad alias.%s string: %s"), cmd,
+   split_cmdline_strerror(count));
+   return alias;
}
 
if (exclude_guides)
-- 
2.19.0



[PATCH] help: allow redirecting to help for aliased command

2018-09-26 Thread Rasmus Villemoes
I often use 'git  --help' as a quick way to get the documentation
for a command. However, I've also trained my muscle memory to use my
aliases (cp=cherry-pick, co=checkout etc.), which means that I often end
up doing

  git cp --help

to which git correctly informs me that cp is an alias for
cherry-pick. However, I already knew that, and what I really wanted was
the man page for the cherry-pick command.

This introduces a help.followAlias config option that transparently
redirects to (the first word of) the alias text (provided of course it
is not a shell command), similar to the option for autocorrect of
misspelled commands.

The documentation in config.txt could probably be improved. Also, I
mimicked the autocorrect case in that the "Continuing to ..." text goes
to stderr, but because of that, I also print the "is aliased to" text to
stderr, which is different from the current behaviour of using
stdout. I'm not sure what the most correct thing is, but I assume --help
is mostly used interactively with stdout and stderr pointing at the same
place.

Signed-off-by: Rasmus Villemoes 
---
 Documentation/config.txt | 10 ++
 builtin/help.c   | 36 +---
 2 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index ad0f4510c3..8a1fc8064e 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -2105,6 +2105,16 @@ help.autoCorrect::
value is 0 - the command will be just shown but not executed.
This is the default.
 
+help.followAlias::
+   When requesting help for an alias, git prints a line of the
+   form "'' is aliased to ''". If this option is
+   set to a positive integer, git proceeds to show the help for
+   the first word of  after the given number of
+   deciseconds. If the value of this option is negative, the
+   redirect happens immediately. If the value is 0 (which is the
+   default), or  begins with an exclamation point, no
+   redirect takes place.
+
 help.htmlPath::
Specify the path where the HTML documentation resides. File system paths
and URLs are supported. HTML pages will be prefixed with this path when
diff --git a/builtin/help.c b/builtin/help.c
index 8d4f6dd301..ef1c3f0916 100644
--- a/builtin/help.c
+++ b/builtin/help.c
@@ -34,6 +34,7 @@ enum help_format {
 };
 
 static const char *html_path;
+static int follow_alias;
 
 static int show_all = 0;
 static int show_guides = 0;
@@ -273,6 +274,10 @@ static int git_help_config(const char *var, const char 
*value, void *cb)
html_path = xstrdup(value);
return 0;
}
+   if (!strcmp(var, "help.followalias")) {
+   follow_alias = git_config_int(var, value);
+   return 0;
+   }
if (!strcmp(var, "man.viewer")) {
if (!value)
return config_error_nonbool(var);
@@ -415,9 +420,34 @@ static const char *check_git_cmd(const char* cmd)
 
alias = alias_lookup(cmd);
if (alias) {
-   printf_ln(_("'%s' is aliased to '%s'"), cmd, alias);
-   free(alias);
-   exit(0);
+   const char **argv;
+   int count;
+
+   if (!follow_alias || alias[0] == '!') {
+   printf_ln(_("'%s' is aliased to '%s'"), cmd, alias);
+   free(alias);
+   exit(0);
+   }
+   fprintf_ln(stderr, _("'%s' is aliased to '%s'"), cmd, alias);
+
+   /*
+* We use split_cmdline() to get the first word of the
+* alias, to ensure that we use the same rules as when
+* the alias is actually used. split_cmdline()
+* modifies alias in-place.
+*/
+   count = split_cmdline(alias, );
+   if (count < 0)
+   die("Bad alias.%s string: %s", cmd,
+   split_cmdline_strerror(count));
+
+   if (follow_alias > 0) {
+   fprintf_ln(stderr,
+  _("Continuing to help for %s in %0.1f 
seconds."),
+  alias, follow_alias/10.0);
+   sleep_millisec(follow_alias * 100);
+   }
+   return alias;
}
 
if (exclude_guides)
-- 
2.16.4