Re: [PATCH 14/22] sequencer: prepare for rebase -i's commit functionality

2016-08-31 Thread Johannes Schindelin
Hi Junio,

On Tue, 30 Aug 2016, Junio C Hamano wrote:

> From: Junio C Hamano 
> Date: Tue, 30 Aug 2016 12:36:42 -0700
> Subject: [PATCH] am: refactor read_author_script()
> 
> [...]

Thank you so much for that! I will use that as a starting point to
refactor the two read_author_script() functions into one.

Just don't look surprised when you see this patch as part of my patch
series, Signed-off-by me...

Ciao,
Dscho


Re: [PATCH 14/22] sequencer: prepare for rebase -i's commit functionality

2016-08-30 Thread Junio C Hamano
Junio C Hamano  writes:

> Two functions with the same name reading from the same format, even
> when they expect to produce the same result in different internal
> format, without even being aware of each other is a bad enough
> "regression" in maintainability of the code.  One of them not even
> using sq_dequote() helper and reinventing is even worse.

So, here is what I did as a lunch-break-hack.  The "same result in
different internal format" calls for a fairly light-weight mechanism
to convey the necessary information that can be shared by callers
with different needs, and I chose string-list for that.

Totally untested, but parse_key_value_squoted() would be a good
foundation to build on.  The caller in "am" deliberately wants to be
strict (e.g. it wants to error out when the user mucked with the
file, so it insists on the three variables to appear in a known
order and rejects input that violates its assumption), but the
function does not mind if other callers want to be more lenient.

-- >8 --
From: Junio C Hamano 
Date: Tue, 30 Aug 2016 12:36:42 -0700
Subject: [PATCH] am: refactor read_author_script()

By splitting the part that reads from a file and the part that
parses the variable definitions from the contents, make the latter
can be more reusable in the future.

Signed-off-by: Junio C Hamano 
---
 builtin/am.c | 103 ++-
 1 file changed, 45 insertions(+), 58 deletions(-)

diff --git a/builtin/am.c b/builtin/am.c
index 739b34d..b36d1f0 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -28,6 +28,7 @@
 #include "rerere.h"
 #include "prompt.h"
 #include "mailinfo.h"
+#include "string-list.h"
 
 /**
  * Returns 1 if the file is empty or does not exist, 0 otherwise.
@@ -258,38 +259,29 @@ static int read_state_file(struct strbuf *sb, const 
struct am_state *state,
 }
 
 /**
- * Reads a KEY=VALUE shell variable assignment from `fp`, returning the VALUE
- * as a newly-allocated string. VALUE must be a quoted string, and the KEY must
- * match `key`. Returns NULL on failure.
- *
- * This is used by read_author_script() to read the GIT_AUTHOR_* variables from
- * the author-script.
+ * Take a series of KEY='VALUE' lines where VALUE part is
+ * sq-quoted, and append  at the end of the string list
  */
-static char *read_shell_var(FILE *fp, const char *key)
+static int parse_key_value_squoted(char *buf, struct string_list *list)
 {
-   struct strbuf sb = STRBUF_INIT;
-   const char *str;
-
-   if (strbuf_getline_lf(, fp))
-   goto fail;
-
-   if (!skip_prefix(sb.buf, key, ))
-   goto fail;
-
-   if (!skip_prefix(str, "=", ))
-   goto fail;
-
-   strbuf_remove(, 0, str - sb.buf);
-
-   str = sq_dequote(sb.buf);
-   if (!str)
-   goto fail;
-
-   return strbuf_detach(, NULL);
-
-fail:
-   strbuf_release();
-   return NULL;
+   while (*buf) {
+   struct string_list_item *item;
+   char *np;
+   char *cp = strchr(buf, '=');
+   if (!cp)
+   return -1;
+   np = strchrnul(cp, '\n');
+   *cp++ = '\0';
+   item = string_list_append(list, buf);
+
+   buf = np + (*np == '\n');
+   *np = '\0';
+   cp = sq_dequote(cp);
+   if (!cp)
+   return -1;
+   item->util = xstrdup(cp);
+   }
+   return 0;
 }
 
 /**
@@ -311,44 +303,39 @@ static char *read_shell_var(FILE *fp, const char *key)
 static int read_author_script(struct am_state *state)
 {
const char *filename = am_path(state, "author-script");
-   FILE *fp;
+   struct strbuf buf = STRBUF_INIT;
+   struct string_list kv = STRING_LIST_INIT_DUP;
+   int retval = -1; /* assume failure */
+   int fd;
 
assert(!state->author_name);
assert(!state->author_email);
assert(!state->author_date);
 
-   fp = fopen(filename, "r");
-   if (!fp) {
+   fd = open(filename, O_RDONLY);
+   if (fd < 0) {
if (errno == ENOENT)
return 0;
die_errno(_("could not open '%s' for reading"), filename);
}
+   strbuf_read(, fd, 0);
+   close(fd);
+   if (parse_key_value_squoted(buf.buf, ))
+   goto finish;
 
-   state->author_name = read_shell_var(fp, "GIT_AUTHOR_NAME");
-   if (!state->author_name) {
-   fclose(fp);
-   return -1;
-   }
-
-   state->author_email = read_shell_var(fp, "GIT_AUTHOR_EMAIL");
-   if (!state->author_email) {
-   fclose(fp);
-   return -1;
-   }
-
-   state->author_date = read_shell_var(fp, "GIT_AUTHOR_DATE");
-   if (!state->author_date) {
-   fclose(fp);
-   return -1;
-   }
-
-   if (fgetc(fp) != EOF) {
-

Re: [PATCH 14/22] sequencer: prepare for rebase -i's commit functionality

2016-08-30 Thread Johannes Schindelin
Hi Junio,

On Tue, 30 Aug 2016, Junio C Hamano wrote:

> Johannes Schindelin  writes:
> 
> >> > +static char **read_author_script(void)
> >> > +{
> >> > +struct strbuf script = STRBUF_INIT;
> >> > +int i, count = 0;
> >> > +char *p, *p2, **env;
> >> > +size_t env_size;
> >> > +
> >> > +if (strbuf_read_file(, rebase_path_author_script(), 256) 
> >> > <= 0)
> >> > +return NULL;
> >> > +
> >> > +for (p = script.buf; *p; p++)
> >> > +if (skip_prefix(p, "'''", (const char **)))
> >> > +strbuf_splice(, p - script.buf, p2 - p, 
> >> > "'", 1);
> >> > +else if (*p == '\'')
> >> > +strbuf_splice(, p-- - script.buf, 1, "", 
> >> > 0);
> >> > +else if (*p == '\n') {
> >> > +*p = '\0';
> >> > +count++;
> >> > +}
> >> 
> >> Hmph.  What is this loop doing?  Is it decoding a sq-quoted buffer
> >> or something?  Don't we have a helper function to do that?
> >
> > Well, it is not just decoding an sq-quoted buffer, but several lines with
> > definitions we sq-quoted ourselves, individually.
> >
> > The quote.[ch] code currently has no code to dequote lines individually.
> 
> There is a function with exactly the same name in builtin/am.c and I
> assume that it is reading from a file with the same format, which
> uses a helper to read one variable line at a time.  Hopefully that
> can be refactored so that more parsing is shared between the two
> users.
> 
> Two functions with the same name reading from the same format, even
> when they expect to produce the same result in different internal
> format, without even being aware of each other is a bad enough
> "regression" in maintainability of the code.  One of them not even
> using sq_dequote() helper and reinventing is even worse.

First of all, builtin/am's read_author_script() really, really, really
only wants to read stuff into the am_state struct.

That alone is already so incompatible that I do not think it can be
repaired.

Further, builtin/am's read_author_script() reads *only* the
GIT_AUTHOR_NAME, GIT_AUTHOR_EMAIL and GIT_AUTHOR_DATE lines (opening the
file three times for the task). And then it *requires* the corresponding
values to be sq-quoted.

I do not have a use case for this myself, but rebase -i explicitly eval's
the author script, so it is conceivable that some enterprisey user makes
use of this feature to set other environment variables. The thing is that
rebase -i cannot necessarily expect all of those files in its state
directory to be under tight control -- in stark contrast to git-am.

I could imagine that this could be fixed by abstracting the functionality
more, and use your favored sq_dequote() (which may actually fail in case
of an enterprisey usage as outlined above), and adapting builtin/am's
read_author_script() to make use of that refactored approach.

This is a huge task, make no mistake, in particular because we need to
ensure compatibility with non-standard usage. So I do not think I can
tackle that any time soon. Certainly not as part of an iteration of this
here patch series.

Ciao,
Dscho


Re: [PATCH 14/22] sequencer: prepare for rebase -i's commit functionality

2016-08-30 Thread Junio C Hamano
Johannes Schindelin  writes:

>> > +static char **read_author_script(void)
>> > +{
>> > +  struct strbuf script = STRBUF_INIT;
>> > +  int i, count = 0;
>> > +  char *p, *p2, **env;
>> > +  size_t env_size;
>> > +
>> > +  if (strbuf_read_file(, rebase_path_author_script(), 256) <= 0)
>> > +  return NULL;
>> > +
>> > +  for (p = script.buf; *p; p++)
>> > +  if (skip_prefix(p, "'''", (const char **)))
>> > +  strbuf_splice(, p - script.buf, p2 - p, "'", 1);
>> > +  else if (*p == '\'')
>> > +  strbuf_splice(, p-- - script.buf, 1, "", 0);
>> > +  else if (*p == '\n') {
>> > +  *p = '\0';
>> > +  count++;
>> > +  }
>> 
>> Hmph.  What is this loop doing?  Is it decoding a sq-quoted buffer
>> or something?  Don't we have a helper function to do that?
>
> Well, it is not just decoding an sq-quoted buffer, but several lines with
> definitions we sq-quoted ourselves, individually.
>
> The quote.[ch] code currently has no code to dequote lines individually.

There is a function with exactly the same name in builtin/am.c and I
assume that it is reading from a file with the same format, which
uses a helper to read one variable line at a time.  Hopefully that
can be refactored so that more parsing is shared between the two
users.

Two functions with the same name reading from the same format, even
when they expect to produce the same result in different internal
format, without even being aware of each other is a bad enough
"regression" in maintainability of the code.  One of them not even
using sq_dequote() helper and reinventing is even worse.


Re: [PATCH 14/22] sequencer: prepare for rebase -i's commit functionality

2016-08-29 Thread Junio C Hamano
Johannes Schindelin  writes:

> +/* We will introduce the 'interactive rebase' mode later */
> +#define IS_REBASE_I() 0

I do not see a point in naming this all caps.  The use site would be
a lot more pleasant to read when the reader does not have to care if
this is implemented as a preprocessor macro or a helper function.

> @@ -377,20 +387,72 @@ static int is_index_unchanged(void)
>   return !hashcmp(active_cache_tree->sha1, 
> head_commit->tree->object.oid.hash);
>  }
>  
> +static char **read_author_script(void)
> +{
> + struct strbuf script = STRBUF_INIT;
> + int i, count = 0;
> + char *p, *p2, **env;
> + size_t env_size;
> +
> + if (strbuf_read_file(, rebase_path_author_script(), 256) <= 0)
> + return NULL;
> +
> + for (p = script.buf; *p; p++)
> + if (skip_prefix(p, "'''", (const char **)))
> + strbuf_splice(, p - script.buf, p2 - p, "'", 1);
> + else if (*p == '\'')
> + strbuf_splice(, p-- - script.buf, 1, "", 0);
> + else if (*p == '\n') {
> + *p = '\0';
> + count++;
> + }

Hmph.  What is this loop doing?  Is it decoding a sq-quoted buffer
or something?  Don't we have a helper function to do that?

> + env_size = (count + 1) * sizeof(*env);
> + strbuf_grow(, env_size);
> + memmove(script.buf + env_size, script.buf, script.len);
> + p = script.buf + env_size;
> + env = (char **)strbuf_detach(, NULL);
> +
> + for (i = 0; i < count; i++) {
> + env[i] = p;
> + p += strlen(p) + 1;
> + }
> + env[count] = NULL;
> +
> + return env;
> +}
> +
>  /*
>   * If we are cherry-pick, and if the merge did not result in
>   * hand-editing, we will hit this commit and inherit the original
>   * author date and name.
>   * If we are revert, or if our cherry-pick results in a hand merge,
> - * we had better say that the current user is responsible for that.
> + * we had better say that the current user is responsible for that
> + * (except, of course, while running an interactive rebase).
>   */

The added "(except, ...)" reads as if "even if we are reverting, if
that is done as part of an interactive rebase, the authorship rule
for a revert does not apply".

If that is not what you meant, i.e. if you did not mean to imply
that "rebase -i" doing a revert is a normal thing, this needs to be
rephrased to avoid the misinterpretation.