> On 26 Jul 2017, at 20:17, Jonathan Tan <[email protected]> wrote:
>
> Refactor, into a common function, the version and capability negotiation
> done when invoking a long-running process as a clean or smudge filter.
> This will be useful for other Git code that needs to interact similarly
> with a long-running process.
>
> As you can see in the change to t0021, this commit changes the error
> message reported when the long-running process does not introduce itself
> with the expected "server"-terminated line. Originally, the error
> message reports that the filter "does not support filter protocol
> version 2", differentiating between the old single-file filter protocol
> and the new multi-file filter protocol - I have updated it to something
> more generic and useful.
>
> Signed-off-by: Jonathan Tan <[email protected]>
> ---
> convert.c | 75 ++++--------------------------------
> pkt-line.c | 19 ----------
> pkt-line.h | 2 -
> sub-process.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
> sub-process.h | 26 +++++++++++++
> t/t0021-conversion.sh | 2 +-
> 6 files changed, 137 insertions(+), 90 deletions(-)
>
> diff --git a/convert.c b/convert.c
> index dbdbb24e4..1012462e3 100644
> --- a/convert.c
> +++ b/convert.c
> @@ -513,78 +513,17 @@ static struct hashmap subprocess_map;
>
> static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)
> {
> - int err, i;
> - struct cmd2process *entry = (struct cmd2process *)subprocess;
> - struct string_list cap_list = STRING_LIST_INIT_NODUP;
> - char *cap_buf;
> - const char *cap_name;
> - struct child_process *process = &subprocess->process;
> - const char *cmd = subprocess->cmd;
> -
> - static const struct {
> - const char *name;
> - unsigned int cap;
> - } known_caps[] = {
> + static int versions[] = {2, 0};
> + static struct subprocess_capability capabilities[] = {
> { "clean", CAP_CLEAN },
> { "smudge", CAP_SMUDGE },
> { "delay", CAP_DELAY },
> + { NULL, 0 }
> };
> -
> - sigchain_push(SIGPIPE, SIG_IGN);
> -
> - err = packet_writel(process->in, "git-filter-client", "version=2",
> NULL);
> - if (err)
> - goto done;
> -
> - err = strcmp(packet_read_line(process->out, NULL), "git-filter-server");
> - if (err) {
> - error("external filter '%s' does not support filter protocol
> version 2", cmd);
> - goto done;
> - }
> - err = strcmp(packet_read_line(process->out, NULL), "version=2");
> - if (err)
> - goto done;
> - err = packet_read_line(process->out, NULL) != NULL;
> - if (err)
> - goto done;
> -
> - for (i = 0; i < ARRAY_SIZE(known_caps); ++i) {
> - err = packet_write_fmt_gently(
> - process->in, "capability=%s\n", known_caps[i].name);
> - if (err)
> - goto done;
> - }
> - err = packet_flush_gently(process->in);
> - if (err)
> - goto done;
> -
> - for (;;) {
> - cap_buf = packet_read_line(process->out, NULL);
> - if (!cap_buf)
> - break;
> - string_list_split_in_place(&cap_list, cap_buf, '=', 1);
> -
> - if (cap_list.nr != 2 || strcmp(cap_list.items[0].string,
> "capability"))
> - continue;
> -
> - cap_name = cap_list.items[1].string;
> - i = ARRAY_SIZE(known_caps) - 1;
> - while (i >= 0 && strcmp(cap_name, known_caps[i].name))
> - i--;
> -
> - if (i >= 0)
> - entry->supported_capabilities |= known_caps[i].cap;
> - else
> - warning("external filter '%s' requested unsupported
> filter capability '%s'",
> - cmd, cap_name);
> -
> - string_list_clear(&cap_list, 0);
> - }
> -
> -done:
> - sigchain_pop(SIGPIPE);
> -
> - return err;
> + struct cmd2process *entry = (struct cmd2process *)subprocess;
> + return subprocess_handshake(subprocess, "git-filter", versions, NULL,
> + capabilities,
> + &entry->supported_capabilities);
Wouldn't it make sense to add `supported_capabilities` to `struct
subprocess_entry` ?
> }
>
> static void handle_filter_error(const struct strbuf *filter_status,
> diff --git a/pkt-line.c b/pkt-line.c
> index 9d845ecc3..7db911957 100644
> --- a/pkt-line.c
> +++ b/pkt-line.c
> @@ -171,25 +171,6 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...)
> return status;
> }
>
> -int packet_writel(int fd, const char *line, ...)
> -{
> - va_list args;
> - int err;
> - va_start(args, line);
> - for (;;) {
> - if (!line)
> - break;
> - if (strlen(line) > LARGE_PACKET_DATA_MAX)
> - return -1;
> - err = packet_write_fmt_gently(fd, "%s\n", line);
> - if (err)
> - return err;
> - line = va_arg(args, const char*);
> - }
> - va_end(args);
> - return packet_flush_gently(fd);
> -}
> -
> static int packet_write_gently(const int fd_out, const char *buf, size_t size)
> {
> static char packet_write_buffer[LARGE_PACKET_MAX];
> diff --git a/pkt-line.h b/pkt-line.h
> index 450183b64..66ef610fc 100644
> --- a/pkt-line.h
> +++ b/pkt-line.h
> @@ -25,8 +25,6 @@ void packet_buf_flush(struct strbuf *buf);
> void packet_buf_write(struct strbuf *buf, const char *fmt, ...)
> __attribute__((format (printf, 2, 3)));
> int packet_flush_gently(int fd);
> int packet_write_fmt_gently(int fd, const char *fmt, ...)
> __attribute__((format (printf, 2, 3)));
> -LAST_ARG_MUST_BE_NULL
> -int packet_writel(int fd, const char *line, ...);
> int write_packetized_from_fd(int fd_in, int fd_out);
> int write_packetized_from_buf(const char *src_in, size_t len, int fd_out);
>
> diff --git a/sub-process.c b/sub-process.c
> index 6cbffa440..37b4bd0ad 100644
> --- a/sub-process.c
> +++ b/sub-process.c
> @@ -108,3 +108,106 @@ int subprocess_start(struct hashmap *hashmap, struct
> subprocess_entry *entry, co
> hashmap_add(hashmap, entry);
> return 0;
> }
> +
> +static int handshake_version(struct child_process *process,
> + const char *welcome_prefix, int *versions,
Maybe it would be less ambiguous if we call it `supported_versions` ?
> + int *chosen_version)
> +{
> + int version_scratch;
> + int i;
> + char *line;
> + const char *p;
> +
> + if (!chosen_version)
> + chosen_version = &version_scratch;
I am not an C expert but wouldn't 'version_scratch' go out of scope as soon
as the function returns? Why don't you error here right away?
> + if (packet_write_fmt_gently(process->in, "%s-client\n",
> + welcome_prefix))
> + return error("Could not write client identification");
Nit: Would it make sense to rename `welcome_prefix` to `client_id`?
Alternatively, could we rename the error messages to "welcome prefix"?
> + for (i = 0; versions[i]; i++) {
> + if (packet_write_fmt_gently(process->in, "version=%d\n",
> + versions[i]))
> + return error("Could not write requested version");
Maybe: "Could not write supported versions"?
> + }
> + if (packet_flush_gently(process->in))
> + return error("Could not write flush packet");
I feel this error is too generic.
Maybe: "Could not finish writing supported versions"?
> +
> + if (!(line = packet_read_line(process->out, NULL)) ||
> + !skip_prefix(line, welcome_prefix, &p) ||
> + strcmp(p, "-server"))
> + return error("Unexpected line '%s', expected %s-server",
> + line ? line : "<flush packet>", welcome_prefix);
> + if (!(line = packet_read_line(process->out, NULL)) ||
> + !skip_prefix(line, "version=", &p) ||
> + strtol_i(p, 10, chosen_version))
Maybe `strlen("version=")` would be more clear than 10?
> + return error("Unexpected line '%s', expected version",
Maybe "... expected version number" ?
> + line ? line : "<flush packet>");
> + if ((line = packet_read_line(process->out, NULL)))
> + return error("Unexpected line '%s', expected flush", line);
> +
> + /* Check to make sure that the version received is supported */
> + for (i = 0; versions[i]; i++) {
> + if (versions[i] == *chosen_version)
> + break;
> + }
> + if (!versions[i])
> + return error("Version %d not supported", *chosen_version);
> +
> + return 0;
> +}
> +
> +static int handshake_capabilities(struct child_process *process,
> + struct subprocess_capability *capabilities,
> + unsigned int *supported_capabilities)
I feel the naming could be misleading. I think ...
`capabilities` is really `supported_capabilities`
and
`supported_capabilities` is really `negiotated_capabilties` or
`agreed_capabilites`
> +{
> + int i;
> + char *line;
> +
> + for (i = 0; capabilities[i].name; i++) {
> + if (packet_write_fmt_gently(process->in, "capability=%s\n",
> + capabilities[i].name))
> + return error("Could not write requested capability");
I think this should be "Could not write supported capability", no?
> + }
> + if (packet_flush_gently(process->in))
> + return error("Could not write flush packet");
Maybe " "Could not finish writing supported capability" ?
> + while ((line = packet_read_line(process->out, NULL))) {
> + const char *p;
> + if (!skip_prefix(line, "capability=", &p))
> + continue;
Shouldn't we write an error in this case?
> + for (i = 0;
> + capabilities[i].name && strcmp(p, capabilities[i].name);
> + i++)
> + ;
> + if (capabilities[i].name) {
> + if (supported_capabilities)
> + *supported_capabilities |= capabilities[i].flag;
> + } else {
> + warning("external filter requested unsupported filter
> capability '%s'",
> + p);
> + }
> + }
> +
> + return 0;
> +}
> +
> +int subprocess_handshake(struct subprocess_entry *entry,
> + const char *welcome_prefix,
> + int *versions,
> + int *chosen_version,
> + struct subprocess_capability *capabilities,
> + unsigned int *supported_capabilities) {
> + int retval;
> + struct child_process *process = &entry->process;
> +
> + sigchain_push(SIGPIPE, SIG_IGN);
> +
> + retval = handshake_version(process, welcome_prefix, versions,
> + chosen_version) ||
> + handshake_capabilities(process, capabilities,
> + supported_capabilities);
> +
> + sigchain_pop(SIGPIPE);
> + return retval;
> +}
> diff --git a/sub-process.h b/sub-process.h
> index d37c1499a..6857eb1b5 100644
> --- a/sub-process.h
> +++ b/sub-process.h
> @@ -29,6 +29,16 @@ struct subprocess_entry {
> struct child_process process;
> };
>
> +struct subprocess_capability {
> + const char *name;
> +
> + /*
> + * subprocess_handshake will "|=" this value to supported_capabilities
> + * if the server reports that it supports this capability.
> + */
> + unsigned int flag;
> +};
> +
> /* subprocess functions */
>
> /* Function to test two subprocess hashmap entries for equality. */
> @@ -62,6 +72,22 @@ static inline struct child_process
> *subprocess_get_child_process(
> return &entry->process;
> }
>
> +/*
> + * Perform the version and capability negotiation as described in the "Long
> + * Running Filter Process" section of the gitattributes documentation using
> the
> + * given requested versions and capabilities. The "versions" and
> "capabilities"
> + * parameters are arrays terminated by a 0 or blank struct.
Should we reference the "gitattributes docs" if we want to make this generic?
I thought "technical/api-sub-process.txt" would explain this kind of stuff
and I was surprised that you deleted it in an earlier patch.
> + *
> + * This function is typically called when a subprocess is started (as part of
> + * the "startfn" passed to subprocess_start).
> + */
> +int subprocess_handshake(struct subprocess_entry *entry,
> + const char *welcome_prefix,
> + int *versions,
> + int *chosen_version,
> + struct subprocess_capability *capabilities,
> + unsigned int *supported_capabilities);
> +
> /*
> * Helper function that will read packets looking for "status=<foo>"
> * key/value pairs and return the value from the last "status" packet
> diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
> index eb3d83744..46f8e583c 100755
> --- a/t/t0021-conversion.sh
> +++ b/t/t0021-conversion.sh
> @@ -697,7 +697,7 @@ test_expect_success PERL 'invalid process filter must
> fail (and not hang!)' '
>
> cp "$TEST_ROOT/test.o" test.r &&
> test_must_fail git add . 2>git-stderr.log &&
> - grep "does not support filter protocol version" git-stderr.log
> + grep "expected git-filter-server" git-stderr.log
> )
> '
>
The generalization of this protocol is nice to see.
Thanks for working on it,
Lars