On Fri, Sep 20, 2013 at 12:32:26AM +0300, Michael S. Tsirkin wrote:
> On Tue, Sep 17, 2013 at 04:56:16PM -0400, Jeff King wrote:
> > On Tue, Sep 17, 2013 at 11:38:07PM +0300, Michael S. Tsirkin wrote:
> >
> > > > A problem with both schemes, though, is that they are not
> > > > backwards-compatible with existing git-patch-id implementations.
> > >
> > > Could you clarify?
> > > We never send patch IDs on the wire - how isn't this compatible?
> >
> > I meant that you might be comparing patch-ids generated by different
> > implementations, or across time. There are no dedicated tools to do so,
> > but it is very easy to do so with standard tools like "join".
> >
> > For example, you can do:
> >
> > patch_ids() {
> > git rev-list "$1" |
> > git diff-tree --stdin -p |
> > git patch-id |
> > sort
> > }
> >
> > patch_ids origin..topic1 >us
> > patch_ids origin..topic2 >them
> >
> > join us them | cut -d' ' -f2-3
> >
> > to get a list of correlated commits between two branches. If the "them"
> > was on another machine with a different implementation (or is saved from
> > an earlier time), your patch-ids would not match.
> >
> > It may be esoteric enough not to worry about, though. By far the most
> > common use of patch-ids is going to be in a single "rev-list
> > --cherry-pick" situation where you are trying to omit commits during
> > a rebase.
> >
> > I am mostly thinking of the problems we had with the "kup" tool, which
> > expected stability across diffs that would be signed by both kernel.org.
> > But as far as I know, they do not use patch-id. More details in case you
> > are curious (including me arguing that we should not care, and it is
> > kup's problem!) are here:
> >
> > http://thread.gmane.org/gmane.comp.version-control.git/192331/focus=192424
> >
> > rerere is mentioned in that thread, but I believe that it does its own
> > hash, and does not rely on patch-id.
> >
> > -Peff
>
> OK so far I came up with the following.
> Needs documentation and tests obviously.
> But besides that -
> would something like this be enough to address the
> issue Junio raised?
Ping. Junio could you comment on this please?
> --->
>
> patch-id: make it more stable
>
> Add a new patch-id algorithm making it stable against
> hunk reodering:
> - prepend header to each hunk (if not there)
> - calculate SHA1 hash for each hunk separately
> - sum all hashes to get patch id
>
> Add --order-sensitive to get historical unstable behaviour.
>
> Signed-off-by: Michael S. Tsirkin <[email protected]>
> ---
> builtin/patch-id.c | 65
> +++++++++++++++++++++++++++++++++++++++++-------------
> 1 file changed, 50 insertions(+), 15 deletions(-)
>
> diff --git a/builtin/patch-id.c b/builtin/patch-id.c
> index 3cfe02d..d1902ff 100644
> --- a/builtin/patch-id.c
> +++ b/builtin/patch-id.c
> @@ -1,17 +1,14 @@
> #include "builtin.h"
>
> -static void flush_current_id(int patchlen, unsigned char *id, git_SHA_CTX *c)
> +static void flush_current_id(int patchlen, unsigned char *id, unsigned char
> *result)
> {
> - unsigned char result[20];
> char name[50];
>
> if (!patchlen)
> return;
>
> - git_SHA1_Final(result, c);
> memcpy(name, sha1_to_hex(id), 41);
> printf("%s %s\n", sha1_to_hex(result), name);
> - git_SHA1_Init(c);
> }
>
> static int remove_space(char *line)
> @@ -56,10 +53,30 @@ static int scan_hunk_header(const char *p, int *p_before,
> int *p_after)
> return 1;
> }
>
> -static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx,
> struct strbuf *line_buf)
> +static void flush_one_hunk(unsigned char *result, git_SHA_CTX *ctx)
> {
> - int patchlen = 0, found_next = 0;
> + unsigned char hash[20];
> + unsigned short carry = 0;
> + int i;
> +
> + git_SHA1_Final(hash, ctx);
> + git_SHA1_Init(ctx);
> + /* 20-byte sum, with carry */
> + for (i = 0; i < 20; ++i) {
> + carry += result[i] + hash[i];
> + result[i] = carry;
> + carry >>= 8;
> + }
> +}
> +static int get_one_patchid(unsigned char *next_sha1, unsigned char *result,
> + struct strbuf *line_buf, int stable)
> +{
> + int patchlen = 0, found_next = 0, hunks = 0;
> int before = -1, after = -1;
> + git_SHA_CTX ctx, header_ctx;
> +
> + git_SHA1_Init(&ctx);
> + hashclr(result);
>
> while (strbuf_getwholeline(line_buf, stdin, '\n') != EOF) {
> char *line = line_buf->buf;
> @@ -99,6 +116,18 @@ static int get_one_patchid(unsigned char *next_sha1,
> git_SHA_CTX *ctx, struct st
> if (!memcmp(line, "@@ -", 4)) {
> /* Parse next hunk, but ignore line numbers. */
> scan_hunk_header(line, &before, &after);
> + if (stable) {
> + if (hunks) {
> + flush_one_hunk(result, &ctx);
> + memcpy(&ctx, &header_ctx,
> + sizeof ctx);
> + } else {
> + /* Save ctx for next hunk. */
> + memcpy(&header_ctx, &ctx,
> + sizeof ctx);
> + }
> + }
> + hunks++;
> continue;
> }
>
> @@ -108,6 +137,7 @@ static int get_one_patchid(unsigned char *next_sha1,
> git_SHA_CTX *ctx, struct st
>
> /* Else we're parsing another header. */
> before = after = -1;
> + hunks = 0;
> }
>
> /* If we get here, we're inside a hunk. */
> @@ -119,27 +149,27 @@ static int get_one_patchid(unsigned char *next_sha1,
> git_SHA_CTX *ctx, struct st
> /* Compute the sha without whitespace */
> len = remove_space(line);
> patchlen += len;
> - git_SHA1_Update(ctx, line, len);
> + git_SHA1_Update(&ctx, line, len);
> }
>
> if (!found_next)
> hashclr(next_sha1);
>
> + flush_one_hunk(result, &ctx);
> +
> return patchlen;
> }
>
> -static void generate_id_list(void)
> +static void generate_id_list(int stable)
> {
> - unsigned char sha1[20], n[20];
> - git_SHA_CTX ctx;
> + unsigned char sha1[20], n[20], result[20];
> int patchlen;
> struct strbuf line_buf = STRBUF_INIT;
>
> - git_SHA1_Init(&ctx);
> hashclr(sha1);
> while (!feof(stdin)) {
> - patchlen = get_one_patchid(n, &ctx, &line_buf);
> - flush_current_id(patchlen, sha1, &ctx);
> + patchlen = get_one_patchid(n, result, &line_buf, stable);
> + flush_current_id(patchlen, sha1, result);
> hashcpy(sha1, n);
> }
> strbuf_release(&line_buf);
> @@ -149,9 +179,14 @@ static const char patch_id_usage[] = "git patch-id <
> patch";
>
> int cmd_patch_id(int argc, const char **argv, const char *prefix)
> {
> - if (argc != 1)
> + int stable;
> + if (argc == 2 && !strcmp(argv[1], "--order-sensitive"))
> + stable = 0;
> + else if (argc == 1)
> + stable = 1;
> + else
> usage(patch_id_usage);
>
> - generate_id_list();
> + generate_id_list(stable);
> return 0;
> }
> --
> MST
>
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html