Stefan Beller <sbel...@google.com> writes:

> +static int next_byte(const char **cp, const char **endp,
> +                  const struct diff_options *diffopt)
> +{
> +     int retval;
> +
> +     if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_AT_EOL)) {
> +             while (*endp > *cp && isspace(**endp))
> +                     (*endp)--;
> +     }

This should be done by the callers (both moved_entry_cmp() and
get_string_hash()) before starting to iterate over the bytes from
the beginning, no?

> +     if (*cp > *endp)
> +             return -1;
> +
> +     if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_CHANGE)) {
> +             while (*cp < *endp && isspace(**cp))
> +                     (*cp)++;
> +             /*
> +              * After skipping a couple of whitespaces, we still have to
> +              * account for one space.
> +              */
> +             return (int)' ';
> +     }
> +
> +     if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE)) {
> +             while (*cp < *endp && isspace(**cp))
> +                     (*cp)++;
> +             /* return the first non-ws character via the usual below */
> +     }
> +
> +     retval = **cp;

The char could be signed, and byte 0xff may become indistinguishable
from the EOF (i.e. -1) you returned earlier.

> +     (*cp)++; /* advance */
> +     return retval;
> +}
> +
> +static int moved_entry_cmp(const struct moved_entry *a,
> +                        const struct moved_entry *b,
> +                        const void *keydata,
> +                        const void *data)
> +{
> +     const struct diff_options *diffopt = data;
> +     const char *ap = a->es->line, *ae = a->es->line + a->es->len;
> +     const char *bp = b->es->line, *be = b->es->line + b->es->len;
> +
> +     if (!(diffopt->xdl_opts & XDF_WHITESPACE_FLAGS))
> +             return a->es->len != b->es->len  || memcmp(ap, bp, a->es->len);
> +
> +     while (1) {
> +             int ca, cb;
> +             ca = next_byte(&ap, &ae, diffopt);
> +             cb = next_byte(&bp, &be, diffopt);
> +             if (ca != cb)
> +                     return 1; /* differs */
> +             if (!ca)

Shouldn't this check for "ca == -1", as we are not dealing with NUL
terminated string but a <ptr, len> thing?

> +                     return 0;
> +     };
> +}
> +
> +static unsigned get_string_hash(struct emitted_diff_symbol *es, struct 
> diff_options *o)
> +{
> +     if (o->xdl_opts & XDF_WHITESPACE_FLAGS) {
> +             static struct strbuf sb = STRBUF_INIT;
> +             const char *ap = es->line, *ae = es->line + es->len;
> +             int c;
> +
> +             strbuf_reset(&sb);
> +             while ((c = next_byte(&ap, &ae, o)) > 0)
> +                     strbuf_addch(&sb, c);
> +
> +             return memhash(sb.buf, sb.len);
> +     } else {
> +             return memhash(es->line, es->len);
> +     }
> +}

Reply via email to