Stefan Beller <[email protected]> writes:
> +static int next_byte(const char **cp, const char **endp,
> + const struct diff_options *diffopt)
> +{
> + int retval;
> +
> + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_AT_EOL)) {
> + while (*endp > *cp && isspace(**endp))
> + (*endp)--;
> + }
This should be done by the callers (both moved_entry_cmp() and
get_string_hash()) before starting to iterate over the bytes from
the beginning, no?
> + if (*cp > *endp)
> + return -1;
> +
> + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE_CHANGE)) {
> + while (*cp < *endp && isspace(**cp))
> + (*cp)++;
> + /*
> + * After skipping a couple of whitespaces, we still have to
> + * account for one space.
> + */
> + return (int)' ';
> + }
> +
> + if (DIFF_XDL_TST(diffopt, IGNORE_WHITESPACE)) {
> + while (*cp < *endp && isspace(**cp))
> + (*cp)++;
> + /* return the first non-ws character via the usual below */
> + }
> +
> + retval = **cp;
The char could be signed, and byte 0xff may become indistinguishable
from the EOF (i.e. -1) you returned earlier.
> + (*cp)++; /* advance */
> + return retval;
> +}
> +
> +static int moved_entry_cmp(const struct moved_entry *a,
> + const struct moved_entry *b,
> + const void *keydata,
> + const void *data)
> +{
> + const struct diff_options *diffopt = data;
> + const char *ap = a->es->line, *ae = a->es->line + a->es->len;
> + const char *bp = b->es->line, *be = b->es->line + b->es->len;
> +
> + if (!(diffopt->xdl_opts & XDF_WHITESPACE_FLAGS))
> + return a->es->len != b->es->len || memcmp(ap, bp, a->es->len);
> +
> + while (1) {
> + int ca, cb;
> + ca = next_byte(&ap, &ae, diffopt);
> + cb = next_byte(&bp, &be, diffopt);
> + if (ca != cb)
> + return 1; /* differs */
> + if (!ca)
Shouldn't this check for "ca == -1", as we are not dealing with NUL
terminated string but a <ptr, len> thing?
> + return 0;
> + };
> +}
> +
> +static unsigned get_string_hash(struct emitted_diff_symbol *es, struct
> diff_options *o)
> +{
> + if (o->xdl_opts & XDF_WHITESPACE_FLAGS) {
> + static struct strbuf sb = STRBUF_INIT;
> + const char *ap = es->line, *ae = es->line + es->len;
> + int c;
> +
> + strbuf_reset(&sb);
> + while ((c = next_byte(&ap, &ae, o)) > 0)
> + strbuf_addch(&sb, c);
> +
> + return memhash(sb.buf, sb.len);
> + } else {
> + return memhash(es->line, es->len);
> + }
> +}