Jeff King <[email protected]> writes:
> +static void strbuf_add_varint(struct strbuf *out, uintmax_t val)
> +{
> + size_t len;
> + strbuf_grow(out, 16); /* enough for any varint */
> + len = encode_varint(val, (unsigned char *)out->buf + out->len);
> + strbuf_setlen(out, out->len + len);
> +}
> +
> +static void bitmap_to_rle(struct strbuf *out, struct bitmap *bitmap)
> +{
> + int curval = 0; /* count zeroes, then ones, then zeroes, etc */
> + size_t run = 0;
> + size_t word;
> + size_t orig_len = out->len;
> +
> + for (word = 0; word < bitmap->word_alloc; word++) {
> + int bit;
> +
> + for (bit = 0; bit < BITS_IN_EWORD; bit++) {
> + int val = !!(bitmap->words[word] & (((eword_t)1) <<
> bit));
> + if (val == curval)
> + run++;
> + else {
> + strbuf_add_varint(out, run);
> + curval = 1 - curval; /* flip 0/1 */
> + run = 1;
> + }
> + }
OK. I find it a bit disturbing to see that the loop knows a bit too
much about how "struct bitmap" is implemented, but that is a complaint
against the bitmap API, not this new user of the API.
We do not try to handle the case where bitmap has bits that is not
multiple of BITS_IN_EWORD and instead pretend that size of such a
bitmap can be rounded up, because we ignore trailing 0-bit anyway,
and we know the "struct bitmap" would pad with 0-bit at the tail?
> + }
> +
> + /*
> + * complete the run, but do not bother with trailing zeroes, unless we
> + * failed to write even an initial run of 0's.
> + */
> + if (curval && run)
> + strbuf_add_varint(out, run);
> + else if (orig_len == out->len)
> + strbuf_add_varint(out, 0);
> +
> + /* signal end-of-input with an empty run */
> + strbuf_add_varint(out, 0);
> +}
OK.
> +static size_t rle_each_bit(const unsigned char *in, size_t len,
> + void (*fn)(size_t, void *), void *data)
> +{
> + int curval = 0; /* look for zeroes first, then ones, etc */
> + const unsigned char *cur = in;
> + const unsigned char *end = in + len;
> + size_t pos;
> +
> + /* we always have a first run, even if it's 0 zeroes */
> + pos = decode_varint(&cur);
> +
> + /*
> + * ugh, varint does not seem to have a way to prevent reading past
> + * the end of the buffer. We'll do a length check after each one,
> + * so the worst case is bounded.
> + */
Sorry about that :-).
> + if (cur > end) {
> + error("input underflow in rle");
> + return len;
> + }
> +
> + while (1) {
> + size_t run = decode_varint(&cur);
> +
> + if (cur > end) {
> + error("input underflow in rle");
> + return len;
> + }
> +
> + if (!run)
> + break; /* empty run signals end */
> +
> + curval = 1 - curval; /* flip 0/1 */
> + if (curval) {
> + /* we have a run of 1's; deliver them */
> + size_t i;
> + for (i = 0; i < run; i++)
> + fn(pos + i, data);
> + }
> + pos += run;
> + }
> +
> + return cur - in;
> +}
Makes sense.