Author: rhuijben Date: Mon Oct 19 10:51:21 2015 New Revision: 1709385 URL: http://svn.apache.org/viewvc?rev=1709385&view=rev Log: Following up on r1709335, cleanup the implementation a bit and implement the encoding step. Reorder and rename arguments to more closely follow other serf functions.
* serf-dev/dev/buckets/hpack_buckets.c (EOS_CHAR): New define. (serf__hpack_huffman_decode): Update documentation and arguments. Explicitly handle corner cases exactly as specified by the RFC. (serf__hpack_huffman_encode): New function. * serf-dev/dev/serf_private.h (serf__hpack_huffman_decode): Tweak arguments. (serf__hpack_huffman_encode): New function. * serf-dev/dev/test/test_buckets.c (test_hpack_huffman_decode): Update caller. Extend with more corner cases. (test_hpack_huffman_encode): New function. (test_buckets): Add test_hpack_huffman_encode. Modified: serf/trunk/buckets/hpack_buckets.c serf/trunk/serf_private.h serf/trunk/test/test_buckets.c Modified: serf/trunk/buckets/hpack_buckets.c URL: http://svn.apache.org/viewvc/serf/trunk/buckets/hpack_buckets.c?rev=1709385&r1=1709384&r2=1709385&view=diff ============================================================================== --- serf/trunk/buckets/hpack_buckets.c (original) +++ serf/trunk/buckets/hpack_buckets.c Mon Oct 19 10:51:21 2015 @@ -27,6 +27,7 @@ #include "serf_private.h" #include "hpack_huffman.inc" +#define EOS_CHAR (256) /* Callback for bsearch() */ static int @@ -47,34 +48,37 @@ hpack_hm_compare(const void *k, return 0; } -/* Convert raw data in RAW of size RAW_LEN. If RESULT is not NULL, - put the result in the buffer pointed to by RESULT which is of size - BUF_LEN. Sets *RESULT_LEN to the resulting length. - - If RESULT is not large enough return APR_EINVAL. - */ +/* Convert encoded data in ENCODED of size ENCODED_LEN. If TEXT is not + NULL, put the result in the buffer pointed to by TEXT which is of size + TEXT_AVAIL. Sets *TEXT_LEN to the resulting length. + + If TEXT_AVAIL allows it a final '\0' is added at TEXT[*TEXT_LEN]. + + If TEXT is not large enough return APR_ENOMEM. If ENCODED isn't properly + encoded return APR_EINVAL. + */ apr_status_t -serf__hpack_huffman_decode(const unsigned char *raw, - apr_size_t raw_len, - char *result, - apr_size_t buf_len, - apr_size_t *result_len) +serf__hpack_huffman_decode(const unsigned char *encoded, + apr_size_t encoded_len, + apr_size_t text_avail, + char *text, + apr_size_t *text_len) { apr_uint64_t stash = 0; apr_int16_t bits_left = 0; - *result_len = 0; + apr_size_t result_len = 0; - while (raw_len || bits_left) + while (encoded_len || bits_left) { apr_uint32_t match; struct serf_hpack_huffman_item_t *r; - while (bits_left < 30 && raw_len) + while (bits_left < 30 && encoded_len) { - stash |= (apr_uint64_t)*raw << (64 - 8 - bits_left); + stash |= (apr_uint64_t)*encoded << (64 - 8 - bits_left); bits_left += 8; - raw_len--; - raw++; + encoded_len--; + encoded++; } match = stash >> 32; @@ -82,33 +86,156 @@ serf__hpack_huffman_decode(const unsigne sizeof(serf_hpack_hm_map) / sizeof(serf_hpack_hm_map[0]), sizeof(serf_hpack_hm_map[0]), hpack_hm_compare); - if (!r) + if (!r || (r->bits > bits_left)) { - if (!raw_len) - break; - else - return SERF_ERROR_HTTP2_PROTOCOL_ERROR; + /* With a canonical huffman code we can only reach this state + at the end of the string */ + break; } - else if (r->bits > bits_left) - break; - if (result) + if (r->cval == EOS_CHAR) + return APR_EINVAL; + + if (text) { - if (*result_len < buf_len) - result[*result_len] = (char)r->cval; + if (result_len < text_avail) + text[result_len] = (char)r->cval; else - return APR_EINVAL; + return APR_ENOMEM; } - (*result_len)++; + result_len++; stash <<= r->bits; - if (bits_left ) bits_left -= r->bits; } - if (result && *result_len < buf_len) - result[*result_len] = 0; + if (bits_left) + { + /* https://tools.ietf.org/html/rfc7541#section-5.2 + + Upon decoding, an incomplete code at the end of the encoded data is + to be considered as padding and discarded. A padding strictly longer + than 7 bits MUST be treated as a decoding error. A padding not + corresponding to the most significant bits of the code for the EOS + symbol MUST be treated as a decoding error. A Huffman-encoded string + literal containing the EOS symbol MUST be treated as a decoding + error. */ + const struct serf_hpack_huffman_item_t *eos; + apr_uint64_t exp_stash; + + eos = &serf_hpack_hm_map[serf_hpack_hm_rmap[EOS_CHAR]]; + + /* Trim EOS value to the bits we need */ + exp_stash = ((apr_uint32_t)eos->hval >> (32 - bits_left)); + /* And move it to the right position */ + exp_stash <<= 64 - bits_left; + + if (exp_stash != stash) + return APR_EINVAL; + } + + *text_len = result_len; + if (text && result_len < text_avail) + text[result_len] = 0; return APR_SUCCESS; } +/* Encodes data in TEXT of size TEXT_LEN. + + Sets ENCODED_LEN to the required length. + + If ENCODED is not NULL, it specifies an output buffer of size + ENCODED_AVAIL into which the data will be encoded. + + If ENCODE is not NULL and the data doesn't fit returns APR_ENOMEM. + */ +apr_status_t +serf__hpack_huffman_encode(const char *text, + apr_size_t text_len, + apr_size_t encoded_avail, + unsigned char *encoded, + apr_size_t *encoded_len) +{ + apr_uint64_t stash = 0; + apr_int16_t bits_left = 0; + apr_size_t result_len = 0; + + if (! encoded) + { + /* Just calculating size needed. Avoid bit handling */ + apr_int64_t result_bits = 0; + while (text_len) + { + const struct serf_hpack_huffman_item_t *r; + + r = &serf_hpack_hm_map[serf_hpack_hm_rmap[*(unsigned char*)text]]; + + result_bits += r->bits; + text_len--; + text++; + } + + *encoded_len = (apr_size_t)((result_bits+7) / 8); + return APR_SUCCESS; + } + + while (text_len) + { + if (text_len) + { + const struct serf_hpack_huffman_item_t *r; + + r = &serf_hpack_hm_map[serf_hpack_hm_rmap[*(unsigned char*)text]]; + + stash |= (apr_uint64_t)r->hval << (64 - 32 - bits_left); + bits_left += r->bits; + text_len--; + text++; + } + + while (bits_left > 8) + { + if (! encoded_avail) + return APR_ENOMEM; + + *encoded = (unsigned char)(stash >> (64 - 8)); + encoded++; + stash <<= 8; + bits_left -= 8; + + encoded_avail--; + result_len++; + } + } + + if (bits_left) + { + /* https://tools.ietf.org/html/rfc7541#section-5.2 + + As the Huffman-encoded data doesn't always end at an octet boundary, + some padding is inserted after it, up to the next octet boundary. To + prevent this padding from being misinterpreted as part of the string + literal, the most significant bits of the code corresponding to the + EOS (end-of-string) symbol are used. */ + const struct serf_hpack_huffman_item_t *r; + + if (! encoded_avail) + return APR_ENOMEM; + + r = &serf_hpack_hm_map[serf_hpack_hm_rmap[EOS_CHAR]]; + stash |= (apr_uint64_t)r->hval << (64 - 32 - bits_left); + /* bits_left += r->bits; */ + + *encoded = (unsigned char)(stash >> (64 - 8)); + /* encoded++ */ + /* stash <<= 8; */ + /* bits_left -= 8; */ + /* encoded_avail--; */ + result_len++; + } + + *encoded_len = result_len; + + return APR_SUCCESS; +} Modified: serf/trunk/serf_private.h URL: http://svn.apache.org/viewvc/serf/trunk/serf_private.h?rev=1709385&r1=1709384&r2=1709385&view=diff ============================================================================== --- serf/trunk/serf_private.h (original) +++ serf/trunk/serf_private.h Mon Oct 19 10:51:21 2015 @@ -524,11 +524,17 @@ serf_bucket_t *serf__bucket_log_wrapper_ void serf__http2_protocol_init(serf_connection_t *conn); /* From http2_hpack_buckets.c */ -apr_status_t serf__hpack_huffman_decode(const unsigned char *raw, - apr_size_t raw_len, - char *result, - apr_size_t buf_len, - apr_size_t *result_len); +apr_status_t serf__hpack_huffman_decode(const unsigned char *encoded, + apr_size_t encoded_len, + apr_size_t text_avail, + char *text, + apr_size_t *text_len); + +apr_status_t serf__hpack_huffman_encode(const char *text, + apr_size_t text_len, + apr_size_t encoded_avail, + unsigned char *encoded, + apr_size_t *encoded_len); /** Logging functions. **/ Modified: serf/trunk/test/test_buckets.c URL: http://svn.apache.org/viewvc/serf/trunk/test/test_buckets.c?rev=1709385&r1=1709384&r2=1709385&view=diff ============================================================================== --- serf/trunk/test/test_buckets.c (original) +++ serf/trunk/test/test_buckets.c Mon Oct 19 10:51:21 2015 @@ -1939,68 +1939,191 @@ static void test_hpack_huffman_decode(Cu "\x72\xC1\xAB\x27\x0F\xB5\x29\x1F\x95\x87\x31" "\x60\x65\xC0\x03\xED\x4E\xE5\xB1\x06\x3D\x50" "\x07"; + const unsigned char preD[] = "\0\0\0\0\0"; CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre1, sizeof(pre1) - 1, - result, sizeof(result), + sizeof(result), result, &len)); CuAssertStrEquals(tc, "www.example.com", result); + CuAssertIntEquals(tc, strlen(result), len); CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre2, sizeof(pre2) - 1, - result, sizeof(result), + sizeof(result), result, &len)); CuAssertStrEquals(tc, "no-cache", result); + CuAssertIntEquals(tc, strlen(result), len); CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre3, sizeof(pre3) - 1, - result, sizeof(result), + sizeof(result), result, &len)); CuAssertStrEquals(tc, "custom-key", result); + CuAssertIntEquals(tc, strlen(result), len); CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre4, sizeof(pre4) - 1, - result, sizeof(result), + sizeof(result), result, &len)); CuAssertStrEquals(tc, "custom-value", result); + CuAssertIntEquals(tc, strlen(result), len); CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre5, sizeof(pre5) - 1, - result, sizeof(result), + sizeof(result), result, &len)); CuAssertStrEquals(tc, "302", result); + CuAssertIntEquals(tc, strlen(result), len); CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre6, sizeof(pre6) - 1, - result, sizeof(result), + sizeof(result), result, &len)); CuAssertStrEquals(tc, "private", result); + CuAssertIntEquals(tc, strlen(result), len); CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre7, sizeof(pre7) - 1, - result, sizeof(result), + sizeof(result), result, &len)); CuAssertStrEquals(tc, "Mon, 21 Oct 2013 20:13:21 GMT", result); + CuAssertIntEquals(tc, strlen(result), len); CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre8, sizeof(pre8) - 1, - result, sizeof(result), + sizeof(result), result, &len)); CuAssertStrEquals(tc, "https://www.example.com", result); + CuAssertIntEquals(tc, strlen(result), len); CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre9, sizeof(pre9) - 1, - result, sizeof(result), + sizeof(result), result, &len)); CuAssertStrEquals(tc, "307", result); + CuAssertIntEquals(tc, strlen(result), len); CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(preA, sizeof(preA) - 1, - result, sizeof(result), + sizeof(result), result, &len)); CuAssertStrEquals(tc, "Mon, 21 Oct 2013 20:13:22 GMT", result); + CuAssertIntEquals(tc, strlen(result), len); CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(preB, sizeof(preB) - 1, - result, sizeof(result), + sizeof(result), result, &len)); CuAssertStrEquals(tc, "gzip", result); + CuAssertIntEquals(tc, strlen(result), len); CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(preC, sizeof(preC) - 1, - result, sizeof(result), + sizeof(result), result, &len)); CuAssertStrEquals(tc, "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; " "version=1", result); + CuAssertIntEquals(tc, strlen(result), len); + + CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(preD, sizeof(preD) - 1, + sizeof(result), result, + &len)); + CuAssertStrEquals(tc, "00000000", result); + CuAssertIntEquals(tc, strlen(result), len); + + /* And now check some corner cases as specified in the RFC: + + The remaining bits must be filled out with the prefix of EOS */ + CuAssertIntEquals(tc, 0, + serf__hpack_huffman_decode((const unsigned char*)"\x07", 1, + sizeof(result), result, + &len)); + CuAssertStrEquals(tc, "0", result); + CuAssertIntEquals(tc, 1, len); + + CuAssertIntEquals(tc, APR_EINVAL, + serf__hpack_huffman_decode((const unsigned char*)"\x06", 1, + sizeof(result), + result, &len)); + CuAssertIntEquals(tc, APR_EINVAL, + serf__hpack_huffman_decode((const unsigned char*)"\x01", 1, + sizeof(result), + result, &len)); + + /* EOS may not appear itself */ + CuAssertIntEquals(tc, APR_EINVAL, + serf__hpack_huffman_decode((const unsigned char*) + "\xFF\xFF\xFF\xFF", 4, + sizeof(result), result, &len)); +} + +#define VERIFY_REVERSE(x) \ + do \ + { \ + const char *v = (x); \ + apr_size_t sz2; \ + CuAssertIntEquals(tc, 0, \ + serf__hpack_huffman_encode(v, strlen(v), \ + sizeof(encoded), \ + encoded, &encoded_len)); \ + CuAssertIntEquals(tc, 0, \ + serf__hpack_huffman_encode(v, strlen(v), \ + 0, NULL, &sz2)); \ + CuAssertIntEquals(tc, encoded_len, sz2); \ + CuAssertIntEquals(tc, 0, \ + serf__hpack_huffman_decode(encoded, encoded_len, \ + sizeof(text), text, \ + &text_len)); \ + CuAssertStrEquals(tc, v, text); \ + CuAssertIntEquals(tc, strlen(v), text_len); \ + } \ + while(0) + +static void test_hpack_huffman_encode(CuTest *tc) +{ + unsigned char encoded[1024]; + char text[1024]; + apr_size_t encoded_len; + apr_size_t text_len; + + VERIFY_REVERSE("1234567890"); + VERIFY_REVERSE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); + + { + char from[256]; + int i; + + for (i = 0; i < sizeof(from); i++) + from[i] = (char)i; + + CuAssertIntEquals(tc, 0, + serf__hpack_huffman_encode(from, sizeof(from), + sizeof(encoded), + encoded, &encoded_len)); + /* Nice.. need 583 bytes to encode these 256 bytes :-) */ + CuAssertIntEquals(tc, 583, encoded_len); + text[256] = 0xFE; + CuAssertIntEquals(tc, 0, + serf__hpack_huffman_decode(encoded, encoded_len, + sizeof(text), text, + &text_len)); + CuAssertIntEquals(tc, 256, text_len); + CuAssertIntEquals(tc, 0, memcmp(from, text, sizeof(from))); + /* If there is space in the buffer serf__hpack_huffman_decode will add + a final '\0' after the buffer */ + CuAssertIntEquals(tc, 0, text[256]); + + for (i = 0; i < sizeof(from); i++) + from[i] = '0'; + + CuAssertIntEquals(tc, 0, + serf__hpack_huffman_encode(from, sizeof(from), + sizeof(encoded), + encoded, &encoded_len)); + /* Ok, 160 to encode 256. Maybe there is some use case */ + CuAssertIntEquals(tc, 160, encoded_len); + text[256] = 0xEF; + CuAssertIntEquals(tc, 0, + serf__hpack_huffman_decode(encoded, encoded_len, + sizeof(text), text, + &text_len)); + CuAssertIntEquals(tc, 256, text_len); + CuAssertIntEquals(tc, 0, memcmp(from, text, sizeof(from))); + /* If there is space in the buffer serf__hpack_huffman_decode will add + a final '\0' after the buffer */ + CuAssertIntEquals(tc, 0, text[256]); + } } +#undef VERIFY_REVERSE CuSuite *test_buckets(void) { @@ -2034,6 +2157,7 @@ CuSuite *test_buckets(void) SUITE_ADD_TEST(suite, test_http2_unframe_buckets); SUITE_ADD_TEST(suite, test_http2_unpad_buckets); SUITE_ADD_TEST(suite, test_hpack_huffman_decode); + SUITE_ADD_TEST(suite, test_hpack_huffman_encode); #if 0 /* This test for issue #152 takes a lot of time generating 4GB+ of random data so it's disabled by default. */