test_buckets.c

rhuijben Mon, 19 Oct 2015 03:52:34 -0700

Author: rhuijben
Date: Mon Oct 19 10:51:21 2015
New Revision: 1709385

URL: http://svn.apache.org/viewvc?rev=1709385&view=rev
Log:
Following up on r1709335, cleanup the implementation a bit and implement
the encoding step. Reorder and rename arguments to more closely follow
other serf functions.


* serf-dev/dev/buckets/hpack_buckets.c
  (EOS_CHAR): New define.
  (serf__hpack_huffman_decode): Update documentation and arguments. Explicitly
    handle corner cases exactly as specified by the RFC.
  (serf__hpack_huffman_encode): New function.

* serf-dev/dev/serf_private.h
  (serf__hpack_huffman_decode): Tweak arguments.
  (serf__hpack_huffman_encode): New function.

* serf-dev/dev/test/test_buckets.c
  (test_hpack_huffman_decode): Update caller. Extend with more corner cases.
  (test_hpack_huffman_encode): New function.
  (test_buckets): Add test_hpack_huffman_encode.

Modified:
    serf/trunk/buckets/hpack_buckets.c
    serf/trunk/serf_private.h
    serf/trunk/test/test_buckets.c

Modified: serf/trunk/buckets/hpack_buckets.c
URL: 
http://svn.apache.org/viewvc/serf/trunk/buckets/hpack_buckets.c?rev=1709385&r1=1709384&r2=1709385&view=diff
==============================================================================
--- serf/trunk/buckets/hpack_buckets.c (original)
+++ serf/trunk/buckets/hpack_buckets.c Mon Oct 19 10:51:21 2015
@@ -27,6 +27,7 @@
 #include "serf_private.h"
 
 #include "hpack_huffman.inc"
+#define EOS_CHAR (256)
 
 /* Callback for bsearch() */
 static int
@@ -47,34 +48,37 @@ hpack_hm_compare(const void *k,
     return 0;
 }
 
-/* Convert raw data in RAW of size RAW_LEN. If RESULT is not NULL,
-   put the result in the buffer pointed to by RESULT which is of size
-   BUF_LEN. Sets *RESULT_LEN to the resulting length.
-
-   If RESULT is not large enough return APR_EINVAL.
-   */
+/* Convert encoded data in ENCODED of size ENCODED_LEN. If TEXT is not
+   NULL, put the result in the buffer pointed to by TEXT which is of size
+   TEXT_AVAIL. Sets *TEXT_LEN to the resulting length.
+
+   If TEXT_AVAIL allows it a final '\0' is added at TEXT[*TEXT_LEN].
+
+   If TEXT is not large enough return APR_ENOMEM. If ENCODED isn't properly
+   encoded return APR_EINVAL.
+ */
 apr_status_t
-serf__hpack_huffman_decode(const unsigned char *raw,
-                           apr_size_t raw_len,
-                           char *result,
-                           apr_size_t buf_len,
-                           apr_size_t *result_len)
+serf__hpack_huffman_decode(const unsigned char *encoded,
+                           apr_size_t encoded_len,
+                           apr_size_t text_avail,
+                           char *text,
+                           apr_size_t *text_len)
 {
   apr_uint64_t stash = 0;
   apr_int16_t bits_left = 0;
-  *result_len = 0;
+  apr_size_t result_len = 0;
 
-  while (raw_len || bits_left)
+  while (encoded_len || bits_left)
     {
       apr_uint32_t match;
       struct serf_hpack_huffman_item_t *r;
 
-      while (bits_left < 30 && raw_len)
+      while (bits_left < 30 && encoded_len)
         {
-          stash |= (apr_uint64_t)*raw << (64 - 8 - bits_left);
+          stash |= (apr_uint64_t)*encoded << (64 - 8 - bits_left);
           bits_left += 8;
-          raw_len--;
-          raw++;
+          encoded_len--;
+          encoded++;
         }
 
       match = stash >> 32;
@@ -82,33 +86,156 @@ serf__hpack_huffman_decode(const unsigne
                   sizeof(serf_hpack_hm_map) / sizeof(serf_hpack_hm_map[0]),
                   sizeof(serf_hpack_hm_map[0]), hpack_hm_compare);
 
-      if (!r)
+      if (!r || (r->bits > bits_left))
         {
-          if (!raw_len)
-            break;
-          else
-            return SERF_ERROR_HTTP2_PROTOCOL_ERROR;
+          /* With a canonical huffman code we can only reach this state
+             at the end of the string */
+          break;
         }
-      else if (r->bits > bits_left)
-        break;
 
-      if (result)
+      if (r->cval == EOS_CHAR)
+        return APR_EINVAL;
+
+      if (text)
         {
-          if (*result_len < buf_len)
-            result[*result_len] = (char)r->cval;
+          if (result_len < text_avail)
+            text[result_len] = (char)r->cval;
           else
-            return APR_EINVAL;
+            return APR_ENOMEM;
         }
 
-      (*result_len)++;
+      result_len++;
       stash <<= r->bits;
-      if (bits_left )
       bits_left -= r->bits;
     }
 
-  if (result && *result_len < buf_len)
-    result[*result_len] = 0;
+  if (bits_left)
+    {
+      /* https://tools.ietf.org/html/rfc7541#section-5.2
+
+         Upon decoding, an incomplete code at the end of the encoded data is
+         to be considered as padding and discarded.  A padding strictly longer
+         than 7 bits MUST be treated as a decoding error.  A padding not
+         corresponding to the most significant bits of the code for the EOS
+         symbol MUST be treated as a decoding error.  A Huffman-encoded string
+         literal containing the EOS symbol MUST be treated as a decoding
+         error. */
+      const struct serf_hpack_huffman_item_t *eos;
+      apr_uint64_t exp_stash;
+
+      eos = &serf_hpack_hm_map[serf_hpack_hm_rmap[EOS_CHAR]];
+
+      /* Trim EOS value to the bits we need */
+      exp_stash = ((apr_uint32_t)eos->hval >> (32 - bits_left));
+      /* And move it to the right position */
+      exp_stash <<= 64 - bits_left;
+
+      if (exp_stash != stash)
+        return APR_EINVAL;
+    }
+
+  *text_len = result_len;
+  if (text && result_len < text_avail)
+    text[result_len] = 0;
 
   return APR_SUCCESS;
 }
 
+/* Encodes data in TEXT of size TEXT_LEN.
+
+   Sets ENCODED_LEN to the required length.
+
+   If ENCODED is not NULL, it specifies an output buffer of size
+   ENCODED_AVAIL into which the data will be encoded.
+
+   If ENCODE is not NULL and the data doesn't fit returns APR_ENOMEM.
+ */
+apr_status_t
+serf__hpack_huffman_encode(const char *text,
+                           apr_size_t text_len,
+                           apr_size_t encoded_avail,
+                           unsigned char *encoded,
+                           apr_size_t *encoded_len)
+{
+  apr_uint64_t stash = 0;
+  apr_int16_t bits_left = 0;
+  apr_size_t result_len = 0;
+
+  if (! encoded)
+    {
+      /* Just calculating size needed. Avoid bit handling */
+      apr_int64_t result_bits = 0;
+      while (text_len)
+        {
+          const struct serf_hpack_huffman_item_t *r;
+
+          r = &serf_hpack_hm_map[serf_hpack_hm_rmap[*(unsigned char*)text]];
+
+          result_bits += r->bits;
+          text_len--;
+          text++;
+        }
+
+      *encoded_len = (apr_size_t)((result_bits+7) / 8);
+      return APR_SUCCESS;
+    }
+
+  while (text_len)
+    {
+      if (text_len)
+        {
+          const struct serf_hpack_huffman_item_t *r;
+
+          r = &serf_hpack_hm_map[serf_hpack_hm_rmap[*(unsigned char*)text]];
+
+          stash |= (apr_uint64_t)r->hval << (64 - 32 - bits_left);
+          bits_left += r->bits;
+          text_len--;
+          text++;
+        }
+
+      while (bits_left > 8)
+        {
+          if (! encoded_avail)
+            return APR_ENOMEM;
+
+          *encoded = (unsigned char)(stash >> (64 - 8));
+          encoded++;
+          stash <<= 8;
+          bits_left -= 8;
+
+          encoded_avail--;
+          result_len++;
+        }
+    }
+
+  if (bits_left)
+    {
+      /* https://tools.ietf.org/html/rfc7541#section-5.2
+
+         As the Huffman-encoded data doesn't always end at an octet boundary,
+         some padding is inserted after it, up to the next octet boundary.  To
+         prevent this padding from being misinterpreted as part of the string
+         literal, the most significant bits of the code corresponding to the
+         EOS (end-of-string) symbol are used. */
+      const struct serf_hpack_huffman_item_t *r;
+
+      if (! encoded_avail)
+        return APR_ENOMEM;
+
+      r = &serf_hpack_hm_map[serf_hpack_hm_rmap[EOS_CHAR]];
+      stash |= (apr_uint64_t)r->hval << (64 - 32 - bits_left);
+      /* bits_left += r->bits; */
+
+      *encoded = (unsigned char)(stash >> (64 - 8));
+      /* encoded++ */
+      /* stash <<= 8; */
+      /* bits_left -= 8; */
+      /* encoded_avail--; */
+      result_len++;
+    }
+
+  *encoded_len = result_len;
+
+  return APR_SUCCESS;
+}

Modified: serf/trunk/serf_private.h
URL: 
http://svn.apache.org/viewvc/serf/trunk/serf_private.h?rev=1709385&r1=1709384&r2=1709385&view=diff
==============================================================================
--- serf/trunk/serf_private.h (original)
+++ serf/trunk/serf_private.h Mon Oct 19 10:51:21 2015
@@ -524,11 +524,17 @@ serf_bucket_t *serf__bucket_log_wrapper_
 void serf__http2_protocol_init(serf_connection_t *conn);
 
 /* From http2_hpack_buckets.c */
-apr_status_t serf__hpack_huffman_decode(const unsigned char *raw,
-                                        apr_size_t raw_len,
-                                        char *result,
-                                        apr_size_t buf_len,
-                                        apr_size_t *result_len);
+apr_status_t serf__hpack_huffman_decode(const unsigned char *encoded,
+                                        apr_size_t encoded_len,
+                                        apr_size_t text_avail,
+                                        char *text,
+                                        apr_size_t *text_len);
+
+apr_status_t serf__hpack_huffman_encode(const char *text,
+                                        apr_size_t text_len,
+                                        apr_size_t encoded_avail,
+                                        unsigned char *encoded,
+                                        apr_size_t *encoded_len);
 
 
 /** Logging functions. **/

Modified: serf/trunk/test/test_buckets.c
URL: 
http://svn.apache.org/viewvc/serf/trunk/test/test_buckets.c?rev=1709385&r1=1709384&r2=1709385&view=diff
==============================================================================
--- serf/trunk/test/test_buckets.c (original)
+++ serf/trunk/test/test_buckets.c Mon Oct 19 10:51:21 2015
@@ -1939,68 +1939,191 @@ static void test_hpack_huffman_decode(Cu
                                "\x72\xC1\xAB\x27\x0F\xB5\x29\x1F\x95\x87\x31"
                                "\x60\x65\xC0\x03\xED\x4E\xE5\xB1\x06\x3D\x50"
                                "\x07";
+  const unsigned char preD[] = "\0\0\0\0\0";
 
   CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre1, sizeof(pre1) - 1,
-                                                      result, sizeof(result),
+                                                      sizeof(result), result,
                                                       &len));
   CuAssertStrEquals(tc, "www.example.com", result);
+  CuAssertIntEquals(tc, strlen(result), len);
 
   CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre2, sizeof(pre2) - 1,
-                                                      result, sizeof(result),
+                                                      sizeof(result), result,
                                                       &len));
   CuAssertStrEquals(tc, "no-cache", result);
+  CuAssertIntEquals(tc, strlen(result), len);
 
   CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre3, sizeof(pre3) - 1,
-                                                      result, sizeof(result),
+                                                      sizeof(result), result,
                                                       &len));
   CuAssertStrEquals(tc, "custom-key", result);
+  CuAssertIntEquals(tc, strlen(result), len);
 
   CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre4, sizeof(pre4) - 1,
-                                                      result, sizeof(result),
+                                                      sizeof(result), result,
                                                       &len));
   CuAssertStrEquals(tc, "custom-value", result);
+  CuAssertIntEquals(tc, strlen(result), len);
 
   CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre5, sizeof(pre5) - 1,
-                                                      result, sizeof(result),
+                                                      sizeof(result), result,
                                                       &len));
   CuAssertStrEquals(tc, "302", result);
+  CuAssertIntEquals(tc, strlen(result), len);
 
   CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre6, sizeof(pre6) - 1,
-                                                      result, sizeof(result),
+                                                      sizeof(result), result,
                                                       &len));
   CuAssertStrEquals(tc, "private", result);
+  CuAssertIntEquals(tc, strlen(result), len);
 
   CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre7, sizeof(pre7) - 1,
-                                                      result, sizeof(result),
+                                                      sizeof(result), result,
                                                       &len));
   CuAssertStrEquals(tc, "Mon, 21 Oct 2013 20:13:21 GMT", result);
+  CuAssertIntEquals(tc, strlen(result), len);
 
   CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre8, sizeof(pre8) - 1,
-                                                      result, sizeof(result),
+                                                      sizeof(result), result,
                                                       &len));
   CuAssertStrEquals(tc, "https://www.example.com";, result);
+  CuAssertIntEquals(tc, strlen(result), len);
 
   CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(pre9, sizeof(pre9) - 1,
-                                                      result, sizeof(result),
+                                                      sizeof(result), result,
                                                       &len));
   CuAssertStrEquals(tc, "307", result);
+  CuAssertIntEquals(tc, strlen(result), len);
 
   CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(preA, sizeof(preA) - 1,
-                                                      result, sizeof(result),
+                                                      sizeof(result), result,
                                                       &len));
   CuAssertStrEquals(tc, "Mon, 21 Oct 2013 20:13:22 GMT", result);
+  CuAssertIntEquals(tc, strlen(result), len);
 
   CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(preB, sizeof(preB) - 1,
-                                                      result, sizeof(result),
+                                                      sizeof(result), result,
                                                       &len));
   CuAssertStrEquals(tc, "gzip", result);
+  CuAssertIntEquals(tc, strlen(result), len);
 
   CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(preC, sizeof(preC) - 1,
-                                                      result, sizeof(result),
+                                                      sizeof(result), result,
                                                       &len));
   CuAssertStrEquals(tc, "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; "
                         "version=1", result);
+  CuAssertIntEquals(tc, strlen(result), len);
+
+  CuAssertIntEquals(tc, 0, serf__hpack_huffman_decode(preD, sizeof(preD) - 1,
+                                                      sizeof(result), result,
+                                                      &len));
+  CuAssertStrEquals(tc, "00000000", result);
+  CuAssertIntEquals(tc, strlen(result), len);
+
+  /* And now check some corner cases as specified in the RFC:
+
+     The remaining bits must be filled out with the prefix of EOS */
+  CuAssertIntEquals(tc, 0,
+                    serf__hpack_huffman_decode((const unsigned char*)"\x07", 1,
+                                               sizeof(result), result,
+                                               &len));
+  CuAssertStrEquals(tc, "0", result);
+  CuAssertIntEquals(tc, 1, len);
+
+  CuAssertIntEquals(tc, APR_EINVAL,
+                    serf__hpack_huffman_decode((const unsigned char*)"\x06", 1,
+                                               sizeof(result),
+                                               result, &len));
+  CuAssertIntEquals(tc, APR_EINVAL,
+                    serf__hpack_huffman_decode((const unsigned char*)"\x01", 1,
+                                               sizeof(result),
+                                               result, &len));
+
+  /* EOS may not appear itself */
+  CuAssertIntEquals(tc, APR_EINVAL,
+                    serf__hpack_huffman_decode((const unsigned char*)
+                                                    "\xFF\xFF\xFF\xFF", 4,
+                                               sizeof(result), result, &len));
+}
+
+#define VERIFY_REVERSE(x)                                                  \
+  do                                                                       \
+   {                                                                       \
+     const char *v = (x);                                                  \
+     apr_size_t sz2;                                                       \
+     CuAssertIntEquals(tc, 0,                                              \
+                       serf__hpack_huffman_encode(v, strlen(v),            \
+                                                  sizeof(encoded),         \
+                                                  encoded, &encoded_len)); \
+     CuAssertIntEquals(tc, 0,                                              \
+                       serf__hpack_huffman_encode(v, strlen(v),            \
+                                                  0, NULL, &sz2));         \
+     CuAssertIntEquals(tc, encoded_len, sz2);                              \
+     CuAssertIntEquals(tc, 0,                                              \
+                       serf__hpack_huffman_decode(encoded, encoded_len,    \
+                                                  sizeof(text), text,      \
+                                                  &text_len));             \
+     CuAssertStrEquals(tc, v, text);                                       \
+     CuAssertIntEquals(tc, strlen(v), text_len);                           \
+   }                                                                       \
+  while(0)
+
+static void test_hpack_huffman_encode(CuTest *tc)
+{
+  unsigned char encoded[1024];
+  char text[1024];
+  apr_size_t encoded_len;
+  apr_size_t text_len;
+
+  VERIFY_REVERSE("1234567890");
+  VERIFY_REVERSE("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+
+  {
+    char from[256];
+    int i;
+
+    for (i = 0; i < sizeof(from); i++)
+      from[i] = (char)i;
+
+    CuAssertIntEquals(tc, 0,
+                      serf__hpack_huffman_encode(from, sizeof(from),
+                                                 sizeof(encoded),
+                                                 encoded, &encoded_len));
+    /* Nice.. need 583 bytes to encode these 256 bytes :-) */
+    CuAssertIntEquals(tc, 583, encoded_len); 
+    text[256] = 0xFE;
+    CuAssertIntEquals(tc, 0,
+                      serf__hpack_huffman_decode(encoded, encoded_len,
+                                                 sizeof(text), text,
+                                                 &text_len));
+    CuAssertIntEquals(tc, 256, text_len);
+    CuAssertIntEquals(tc, 0, memcmp(from, text, sizeof(from)));
+    /* If there is space in the buffer serf__hpack_huffman_decode will add
+       a final '\0' after the buffer */
+    CuAssertIntEquals(tc, 0, text[256]);
+
+    for (i = 0; i < sizeof(from); i++)
+      from[i] = '0';
+
+    CuAssertIntEquals(tc, 0,
+                      serf__hpack_huffman_encode(from, sizeof(from),
+                                                 sizeof(encoded),
+                                                 encoded, &encoded_len));
+    /* Ok, 160 to encode 256. Maybe there is some use case */
+    CuAssertIntEquals(tc, 160, encoded_len);
+    text[256] = 0xEF;
+    CuAssertIntEquals(tc, 0,
+                      serf__hpack_huffman_decode(encoded, encoded_len,
+                                                 sizeof(text), text,
+                                                 &text_len));
+    CuAssertIntEquals(tc, 256, text_len);
+    CuAssertIntEquals(tc, 0, memcmp(from, text, sizeof(from)));
+    /* If there is space in the buffer serf__hpack_huffman_decode will add
+    a final '\0' after the buffer */
+    CuAssertIntEquals(tc, 0, text[256]);
+  }
 }
+#undef VERIFY_REVERSE
 
 CuSuite *test_buckets(void)
 {
@@ -2034,6 +2157,7 @@ CuSuite *test_buckets(void)
     SUITE_ADD_TEST(suite, test_http2_unframe_buckets);
     SUITE_ADD_TEST(suite, test_http2_unpad_buckets);
     SUITE_ADD_TEST(suite, test_hpack_huffman_decode);
+    SUITE_ADD_TEST(suite, test_hpack_huffman_encode);
 #if 0
     /* This test for issue #152 takes a lot of time generating 4GB+ of random
        data so it's disabled by default. */

svn commit: r1709385 - in /serf/trunk: buckets/hpack_buckets.c serf_private.h test/test_buckets.c

Reply via email to