[hackers] [libgrapheme] Split test/test.c into three separate tests || Laslo Hunhold

git Sun, 18 Oct 2020 13:20:49 -0700

commit 009498ac0fc3744a7bc5cc1afb5f601e445442be
Author:     Laslo Hunhold <[email protected]>
AuthorDate: Sun Oct 18 22:20:31 2020 +0200
Commit:     Laslo Hunhold <[email protected]>
CommitDate: Sun Oct 18 22:20:31 2020 +0200


    Split test/test.c into three separate tests
    
    The test-infrastructure needed a bit of preparation, but now it makes
    sense to split the single test.c into its three parts, making it easier
    to handle and reason about.
    
    Signed-off-by: Laslo Hunhold <[email protected]>

diff --git a/Makefile b/Makefile
index 7b4663d..b02a347 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@
 include config.mk
 
 LIB = src/boundary src/codepoint src/grapheme
-TEST = test/test
+TEST = test/grapheme_break test/utf8-decode test/utf8-encode
 DATA = data/gbp data/emo data/gbt
 
 MAN3 = man/grapheme_bytelen.3
@@ -24,12 +24,16 @@ data/util.o: data/util.c config.mk data/util.h
 src/boundary.o: src/boundary.c config.mk data/emo.h data/gbp.h grapheme.h
 src/codepoint.o: src/codepoint.c config.mk grapheme.h
 src/grapheme.o: src/grapheme.c config.mk grapheme.h
-test/test.o: test/test.c config.mk data/gbt.h grapheme.h
+test/grapheme_break.o: test/grapheme_break.c config.mk data/gbt.h grapheme.h
+test/utf8-encode.o: test/utf8-encode.c config.mk grapheme.h
+test/utf8-decode.o: test/utf8-decode.c config.mk grapheme.h
 
 data/gbp: data/gbp.o data/util.o
 data/emo: data/emo.o data/util.o
 data/gbt: data/gbt.o data/util.o
-test/test: test/test.o $(LIB:=.o)
+test/grapheme_break: test/grapheme_break.o $(LIB:=.o)
+test/utf8-encode: test/utf8-encode.o $(LIB:=.o)
+test/utf8-decode: test/utf8-decode.o $(LIB:=.o)
 
 data/gbp.txt:
        wget -O $@ https://www.unicode.org/Public/13.0.0/ucd/auxiliary/GraphemeBreakProperty.txt
diff --git a/test/grapheme_break.c b/test/grapheme_break.c
new file mode 100644
index 0000000..3bd48a5
--- /dev/null
+++ b/test/grapheme_break.c
@@ -0,0 +1,41 @@
+/* See LICENSE file for copyright and license details. */
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "../grapheme.h"
+#include "../data/gbt.h"
+
+#define LEN(x) (sizeof(x) / sizeof(*x))
+
+int
+main(void)
+{
+       int state;
+       size_t i, j, k, len, failed;
+
+       /* grapheme break test */
+       for (i = 0, failed = 0; i < LEN(t); i++) {
+               for (j = 0, k = 0, state = 0, len = 1; j < t[i].cplen; j++) {
+                       if ((j + 1) == t[i].cplen ||
+                           grapheme_boundary(t[i].cp[j], t[i].cp[j + 1],
+                                             &state)) {
+                               /* check if our resulting length matches */
+                               if (k == t[i].lenlen || len != t[i].len[k++]) {
+                                       fprintf(stderr, "Failed \"%s\"\n",
+                                               t[i].descr);
+                                       failed++;
+                                       break;
+                               }
+                               len = 1;
+                       } else {
+                               len++;
+                       }
+               }
+       }
+       printf("Grapheme break test: Passed %zu out of %zu tests.\n",
+              LEN(t) - failed, LEN(t));
+
+       return (failed > 0) ? 1 : 0;
+}
diff --git a/test/test.c b/test/utf8-decode.c
similarity index 69%
rename from test/test.c
rename to test/utf8-decode.c
index 82613a1..8349f39 100644
--- a/test/test.c
+++ b/test/utf8-decode.c
@@ -5,53 +5,9 @@
 #include <string.h>
 
 #include "../grapheme.h"
-#include "../data/gbt.h"
 
 #define LEN(x) (sizeof(x) / sizeof(*x))
 
-static const struct {
-       uint32_t cp;      /* input code point */
-       uint8_t *exp_arr; /* expected UTF-8 byte sequence */
-       size_t   exp_len; /* expected length of UTF-8 sequence */
-} enc_test[] = {
-       {
-               /* invalid code point (UTF-16 surrogate half) */
-               .cp      = UINT32_C(0xD800),
-               .exp_arr = (uint8_t[]){ 0xEF, 0xBF, 0xBD },
-               .exp_len = 3,
-       },
-       {
-               /* invalid code point (UTF-16-unrepresentable) */
-               .cp      = UINT32_C(0x110000),
-               .exp_arr = (uint8_t[]){ 0xEF, 0xBF, 0xBD },
-               .exp_len = 3,
-       },
-       {
-               /* code point encoded to a 1-byte sequence */
-               .cp      = 0x01,
-               .exp_arr = (uint8_t[]){ 0x01 },
-               .exp_len = 1,
-       },
-       {
-               /* code point encoded to a 2-byte sequence */
-               .cp      = 0xFF,
-               .exp_arr = (uint8_t[]){ 0xC3, 0xBF },
-               .exp_len = 2,
-       },
-       {
-               /* code point encoded to a 3-byte sequence */
-               .cp      = 0xFFF,
-               .exp_arr = (uint8_t[]){ 0xE0, 0xBF, 0xBF },
-               .exp_len = 3,
-       },
-       {
-               /* code point encoded to a 4-byte sequence */
-               .cp      = UINT32_C(0xFFFFF),
-               .exp_arr = (uint8_t[]){ 0xF3, 0xBF, 0xBF, 0xBF },
-               .exp_len = 4,
-       },
-};
-
 static const struct {
        uint8_t *arr;     /* UTF-8 byte sequence */
        size_t   len;     /* length of UTF-8 byte sequence */
@@ -293,40 +249,7 @@ static const struct {
 int
 main(void)
 {
-       int state;
-       size_t i, j, k, len, failed;
-
-       /* UTF-8 encoder test */
-       for (i = 0, failed = 0; i < LEN(enc_test); i++) {
-               uint8_t arr[4];
-               size_t len;
-
-               len = grapheme_cp_encode(enc_test[i].cp, arr, LEN(arr));
-
-               if (len != enc_test[i].exp_len ||
-                   memcmp(arr, enc_test[i].exp_arr, len)) {
-                       fprintf(stderr, "Failed UTF-8-encoder test %zu: "
-                               "Expected (", i);
-                       for (j = 0; j < enc_test[i].exp_len; j++) {
-                               fprintf(stderr, "0x%x",
-                                       enc_test[i].exp_arr[j]);
-                               if (j + 1 < enc_test[i].exp_len) {
-                                       fprintf(stderr, " ");
-                               }
-                       }
-                       fprintf(stderr, "), but got (");
-                       for (j = 0; j < len; j++) {
-                               fprintf(stderr, "0x%x", arr[j]);
-                               if (j + 1 < len) {
-                                       fprintf(stderr, " ");
-                               }
-                       }
-                       fprintf(stderr, ")\n");
-                       failed++;
-               }
-       }
-       printf("UTF-8 encoder test: Passed %zu out of %zu tests.\n",
-              LEN(enc_test) - failed, LEN(enc_test));
+       size_t i, failed;
 
        /* UTF-8 decoder test */
        for (i = 0, failed = 0; i < LEN(dec_test); i++) {
@@ -348,27 +271,5 @@ main(void)
        printf("UTF-8 decoder test: Passed %zu out of %zu tests.\n",
               LEN(dec_test) - failed, LEN(dec_test));
 
-       /* grapheme break test */
-       for (i = 0, failed = 0; i < LEN(t); i++) {
-               for (j = 0, k = 0, state = 0, len = 1; j < t[i].cplen; j++) {
-                       if ((j + 1) == t[i].cplen ||
-                           grapheme_boundary(t[i].cp[j], t[i].cp[j + 1],
-                                             &state)) {
-                               /* check if our resulting length matches */
-                               if (k == t[i].lenlen || len != t[i].len[k++]) {
-                                       fprintf(stderr, "Failed \"%s\"\n",
-                                               t[i].descr);
-                                       failed++;
-                                       break;
-                               }
-                               len = 1;
-                       } else {
-                               len++;
-                       }
-               }
-       }
-       printf("Grapheme break test: Passed %zu out of %zu tests.\n",
-              LEN(t) - failed, LEN(t));
-
        return (failed > 0) ? 1 : 0;
 }
diff --git a/test/utf8-encode.c b/test/utf8-encode.c
new file mode 100644
index 0000000..7851d25
--- /dev/null
+++ b/test/utf8-encode.c
@@ -0,0 +1,92 @@
+/* See LICENSE file for copyright and license details. */
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "../grapheme.h"
+
+#define LEN(x) (sizeof(x) / sizeof(*x))
+
+static const struct {
+       uint32_t cp;      /* input code point */
+       uint8_t *exp_arr; /* expected UTF-8 byte sequence */
+       size_t   exp_len; /* expected length of UTF-8 sequence */
+} enc_test[] = {
+       {
+               /* invalid code point (UTF-16 surrogate half) */
+               .cp      = UINT32_C(0xD800),
+               .exp_arr = (uint8_t[]){ 0xEF, 0xBF, 0xBD },
+               .exp_len = 3,
+       },
+       {
+               /* invalid code point (UTF-16-unrepresentable) */
+               .cp      = UINT32_C(0x110000),
+               .exp_arr = (uint8_t[]){ 0xEF, 0xBF, 0xBD },
+               .exp_len = 3,
+       },
+       {
+               /* code point encoded to a 1-byte sequence */
+               .cp      = 0x01,
+               .exp_arr = (uint8_t[]){ 0x01 },
+               .exp_len = 1,
+       },
+       {
+               /* code point encoded to a 2-byte sequence */
+               .cp      = 0xFF,
+               .exp_arr = (uint8_t[]){ 0xC3, 0xBF },
+               .exp_len = 2,
+       },
+       {
+               /* code point encoded to a 3-byte sequence */
+               .cp      = 0xFFF,
+               .exp_arr = (uint8_t[]){ 0xE0, 0xBF, 0xBF },
+               .exp_len = 3,
+       },
+       {
+               /* code point encoded to a 4-byte sequence */
+               .cp      = UINT32_C(0xFFFFF),
+               .exp_arr = (uint8_t[]){ 0xF3, 0xBF, 0xBF, 0xBF },
+               .exp_len = 4,
+       },
+};
+
+int
+main(void)
+{
+       size_t i, j, failed;
+
+       /* UTF-8 encoder test */
+       for (i = 0, failed = 0; i < LEN(enc_test); i++) {
+               uint8_t arr[4];
+               size_t len;
+
+               len = grapheme_cp_encode(enc_test[i].cp, arr, LEN(arr));
+
+               if (len != enc_test[i].exp_len ||
+                   memcmp(arr, enc_test[i].exp_arr, len)) {
+                       fprintf(stderr, "Failed UTF-8-encoder test %zu: "
+                               "Expected (", i);
+                       for (j = 0; j < enc_test[i].exp_len; j++) {
+                               fprintf(stderr, "0x%x",
+                                       enc_test[i].exp_arr[j]);
+                               if (j + 1 < enc_test[i].exp_len) {
+                                       fprintf(stderr, " ");
+                               }
+                       }
+                       fprintf(stderr, "), but got (");
+                       for (j = 0; j < len; j++) {
+                               fprintf(stderr, "0x%x", arr[j]);
+                               if (j + 1 < len) {
+                                       fprintf(stderr, " ");
+                               }
+                       }
+                       fprintf(stderr, ")\n");
+                       failed++;
+               }
+       }
+       printf("UTF-8 encoder test: Passed %zu out of %zu tests.\n",
+              LEN(enc_test) - failed, LEN(enc_test));
+
+       return (failed > 0) ? 1 : 0;
+}

[hackers] [libgrapheme] Split test/test.c into three separate tests || Laslo Hunhold

Reply via email to