[hackers] [libgrapheme] Add case-conversion-unit-tests || Laslo Hunhold

git Wed, 21 Sep 2022 11:19:47 -0700

commit e63bcc42010176b300feea6a7412f814a6cc4191
Author:     Laslo Hunhold <[email protected]>
AuthorDate: Wed Sep 21 20:18:12 2022 +0200
Commit:     Laslo Hunhold <[email protected]>
CommitDate: Wed Sep 21 20:18:12 2022 +0200


    Add case-conversion-unit-tests
    
    To give even more assurance and catch any possible future regressions,
    exhaustive unit tests are added for the case-conversion functions.
    
    Signed-off-by: Laslo Hunhold <[email protected]>

diff --git a/Makefile b/Makefile
index eacd8c3..d325c1b 100644
--- a/Makefile
+++ b/Makefile
@@ -53,6 +53,7 @@ SRC =\
        src/word\
 
 TEST =\
+       test/case\
        test/character\
        test/line\
        test/sentence\
@@ -160,6 +161,7 @@ src/sentence.o: src/sentence.c config.mk gen/sentence.h 
grapheme.h src/util.h
 src/utf8.o: src/utf8.c config.mk grapheme.h
 src/util.o: src/util.c config.mk gen/types.h grapheme.h src/util.h
 src/word.o: src/word.c config.mk gen/word.h grapheme.h src/util.h
+test/case.o: test/case.c config.mk grapheme.h test/util.h
 test/character.o: test/character.c config.mk gen/character-test.h grapheme.h 
test/util.h
 test/line.o: test/line.c config.mk gen/line-test.h grapheme.h test/util.h
 test/sentence.o: test/sentence.c config.mk gen/sentence-test.h grapheme.h 
test/util.h
@@ -183,6 +185,7 @@ gen/sentence: gen/sentence.o gen/util.o
 gen/sentence-test: gen/sentence-test.o gen/util.o
 gen/word: gen/word.o gen/util.o
 gen/word-test: gen/word-test.o gen/util.o
+test/case: test/case.o test/util.o libgrapheme.a
 test/character: test/character.o test/util.o libgrapheme.a
 test/line: test/line.o test/util.o libgrapheme.a
 test/sentence: test/sentence.o test/util.o libgrapheme.a
diff --git a/test/case.c b/test/case.c
new file mode 100644
index 0000000..ba2e729
--- /dev/null
+++ b/test/case.c
@@ -0,0 +1,329 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "../grapheme.h"
+#include "util.h"
+
+struct unit_test_to_case_utf8 { /* one test case for the UTF-8 case-conversion functions */
+       const char *description; /* label printed when the test fails */
+       struct {
+               const char *src; /* source string handed to the function under test */
+               size_t srclen; /* source length argument; SIZE_MAX in the "NUL-terminated" cases */
+               size_t destlen; /* destination size handed to the function under test */
+       } input;
+       struct {
+               const char *dest; /* expected destination contents */
+               size_t ret; /* expected return value */
+       } output;
+};
+
+/*
+ * Test cases for grapheme_to_lowercase_utf8(). Each entry gives
+ * { src, srclen, destlen } as input and { dest, ret } as the expected
+ * result. A srclen of SIZE_MAX marks the "NUL-terminated" cases; in the
+ * "truncation" cases destlen is too small for the whole result, yet the
+ * expected return value is still the untruncated length.
+ */
+static struct unit_test_to_case_utf8 lowercase_utf8[] = {
+       {
+               .description = "empty input",
+               .input =  { "", 0, 10 },
+               .output = { "", 0 },
+       },
+       {
+               .description = "empty output",
+               .input =  { "hello", 5, 0 },
+               .output = { "", 5 },
+       },
+       {
+               .description = "one character, conversion",
+               .input =  { "A", 1, 10 },
+               .output = { "a", 1 },
+       },
+       {
+               .description = "one character, no conversion",
+               .input =  { "a", 1, 10 },
+               .output = { "a", 1 },
+       },
+       {
+               .description = "one character, conversion, truncation",
+               .input =  { "A", 1, 0 },
+               .output = { "", 1 },
+       },
+       {
+               .description = "one character, conversion, NUL-terminated",
+               .input =  { "A", SIZE_MAX, 10 },
+               .output = { "a", 1 },
+       },
+       {
+               .description = "one character, no conversion, NUL-terminated",
+               .input =  { "a", SIZE_MAX, 10 },
+               .output = { "a", 1 },
+       },
+       {
+               .description = "one character, conversion, NUL-terminated, truncation",
+               .input =  { "A", SIZE_MAX, 0 },
+               .output = { "", 1 },
+       },
+       {
+               .description = "one word, conversion",
+               .input =  { "wOrD", 4, 10 },
+               .output = { "word", 4 },
+       },
+       {
+               .description = "one word, no conversion",
+               .input =  { "word", 4, 10 },
+               .output = { "word", 4 },
+       },
+       {
+               .description = "one word, conversion, truncation",
+               .input =  { "wOrD", 4, 3 },
+               .output = { "wo", 4 },
+       },
+       {
+               .description = "one word, conversion, NUL-terminated",
+               .input =  { "wOrD", SIZE_MAX, 10 },
+               .output = { "word", 4 },
+       },
+       {
+               .description = "one word, no conversion, NUL-terminated",
+               .input =  { "word", SIZE_MAX, 10 },
+               .output = { "word", 4 },
+       },
+       {
+               .description = "one word, conversion, NUL-terminated, truncation",
+               .input =  { "wOrD", SIZE_MAX, 3 },
+               .output = { "wo", 4 },
+       },
+};
+
+/*
+ * Test cases for grapheme_to_uppercase_utf8(). Each entry gives
+ * { src, srclen, destlen } as input and { dest, ret } as the expected
+ * result. A srclen of SIZE_MAX marks the "NUL-terminated" cases; in the
+ * "truncation" cases destlen is too small for the whole result, yet the
+ * expected return value is still the untruncated length.
+ */
+static struct unit_test_to_case_utf8 uppercase_utf8[] = {
+       {
+               .description = "empty input",
+               .input =  { "", 0, 10 },
+               .output = { "", 0 },
+       },
+       {
+               .description = "empty output",
+               .input =  { "hello", 5, 0 },
+               .output = { "", 5 },
+       },
+       {
+               .description = "one character, conversion",
+               .input =  { "a", 1, 10 },
+               .output = { "A", 1 },
+       },
+       {
+               .description = "one character, no conversion",
+               .input =  { "A", 1, 10 },
+               .output = { "A", 1 },
+       },
+       {
+               .description = "one character, conversion, truncation",
+               .input =  { "a", 1, 0 },
+               .output = { "", 1 },
+       },
+       {
+               .description = "one character, conversion, NUL-terminated",
+               .input =  { "a", SIZE_MAX, 10 },
+               .output = { "A", 1 },
+       },
+       {
+               .description = "one character, no conversion, NUL-terminated",
+               .input =  { "A", SIZE_MAX, 10 },
+               .output = { "A", 1 },
+       },
+       {
+               .description = "one character, conversion, NUL-terminated, truncation",
+               .input =  { "a", SIZE_MAX, 0 },
+               .output = { "", 1 },
+       },
+       {
+               .description = "one word, conversion",
+               .input =  { "wOrD", 4, 10 },
+               .output = { "WORD", 4 },
+       },
+       {
+               .description = "one word, no conversion",
+               .input =  { "WORD", 4, 10 },
+               .output = { "WORD", 4 },
+       },
+       {
+               .description = "one word, conversion, truncation",
+               .input =  { "wOrD", 4, 3 },
+               .output = { "WO", 4 },
+       },
+       {
+               .description = "one word, conversion, NUL-terminated",
+               .input =  { "wOrD", SIZE_MAX, 10 },
+               .output = { "WORD", 4 },
+       },
+       {
+               .description = "one word, no conversion, NUL-terminated",
+               .input =  { "WORD", SIZE_MAX, 10 },
+               .output = { "WORD", 4 },
+       },
+       {
+               .description = "one word, conversion, NUL-terminated, truncation",
+               .input =  { "wOrD", SIZE_MAX, 3 },
+               .output = { "WO", 4 },
+       },
+};
+
+/*
+ * Test cases for grapheme_to_titlecase_utf8(). Each entry gives
+ * { src, srclen, destlen } as input and { dest, ret } as the expected
+ * result. A srclen of SIZE_MAX marks the "NUL-terminated" cases; in the
+ * "truncation" cases destlen is too small for the whole result, yet the
+ * expected return value is still the untruncated length. Besides the
+ * single-character and single-word cases, this table also covers
+ * two-word inputs.
+ */
+static struct unit_test_to_case_utf8 titlecase_utf8[] = {
+       {
+               .description = "empty input",
+               .input =  { "", 0, 10 },
+               .output = { "", 0 },
+       },
+       {
+               .description = "empty output",
+               .input =  { "hello", 5, 0 },
+               .output = { "", 5 },
+       },
+       {
+               .description = "one character, conversion",
+               .input =  { "a", 1, 10 },
+               .output = { "A", 1 },
+       },
+       {
+               .description = "one character, no conversion",
+               .input =  { "A", 1, 10 },
+               .output = { "A", 1 },
+       },
+       {
+               .description = "one character, conversion, truncation",
+               .input =  { "a", 1, 0 },
+               .output = { "", 1 },
+       },
+       {
+               .description = "one character, conversion, NUL-terminated",
+               .input =  { "a", SIZE_MAX, 10 },
+               .output = { "A", 1 },
+       },
+       {
+               .description = "one character, no conversion, NUL-terminated",
+               .input =  { "A", SIZE_MAX, 10 },
+               .output = { "A", 1 },
+       },
+       {
+               .description = "one character, conversion, NUL-terminated, truncation",
+               .input =  { "a", SIZE_MAX, 0 },
+               .output = { "", 1 },
+       },
+       {
+               .description = "one word, conversion",
+               .input =  { "heLlo", 5, 10 },
+               .output = { "Hello", 5 },
+       },
+       {
+               .description = "one word, no conversion",
+               .input =  { "Hello", 5, 10 },
+               .output = { "Hello", 5 },
+       },
+       {
+               .description = "one word, conversion, truncation",
+               .input =  { "heLlo", 5, 2 },
+               .output = { "H", 5 },
+       },
+       {
+               .description = "one word, conversion, NUL-terminated",
+               .input =  { "heLlo", SIZE_MAX, 10 },
+               .output = { "Hello", 5 },
+       },
+       {
+               .description = "one word, no conversion, NUL-terminated",
+               .input =  { "Hello", SIZE_MAX, 10 },
+               .output = { "Hello", 5 },
+       },
+       {
+               .description = "one word, conversion, NUL-terminated, truncation",
+               .input =  { "heLlo", SIZE_MAX, 3 },
+               .output = { "He", 5 },
+       },
+       {
+               .description = "two words, conversion",
+               .input =  { "heLlo wORLd!", 12, 20 },
+               .output = { "Hello World!", 12 },
+       },
+       {
+               .description = "two words, no conversion",
+               .input =  { "Hello World!", 12, 20 },
+               .output = { "Hello World!", 12 },
+       },
+       {
+               .description = "two words, conversion, truncation",
+               .input =  { "heLlo wORLd!", 12, 8 },
+               .output = { "Hello W", 12 },
+       },
+       {
+               .description = "two words, conversion, NUL-terminated",
+               .input =  { "heLlo wORLd!", SIZE_MAX, 20 },
+               .output = { "Hello World!", 12 },
+       },
+       {
+               .description = "two words, no conversion, NUL-terminated",
+               .input =  { "Hello World!", SIZE_MAX, 20 },
+               .output = { "Hello World!", 12 },
+       },
+       {
+               .description = "two words, conversion, NUL-terminated, truncation",
+               .input =  { "heLlo wORLd!", SIZE_MAX, 4 },
+               .output = { "Hel", 12 },
+       },
+};
+
+/*
+ * Unit-test callback for run_unit_tests(): runs test case number 'off'
+ * of the table 't', which must be one of lowercase_utf8, uppercase_utf8
+ * or titlecase_utf8 (the identity of the table selects the function
+ * under test). 'name' and 'argv0' are only used to prefix diagnostics.
+ *
+ * Returns 0 if the return value and the destination contents match the
+ * expectation and nothing was written at or beyond offset destlen;
+ * otherwise a message is printed to stderr and 1 is returned.
+ */
+static int
+unit_test_callback_to_case_utf8(void *t, size_t off, const char *name,
+                                const char *argv0)
+{
+       struct unit_test_to_case_utf8 *test =
+               (struct unit_test_to_case_utf8 *)t + off;
+       size_t ret = 0, i;
+       char buf[512];
+
+       /* fill the array with canary values */
+       memset(buf, 0x7f, LEN(buf));
+
+       if (t == lowercase_utf8) {
+               ret = grapheme_to_lowercase_utf8(test->input.src,
+                                                test->input.srclen, buf,
+                                                test->input.destlen);
+       } else if (t == uppercase_utf8) {
+               ret = grapheme_to_uppercase_utf8(test->input.src,
+                                                test->input.srclen, buf,
+                                                test->input.destlen);
+       } else if (t == titlecase_utf8) {
+               ret = grapheme_to_titlecase_utf8(test->input.src,
+                                                test->input.srclen, buf,
+                                                test->input.destlen);
+       } else {
+               /*
+                * the layout behind an unknown table is unknown as well,
+                * so 'test' must not be dereferenced for the report
+                */
+               fprintf(stderr, "%s: %s: Unknown unit test table.\n",
+                       argv0, name);
+               return 1;
+       }
+
+       /*
+        * check results; only the bytes that both fit into the
+        * destination and belong to the expected result are compared
+        */
+       if (ret != test->output.ret ||
+           memcmp(buf, test->output.dest,
+                  MIN(test->input.destlen, test->output.ret))) {
+               goto err;
+       }
+
+       /* check that none of the canary values have been overwritten */
+       for (i = test->input.destlen; i < LEN(buf); i++) {
+               if (buf[i] != 0x7f) {
+                       fprintf(stderr, "%s: %s: Unit test %zu wrote beyond "
+                               "the destination buffer (offset %zu).\n",
+                               argv0, name, off, i);
+                       goto err;
+               }
+       }
+
+       return 0;
+err:
+       /*
+        * print at most destlen bytes of buf: everything beyond that is
+        * canary filler, and ret is produced by the function under test,
+        * so it cannot be trusted as a bound for reading buf
+        */
+       fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
+               "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n",
+               argv0, name, off, test->description,
+               (int)MIN(ret, test->input.destlen), buf, ret,
+               (int)test->output.ret, test->output.dest, test->output.ret);
+       return 1;
+}
+
+/*
+ * Runs the lowercase, uppercase and titlecase unit-test tables, in that
+ * order, and returns the sum of the three run_unit_tests() results as
+ * the exit status (0 if no table reported a failure).
+ */
+int
+main(int argc, char *argv[])
+{
+       int failed = 0;
+
+       (void)argc;
+
+       /*
+        * one statement per table: the operands of a single '+' expression
+        * may be evaluated in any order, which would leave the order of
+        * the printed reports up to the compiler
+        */
+       failed += run_unit_tests(unit_test_callback_to_case_utf8,
+                                lowercase_utf8, LEN(lowercase_utf8),
+                                "grapheme_to_lowercase_utf8", argv[0]);
+       failed += run_unit_tests(unit_test_callback_to_case_utf8,
+                                uppercase_utf8, LEN(uppercase_utf8),
+                                "grapheme_to_uppercase_utf8", argv[0]);
+       failed += run_unit_tests(unit_test_callback_to_case_utf8,
+                                titlecase_utf8, LEN(titlecase_utf8),
+                                "grapheme_to_titlecase_utf8", argv[0]);
+
+       return failed;
+}
diff --git a/test/util.c b/test/util.c
index 992eca9..d6c0de1 100644
--- a/test/util.c
+++ b/test/util.c
@@ -23,7 +23,7 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, 
size_t),
                        /* check if our resulting offset matches */
                        if (j == test[i].lenlen ||
                            res != test[i].len[j++]) {
-                               fprintf(stderr, "%s: Failed test %zu \"%s\".\n",
+                               fprintf(stderr, "%s: Failed conformance test 
%zu \"%s\".\n",
                                        argv0, i, test[i].descr);
                                fprintf(stderr, "J=%zu: EXPECTED len %zu, got 
%zu\n", j-1, test[i].len[j-1], res);
                                failed++;
@@ -31,7 +31,24 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, 
size_t),
                        }
                }
        }
-       printf("%s: %zu/%zu tests passed.\n", argv0,
+       printf("%s: %zu/%zu conformance tests passed.\n", argv0,
+              testlen - failed, testlen);
+
+       return (failed > 0) ? 1 : 0;
+}
+
+int
+run_unit_tests(int (*unit_test_callback)(void *, size_t, const char *,
+               const char *), void *test, size_t testlen, const char *name,
+               const char *argv0)
+{
+       size_t i, failed;
+
+       for (i = 0, failed = 0; i < testlen; i++) {
+               failed += (unit_test_callback(test, i, name, argv0) == 0) ? 0 : 
1;
+       }
+
+       printf("%s: %s: %zu/%zu unit tests passed.\n", argv0, name,
               testlen - failed, testlen);
 
        return (failed > 0) ? 1 : 0;
diff --git a/test/util.h b/test/util.h
index 1c6e18f..e6577a1 100644
--- a/test/util.h
+++ b/test/util.h
@@ -5,10 +5,15 @@
 #include "../gen/types.h"
 #include "../grapheme.h"
 
+#undef MIN
+#define MIN(x,y)  ((x) < (y) ? (x) : (y))
+#undef LEN
 #define LEN(x) (sizeof(x) / sizeof(*(x)))
 
 int run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
                     const struct break_test *test, size_t testlen,
                     const char *);
+int run_unit_tests(int (*unit_test_callback)(void *, size_t, const char *,
+                   const char *), void *, size_t, const char *, const char *);
 
 #endif /* UTIL_H */

[hackers] [libgrapheme] Add case-conversion-unit-tests || Laslo Hunhold

Reply via email to