[hackers] [libgrapheme] Refactor case-checking-functions with Herodotus and add unit tests || Laslo Hunhold

git Sat, 24 Sep 2022 01:37:19 -0700

commit 5dec22a7143e1105f25c7a7626fa166d882367d0
Author:     Laslo Hunhold <[email protected]>
AuthorDate: Sat Sep 24 10:36:15 2022 +0200
Commit:     Laslo Hunhold <[email protected]>
CommitDate: Sat Sep 24 10:36:15 2022 +0200


    Refactor case-checking-functions with Herodotus and add unit tests
    
    Additionally, expand the unit tests with special-casing cases.
    
    Signed-off-by: Laslo Hunhold <[email protected]>

diff --git a/src/case.c b/src/case.c
index 8bfc8f6..04e504a 100644
--- a/src/case.c
+++ b/src/case.c
@@ -1,4 +1,5 @@
 /* See LICENSE file for copyright and license details. */
+#include <stddef.h>
 #include <stdint.h>
 
 #include "../grapheme.h"
@@ -208,6 +209,7 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w)
                /* cast the rest of the codepoints in the word to lowercase */
                to_case(r, w, 1, lower_major, lower_minor, lower_special);
 
+               /* remove the limit on the word before the next iteration */
                herodotus_reader_pop_limit(r);
        }
 
@@ -289,20 +291,16 @@ grapheme_to_titlecase_utf8(const char *src, size_t 
srclen, char *dest, size_t de
 }
 
 static inline bool
-is_case(const void *src, size_t srclen,
-        size_t srcnumprocess,
-        size_t (*get_codepoint)(const void *, size_t, size_t, uint_least32_t 
*),
-        const uint_least16_t *major, const int_least32_t *minor,
-        const struct special_case *sc, size_t *output)
+is_case(HERODOTUS_READER *r, const uint_least16_t *major,
+        const int_least32_t *minor, const struct special_case *sc,
+        size_t *output)
 {
-       size_t srcoff, new_srcoff, tmp, res, off, i;
-       uint_least32_t cp, tmp_cp;
+       size_t off, i;
+       bool ret = true;
+       uint_least32_t cp;
        int_least32_t map;
 
-       for (srcoff = 0; srcoff < srcnumprocess; srcoff = new_srcoff) {
-               /* read in next source codepoint */
-               new_srcoff = srcoff + get_codepoint(src, srclen, srcoff, &cp);
-
+       for (; herodotus_read_codepoint(r, false, &cp) == 
HERODOTUS_STATUS_SUCCESS;) {
                /* get and handle case mapping */
                if (unlikely((map = get_case_offset(cp, major, minor)) >=
                             INT32_C(0x110000))) {
@@ -310,173 +308,162 @@ is_case(const void *src, size_t srclen,
                         * is the difference to 0x110000*/
                        off = (uint_least32_t)map - UINT32_C(0x110000);
 
-                       for (i = 0, tmp = srcoff; i < sc[off].cplen; i++, tmp 
+= res) {
-                               res = get_codepoint(src, srclen, srcoff, 
&tmp_cp);
-                               if (tmp_cp != sc[off].cp[i]) {
-                                       /* we have a difference */
-                                       if (output) {
-                                               *output = tmp;
+                       for (i = 0; i < sc[off].cplen; i++) {
+                               if (herodotus_read_codepoint(r, false, &cp) ==
+                                   HERODOTUS_STATUS_SUCCESS) {
+                                       if (cp != sc[off].cp[i]) {
+                                               ret = false;
+                                               goto done;
+                                       } else {
+                                               /* move forward */
+                                               herodotus_read_codepoint(r, 
true, &cp);
                                        }
-                                       return false;
+                               } else {
+                                       /*
+                                        * input ended and we didn't see
+                                        * any difference so far, so this
+                                        * string is in fact okay
+                                        */
+                                       ret = true;
+                                       goto done;
                                }
                        }
-                       new_srcoff = tmp;
                } else {
                        /* we have a simple mapping */
                        if (cp != (uint_least32_t)((int_least32_t)cp + map)) {
                                /* we have a difference */
-                               if (output) {
-                                       *output = srcoff;
-                               }
-                               return false;
+                               ret = false;
+                               goto done;
+                       } else {
+                               /* move forward */
+                               herodotus_read_codepoint(r, true, &cp);
                        }
                }
        }
-
+done:
        if (output) {
-               *output = srcoff;
+               *output = herodotus_reader_number_read(r);
        }
-       return true;
+       return ret;
 }
 
 static inline bool
-is_titlecase(const void *src, size_t srclen,
-             size_t (*get_codepoint)(const void *, size_t, size_t, 
uint_least32_t *),
-            size_t *output)
+is_titlecase(HERODOTUS_READER *r, size_t *output)
 {
        enum case_property prop;
-       size_t next_wb, srcoff, res, tmp_output;
+       enum herodotus_status s;
+       bool ret = true;
        uint_least32_t cp;
 
-       for (srcoff = 0; ; ) {
-               if (get_codepoint == get_codepoint_utf8) {
-                       if ((next_wb = grapheme_next_word_break_utf8((const 
char *)src + srcoff,
-                                                                    srclen - 
srcoff)) == 0) {
-                               /* we consumed all of the string */
-                               break;
-                       }
-               } else {
-                       if ((next_wb = grapheme_next_word_break((const 
uint_least32_t *)src + srcoff,
-                                                               srclen - 
srcoff)) == 0) {
-                               /* we consumed all of the string */
-                               break;
-                       }
-               }
-
-               for (; next_wb > 0 && srcoff < srclen; next_wb -= res, srcoff 
+= res) {
+       for (;;) {
+               herodotus_reader_push_advance_limit(r, 
herodotus_next_word_break(r));
+               for (; (s = herodotus_read_codepoint(r, false, &cp)) == 
HERODOTUS_STATUS_SUCCESS;) {
                        /* check if we have a cased character */
-                       res = get_codepoint(src, srclen, srcoff, &cp);
                        prop = get_case_property(cp);
                        if (prop == CASE_PROP_CASED ||
                            prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE) {
                                break;
-                       }
-               }
-
-               if (next_wb > 0) {
-                       /* get character length */
-                       res = get_codepoint(src, srclen, srcoff, &cp);
-
-                       /* we have a cased character at srcoff, check if it's 
titlecase */
-                       if (get_codepoint == get_codepoint_utf8) {
-                               if (!is_case((const char *)src + srcoff,
-                                             srclen - srcoff, res,
-                                             get_codepoint_utf8, title_major,
-                                             title_minor, title_special, 
&tmp_output)) {
-                                       if (output) {
-                                               *output = srcoff + tmp_output;
-                                       }
-                                       return false;
-                               }
                        } else {
-                               if (!is_case((const uint_least32_t *)src + 
srcoff,
-                                             srclen - srcoff, res,
-                                             get_codepoint, title_major,
-                                             title_minor, title_special, 
&tmp_output)) {
-                                       if (output) {
-                                               *output = srcoff + tmp_output;
-                                       }
-                                       return false;
-                               }
+                               /* increment reader */
+                               herodotus_read_codepoint(r, true, &cp);
                        }
+               }
 
+               if (s == HERODOTUS_STATUS_END_OF_BUFFER) {
+                       /* we are done */
+                       break;
+               } else if (s == HERODOTUS_STATUS_SOFT_LIMIT_REACHED) {
+                       /*
+                        * we did not encounter any cased character
+                        * up to the word break
+                        */
+                       continue;
+               } else {
                        /*
-                        * we consumed a character (make sure to never
-                        * underflow next_wb; this should not happen,
-                        * but it's better to be sure)
+                        * we encountered a cased character before the word
+                        * break, check if it's titlecase
                         */
-                       srcoff += res;
-                       next_wb -= (res <= next_wb) ? res : next_wb;
+                       herodotus_reader_push_advance_limit(r,
+                               herodotus_reader_next_codepoint_break(r));
+                       if (!is_case(r, title_major, title_minor, 
title_special, NULL)) {
+                               ret = false;
+                               goto done;
+                       }
+                       herodotus_reader_pop_limit(r);
                }
 
                /* check if the rest of the codepoints in the word are 
lowercase */
-               if (get_codepoint == get_codepoint_utf8) {
-                       if (!is_case((const char *)src + srcoff,
-                                     srclen - srcoff, next_wb,
-                                     get_codepoint_utf8, lower_major,
-                                     lower_minor, lower_special, &tmp_output)) 
{
-                               if (output) {
-                                       *output = srcoff + tmp_output;
-                               }
-                               return false;
-                       }
-               } else {
-                       if (!is_case((const uint_least32_t *)src + srcoff,
-                                     srclen - srcoff, next_wb,
-                                     get_codepoint, lower_major,
-                                     lower_minor, lower_special, &tmp_output)) 
{
-                               if (output) {
-                                       *output = srcoff + tmp_output;
-                               }
-                               return false;
-                       }
+               if (!is_case(r, lower_major, lower_minor, lower_special, NULL)) 
{
+                       ret = false;
+                       goto done;
                }
-               srcoff += next_wb;
-       }
 
+               /* remove the limit on the word before the next iteration */
+               herodotus_reader_pop_limit(r);
+       }
+done:
        if (output) {
-               *output = srcoff;
+               *output = herodotus_reader_number_read(r);
        }
-       return true;
+       return ret;
 }
 
 bool
 grapheme_is_uppercase(const uint_least32_t *src, size_t srclen, size_t 
*caselen)
 {
-       return is_case(src, srclen, srclen, get_codepoint,
-                      upper_major, upper_minor, upper_special, caselen);
+       HERODOTUS_READER r;
+
+       herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, src, srclen);
+
+       return is_case(&r, upper_major, upper_minor, upper_special, caselen);
 }
 
 bool
 grapheme_is_lowercase(const uint_least32_t *src, size_t srclen, size_t 
*caselen)
 {
-       return is_case(src, srclen, srclen, get_codepoint,
-                      lower_major, lower_minor, lower_special, caselen);
+       HERODOTUS_READER r;
+
+       herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, src, srclen);
+
+       return is_case(&r, lower_major, lower_minor, lower_special, caselen);
 }
 
 bool
 grapheme_is_titlecase(const uint_least32_t *src, size_t srclen, size_t 
*caselen)
 {
-       return is_titlecase(src, srclen, get_codepoint, caselen);
+       HERODOTUS_READER r;
+
+       herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, src, srclen);
+
+       return is_titlecase(&r, caselen);
 }
 
 bool
 grapheme_is_uppercase_utf8(const char *src, size_t srclen, size_t *caselen)
 {
-       return is_case(src, srclen, srclen, get_codepoint_utf8,
-                      upper_major, upper_minor, upper_special, caselen);
+       HERODOTUS_READER r;
+
+       herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, src, srclen);
+
+       return is_case(&r, upper_major, upper_minor, upper_special, caselen);
 }
 
 bool
 grapheme_is_lowercase_utf8(const char *src, size_t srclen, size_t *caselen)
 {
-       return is_case(src, srclen, srclen, get_codepoint_utf8,
-                      lower_major, lower_minor, lower_special, caselen);
+       HERODOTUS_READER r;
+
+       herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, src, srclen);
 
+       return is_case(&r, lower_major, lower_minor, lower_special, caselen);
 }
 
 bool
 grapheme_is_titlecase_utf8(const char *src, size_t srclen, size_t *caselen)
 {
-       return is_titlecase(src, srclen, get_codepoint_utf8, caselen);
+       HERODOTUS_READER r;
+
+       herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, src, srclen);
+
+       return is_titlecase(&r, caselen);
 }
diff --git a/src/util.h b/src/util.h
index 2ec151c..1b112a2 100644
--- a/src/util.h
+++ b/src/util.h
@@ -79,6 +79,7 @@ void herodotus_reader_init(HERODOTUS_READER *, enum 
herodotus_type,
 void herodotus_reader_copy(const HERODOTUS_READER *, HERODOTUS_READER *);
 void herodotus_reader_push_advance_limit(HERODOTUS_READER *, size_t);
 void herodotus_reader_pop_limit(HERODOTUS_READER *);
+size_t herodotus_reader_number_read(const HERODOTUS_READER *);
 size_t herodotus_reader_next_word_break(const HERODOTUS_READER *);
 size_t herodotus_reader_next_codepoint_break(const HERODOTUS_READER *);
 enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, 
uint_least32_t *);
@@ -86,7 +87,7 @@ enum herodotus_status 
herodotus_read_codepoint(HERODOTUS_READER *, bool, uint_le
 void herodotus_writer_init(HERODOTUS_WRITER *, enum herodotus_type, void *,
                            size_t);
 void herodotus_writer_nul_terminate(HERODOTUS_WRITER *);
-size_t herodotus_writer_number_written(HERODOTUS_WRITER *);
+size_t herodotus_writer_number_written(const HERODOTUS_WRITER *);
 void herodotus_write_codepoint(HERODOTUS_WRITER *, uint_least32_t);
 
 size_t get_codepoint(const void *, size_t, size_t, uint_least32_t *);
diff --git a/test/case.c b/test/case.c
index 6df3d90..9f6af31 100644
--- a/test/case.c
+++ b/test/case.c
@@ -7,6 +7,18 @@
 #include "../grapheme.h"
 #include "util.h"
 
+struct unit_test_is_case_utf8 {
+       const char *description;
+       struct {
+               const char *src;
+               size_t srclen;
+       } input;
+       struct {
+               bool ret;
+               size_t caselen;
+       } output;
+};
+
 struct unit_test_to_case_utf8 {
        const char *description;
        struct {
@@ -20,7 +32,201 @@ struct unit_test_to_case_utf8 {
        } output;
 };
 
-static struct unit_test_to_case_utf8 lowercase_utf8[] = {
+static struct unit_test_is_case_utf8 is_lowercase_utf8[] = {
+       {
+               .description = "empty input",
+               .input =  { "", 0 },
+               .output = { true, 0 },
+       },
+       {
+               .description = "one character, violation",
+               .input =  { "A", 1 },
+               .output = { false, 0 },
+       },
+       {
+               .description = "one character, confirmation",
+               .input =  { "\xc3\x9f", 2 },
+               .output = { true, 2 },
+       },
+       {
+               .description = "one character, violation, NUL-terminated",
+               .input =  { "A", SIZE_MAX },
+               .output = { false, 0 },
+       },
+       {
+               .description = "one character, confirmation, NUL-terminated",
+               .input =  { "\xc3\x9f", SIZE_MAX },
+               .output = { true, 2 },
+       },
+       {
+               .description = "one word, violation",
+               .input =  { "Hello", 5 },
+               .output = { false, 0 },
+       },
+       {
+               .description = "one word, partial confirmation",
+               .input =  { "gru" "\xc3\x9f" "fOrmel", 11 },
+               .output = { false, 6 },
+       },
+       {
+               .description = "one word, full confirmation",
+               .input =  { "gru" "\xc3\x9f" "formel", 11 },
+               .output = { true, 11 },
+       },
+       {
+               .description = "one word, violation, NUL-terminated",
+               .input =  { "Hello", SIZE_MAX },
+               .output = { false, 0 },
+       },
+       {
+               .description = "one word, partial confirmation, NUL-terminated",
+               .input =  { "gru" "\xc3\x9f" "fOrmel", SIZE_MAX },
+               .output = { false, 6 },
+       },
+       {
+               .description = "one word, full confirmation, NUL-terminated",
+               .input =  { "gru" "\xc3\x9f" "formel", SIZE_MAX },
+               .output = { true, 11 },
+       },
+};
+
+static struct unit_test_is_case_utf8 is_uppercase_utf8[] = {
+       {
+               .description = "empty input",
+               .input =  { "", 0 },
+               .output = { true, 0 },
+       },
+       {
+               .description = "one character, violation",
+               .input =  { "\xc3\x9f", 2 },
+               .output = { false, 0 },
+       },
+       {
+               .description = "one character, confirmation",
+               .input =  { "A", 1 },
+               .output = { true, 1 },
+       },
+       {
+               .description = "one character, violation, NUL-terminated",
+               .input =  { "\xc3\x9f", SIZE_MAX },
+               .output = { false, 0 },
+       },
+       {
+               .description = "one character, confirmation, NUL-terminated",
+               .input =  { "A", SIZE_MAX },
+               .output = { true, 1 },
+       },
+       {
+               .description = "one word, violation",
+               .input =  { "hello", 5 },
+               .output = { false, 0 },
+       },
+       {
+               .description = "one word, partial confirmation",
+               .input =  { "GRU" "\xc3\x9f" "formel", 11 },
+               .output = { false, 3 },
+       },
+       {
+               .description = "one word, full confirmation",
+               .input =  { "HELLO", 5 },
+               .output = { true, 5 },
+       },
+       {
+               .description = "one word, violation, NUL-terminated",
+               .input =  { "hello", SIZE_MAX },
+               .output = { false, 0 },
+       },
+       {
+               .description = "one word, partial confirmation, NUL-terminated",
+               .input =  { "GRU" "\xc3\x9f" "formel", SIZE_MAX },
+               .output = { false, 3 },
+       },
+       {
+               .description = "one word, full confirmation, NUL-terminated",
+               .input =  { "HELLO", SIZE_MAX },
+               .output = { true, 5 },
+       },
+};
+
+static struct unit_test_is_case_utf8 is_titlecase_utf8[] = {
+       {
+               .description = "empty input",
+               .input =  { "", 0 },
+               .output = { true, 0 },
+       },
+       {
+               .description = "one character, violation",
+               .input =  { "\xc3\x9f", 2 },
+               .output = { false, 0 },
+       },
+       {
+               .description = "one character, confirmation",
+               .input =  { "A", 1 },
+               .output = { true, 1 },
+       },
+       {
+               .description = "one character, violation, NUL-terminated",
+               .input =  { "\xc3\x9f", SIZE_MAX },
+               .output = { false, 0 },
+       },
+       {
+               .description = "one character, confirmation, NUL-terminated",
+               .input =  { "A", SIZE_MAX },
+               .output = { true, 1 },
+       },
+       {
+               .description = "one word, violation",
+               .input =  { "hello", 5 },
+               .output = { false, 0 },
+       },
+       {
+               .description = "one word, partial confirmation",
+               .input =  { "Gru" "\xc3\x9f" "fOrmel", 11 },
+               .output = { false, 6 },
+       },
+       {
+               .description = "one word, full confirmation",
+               .input =  { "Gru" "\xc3\x9f" "formel", 11 },
+               .output = { true, 11 },
+       },
+       {
+               .description = "one word, violation, NUL-terminated",
+               .input =  { "hello", SIZE_MAX },
+               .output = { false, 0 },
+       },
+       {
+               .description = "one word, partial confirmation, NUL-terminated",
+               .input =  { "Gru" "\xc3\x9f" "fOrmel", SIZE_MAX },
+               .output = { false, 6 },
+       },
+       {
+               .description = "one word, full confirmation, NUL-terminated",
+               .input =  { "Gru" "\xc3\x9f" "formel", SIZE_MAX },
+               .output = { true, 11 },
+       },
+       {
+               .description = "multiple words, partial confirmation",
+               .input =  { "Hello Gru" "\xc3\x9f" "fOrmel!", 18 },
+               .output = { false, 12 },
+       },
+       {
+               .description = "multiple words, full confirmation",
+               .input =  { "Hello Gru" "\xc3\x9f" "formel!", 18 },
+               .output = { true, 18 },
+       },
+       {
+               .description = "multiple words, partial confirmation, 
NUL-terminated",
+               .input =  { "Hello Gru" "\xc3\x9f" "fOrmel!", SIZE_MAX },
+               .output = { false, 12 },
+       },
+       {
+               .description = "multiple words, full confirmation, 
NUL-terminated",
+               .input =  { "Hello Gru" "\xc3\x9f" "formel!", SIZE_MAX },
+               .output = { true, 18 },
+       },
+};
+
+static struct unit_test_to_case_utf8 to_lowercase_utf8[] = {
        {
                .description = "empty input",
                .input =  { "", 0, 10 },
@@ -38,8 +244,8 @@ static struct unit_test_to_case_utf8 lowercase_utf8[] = {
        },
        {
                .description = "one character, no conversion",
-               .input =  { "a", 1, 10 },
-               .output = { "a", 1 },
+               .input =  { "\xc3\x9f", 2, 10 },
+               .output = { "\xc3\x9f", 2 },
        },
        {
                .description = "one character, conversion, truncation",
@@ -53,8 +259,8 @@ static struct unit_test_to_case_utf8 lowercase_utf8[] = {
        },
        {
                .description = "one character, no conversion, NUL-terminated",
-               .input =  { "a", SIZE_MAX, 10 },
-               .output = { "a", 1 },
+               .input =  { "\xc3\x9f", SIZE_MAX, 10 },
+               .output = { "\xc3\x9f", 2 },
        },
        {
                .description = "one character, conversion, NUL-terminated, 
truncation",
@@ -93,7 +299,7 @@ static struct unit_test_to_case_utf8 lowercase_utf8[] = {
        },
 };
 
-static struct unit_test_to_case_utf8 uppercase_utf8[] = {
+static struct unit_test_to_case_utf8 to_uppercase_utf8[] = {
        {
                .description = "empty input",
                .input =  { "", 0, 10 },
@@ -106,8 +312,8 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
        },
        {
                .description = "one character, conversion",
-               .input =  { "a", 1, 10 },
-               .output = { "A", 1 },
+               .input =  { "\xc3\x9f", 2, 10 },
+               .output = { "SS", 2 },
        },
        {
                .description = "one character, no conversion",
@@ -116,13 +322,13 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
        },
        {
                .description = "one character, conversion, truncation",
-               .input =  { "a", 1, 0 },
-               .output = { "", 1 },
+               .input =  { "\xc3\x9f", 2, 0 },
+               .output = { "", 2 },
        },
        {
                .description = "one character, conversion, NUL-terminated",
-               .input =  { "a", SIZE_MAX, 10 },
-               .output = { "A", 1 },
+               .input =  { "\xc3\x9f", SIZE_MAX, 10 },
+               .output = { "SS", 2 },
        },
        {
                .description = "one character, no conversion, NUL-terminated",
@@ -131,13 +337,13 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
        },
        {
                .description = "one character, conversion, NUL-terminated, 
truncation",
-               .input =  { "a", SIZE_MAX, 0 },
-               .output = { "", 1 },
+               .input =  { "\xc3\x9f", SIZE_MAX, 0 },
+               .output = { "", 2 },
        },
        {
                .description = "one word, conversion",
-               .input =  { "wOrD", 4, 10 },
-               .output = { "WORD", 4 },
+               .input =  { "gRu" "\xc3\x9f" "fOrMel", 11, 15 },
+               .output = { "GRUSSFORMEL", 11 },
        },
        {
                .description = "one word, no conversion",
@@ -146,13 +352,13 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
        },
        {
                .description = "one word, conversion, truncation",
-               .input =  { "wOrD", 4, 3 },
-               .output = { "WO", 4 },
+               .input =  { "gRu" "\xc3\x9f" "formel", 11, 5 },
+               .output = { "GRUS", 11 },
        },
        {
                .description = "one word, conversion, NUL-terminated",
-               .input =  { "wOrD", SIZE_MAX, 10 },
-               .output = { "WORD", 4 },
+               .input =  { "gRu" "\xc3\x9f" "formel", SIZE_MAX, 15 },
+               .output = { "GRUSSFORMEL", 11 },
        },
        {
                .description = "one word, no conversion, NUL-terminated",
@@ -161,12 +367,12 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
        },
        {
                .description = "one word, conversion, NUL-terminated, 
truncation",
-               .input =  { "wOrD", SIZE_MAX, 3 },
-               .output = { "WO", 4 },
+               .input =  { "gRu" "\xc3\x9f" "formel", SIZE_MAX, 5 },
+               .output = { "GRUS", 11 },
        },
 };
 
-static struct unit_test_to_case_utf8 titlecase_utf8[] = {
+static struct unit_test_to_case_utf8 to_titlecase_utf8[] = {
        {
                .description = "empty input",
                .input =  { "", 0, 10 },
@@ -269,6 +475,42 @@ static struct unit_test_to_case_utf8 titlecase_utf8[] = {
        },
 };
 
+static int
+unit_test_callback_is_case_utf8(void *t, size_t off, const char *name, const 
char *argv0)
+{
+       struct unit_test_is_case_utf8 *test = (struct unit_test_is_case_utf8 
*)t + off;
+       bool ret = false;
+       size_t caselen = 0x7f;
+
+       if (t == is_lowercase_utf8) {
+               ret = grapheme_is_lowercase_utf8(test->input.src, 
test->input.srclen,
+                                                &caselen);
+       } else if (t == is_uppercase_utf8) {
+               ret = grapheme_is_uppercase_utf8(test->input.src, 
test->input.srclen,
+                                                &caselen);
+       } else if (t == is_titlecase_utf8) {
+               ret = grapheme_is_titlecase_utf8(test->input.src, 
test->input.srclen,
+                                                &caselen);
+
+       } else {
+               goto err;
+       }
+
+       /* check results */
+       if (ret != test->output.ret || caselen != test->output.caselen) {
+               goto err;
+       }
+
+       return 0;
+err:
+       fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
+               "(returned (%s, %zu) instead of (%s, %zu)).\n", argv0,
+               name, off, test->description, ret ? "true" : "false",
+               caselen, test->output.ret ? "true" : "false",
+               test->output.caselen);
+       return 1;
+}
+
 static int
 unit_test_callback_to_case_utf8(void *t, size_t off, const char *name, const 
char *argv0)
 {
@@ -279,13 +521,13 @@ unit_test_callback_to_case_utf8(void *t, size_t off, 
const char *name, const cha
        /* fill the array with canary values */
        memset(buf, 0x7f, LEN(buf));
 
-       if (t == lowercase_utf8) {
+       if (t == to_lowercase_utf8) {
                ret = grapheme_to_lowercase_utf8(test->input.src, 
test->input.srclen,
                                                 buf, test->input.destlen);
-       } else if (t == uppercase_utf8) {
+       } else if (t == to_uppercase_utf8) {
                ret = grapheme_to_uppercase_utf8(test->input.src, 
test->input.srclen,
                                                 buf, test->input.destlen);
-       } else if (t == titlecase_utf8) {
+       } else if (t == to_titlecase_utf8) {
                ret = grapheme_to_titlecase_utf8(test->input.src, 
test->input.srclen,
                                                 buf, test->input.destlen);
        } else {
@@ -319,10 +561,16 @@ main(int argc, char *argv[])
 {
        (void)argc;
 
-       return run_unit_tests(unit_test_callback_to_case_utf8, lowercase_utf8,
-                             LEN(lowercase_utf8), 
"grapheme_to_lowercase_utf8", argv[0]) +
-              run_unit_tests(unit_test_callback_to_case_utf8, uppercase_utf8,
-                             LEN(uppercase_utf8), 
"grapheme_to_uppercase_utf8", argv[0]) +
-              run_unit_tests(unit_test_callback_to_case_utf8, titlecase_utf8,
-                             LEN(titlecase_utf8), 
"grapheme_to_titlecase_utf8", argv[0]);
+       return run_unit_tests(unit_test_callback_is_case_utf8, 
is_lowercase_utf8,
+                             LEN(is_lowercase_utf8), 
"grapheme_is_lowercase_utf8", argv[0]) +
+              run_unit_tests(unit_test_callback_is_case_utf8, 
is_uppercase_utf8,
+                             LEN(is_uppercase_utf8), 
"grapheme_is_uppercase_utf8", argv[0]) +
+              run_unit_tests(unit_test_callback_is_case_utf8, 
is_titlecase_utf8,
+                             LEN(is_titlecase_utf8), 
"grapheme_is_titlecase_utf8", argv[0]) +
+              run_unit_tests(unit_test_callback_to_case_utf8, 
to_lowercase_utf8,
+                             LEN(to_lowercase_utf8), 
"grapheme_to_lowercase_utf8", argv[0]) +
+              run_unit_tests(unit_test_callback_to_case_utf8, 
to_uppercase_utf8,
+                             LEN(to_uppercase_utf8), 
"grapheme_to_uppercase_utf8", argv[0]) +
+              run_unit_tests(unit_test_callback_to_case_utf8, 
to_titlecase_utf8,
+                             LEN(to_titlecase_utf8), 
"grapheme_to_titlecase_utf8", argv[0]);
 }

[hackers] [libgrapheme] Refactor case-checking-functions with Herodotus and add unit tests || Laslo Hunhold

Reply via email to