commit 62e7175fe5b2d7bebf16d39d334a645ec4361f83
Author:     Laslo Hunhold <[email protected]>
AuthorDate: Sat Dec 18 13:43:58 2021 +0100
Commit:     Laslo Hunhold <[email protected]>
CommitDate: Sat Dec 18 13:43:58 2021 +0100

    Rename GRAPHEME_INVALID_CODE_POINT to GRAPHEME_CODEPOINT_INVALID
    
    Now that the underscore is gone by convention in this codebase, the
    new ordering is much easier to read. It also conveys that this define
    is a codepoint.
    
    Signed-off-by: Laslo Hunhold <[email protected]>

diff --git a/grapheme.h b/grapheme.h
index 1d09ab9..b9c381c 100644
--- a/grapheme.h
+++ b/grapheme.h
@@ -17,7 +17,7 @@ typedef struct grapheme_internal_segmentation_state {
        uint_least16_t flags;
 } GRAPHEME_STATE;
 
-#define GRAPHEME_INVALID_CODE_POINT UINT32_C(0xFFFD)
+#define GRAPHEME_CODEPOINT_INVALID UINT32_C(0xFFFD)
 
 size_t grapheme_character_nextbreak(const char *);
 
diff --git a/man/grapheme_utf8_decode.3 b/man/grapheme_utf8_decode.3
index 69352a8..6a1f5c2 100644
--- a/man/grapheme_utf8_decode.3
+++ b/man/grapheme_utf8_decode.3
@@ -18,7 +18,7 @@ of length
 If the UTF-8-sequence is invalid (overlong encoding, unexpected byte,
 string ends unexpectedly, empty string, etc.) the decoding is stopped
 at the last processed byte and the decoded codepoint set to
-.Dv GRAPHEME_INVALID_CODE_POINT.
+.Dv GRAPHEME_CODEPOINT_INVALID.
 .Pp
 If
 .Va cp
diff --git a/src/character.c b/src/character.c
index 06aa8d3..be49a34 100644
--- a/src/character.c
+++ b/src/character.c
@@ -203,7 +203,7 @@ grapheme_character_nextbreak(const char *str)
 
        /* get first codepoint */
        len += grapheme_utf8_decode(str, (size_t)-1, &cp0);
-       if (cp0 == GRAPHEME_INVALID_CODE_POINT) {
+       if (cp0 == GRAPHEME_CODEPOINT_INVALID) {
                return len;
        }
 
@@ -211,7 +211,7 @@ grapheme_character_nextbreak(const char *str)
                /* get next codepoint */
                ret = grapheme_utf8_decode(str + len, (size_t)-1, &cp1);
 
-               if (cp1 == GRAPHEME_INVALID_CODE_POINT ||
+               if (cp1 == GRAPHEME_CODEPOINT_INVALID ||
                    grapheme_character_isbreak(cp0, cp1, &state)) {
                        /* we read an invalid cp or have a breakpoint */
                        break;
diff --git a/src/utf8.c b/src/utf8.c
index 8be67c9..851f075 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -54,7 +54,7 @@ grapheme_utf8_decode(const char *s, size_t n, uint_least32_t *cp)
 
        if (s == NULL || n == 0) {
                /* a sequence must be at least 1 byte long */
-               *cp = GRAPHEME_INVALID_CODE_POINT;
+               *cp = GRAPHEME_CODEPOINT_INVALID;
                return 0;
        }
 
@@ -79,14 +79,14 @@ grapheme_utf8_decode(const char *s, size_t n, uint_least32_t *cp)
                 * this also includes the cases where bits higher than
                 * the 8th are set on systems with CHAR_BIT > 8
                 */
-               *cp = GRAPHEME_INVALID_CODE_POINT;
+               *cp = GRAPHEME_CODEPOINT_INVALID;
                return 1;
        }
        if (1 + off > n) {
                /*
                 * input is not long enough, set cp as invalid
                 */
-               *cp = GRAPHEME_INVALID_CODE_POINT;
+               *cp = GRAPHEME_CODEPOINT_INVALID;
 
                /*
                 * count the following continuation bytes, but nothing
@@ -125,7 +125,7 @@ grapheme_utf8_decode(const char *s, size_t n, uint_least32_t *cp)
                         * higher than the 8th are set on systems
                         * with CHAR_BIT > 8
                         */
-                       *cp = GRAPHEME_INVALID_CODE_POINT;
+                       *cp = GRAPHEME_CODEPOINT_INVALID;
                        return 1 + (i - 1);
                }
                /*
@@ -144,7 +144,7 @@ grapheme_utf8_decode(const char *s, size_t n, uint_least32_t *cp)
                 * not representable in UTF-16 (>0x10FFFF) (RFC-3629
                 * specifies the latter two conditions)
                 */
-               *cp = GRAPHEME_INVALID_CODE_POINT;
+               *cp = GRAPHEME_CODEPOINT_INVALID;
        }
 
        return 1 + off;
@@ -162,7 +162,7 @@ grapheme_utf8_encode(uint_least32_t cp, char *s, size_t n)
                 * (0xD800..0xDFFF) or not representable in UTF-16
                 * (>0x10FFFF), which RFC-3629 deems invalid for UTF-8.
                 */
-               cp = GRAPHEME_INVALID_CODE_POINT;
+               cp = GRAPHEME_CODEPOINT_INVALID;
        }
 
        /* determine necessary sequence type */
diff --git a/test/utf8-decode.c b/test/utf8-decode.c
index 1de282c..7d5e389 100644
--- a/test/utf8-decode.c
+++ b/test/utf8-decode.c
@@ -21,7 +21,7 @@ static const struct {
                .arr     = NULL,
                .len     = 0,
                .exp_len = 0,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid lead byte
@@ -31,7 +31,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xFD },
                .len     = 1,
                .exp_len = 1,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* valid 1-byte sequence
@@ -61,7 +61,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xC3 },
                .len     = 1,
                .exp_len = 2,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 2-byte sequence (second byte malformed)
@@ -71,7 +71,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xC3, 0xFF },
                .len     = 2,
                .exp_len = 1,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 2-byte sequence (overlong encoded)
@@ -81,7 +81,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xC1, 0xBF },
                .len     = 2,
                .exp_len = 2,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* valid 3-byte sequence
@@ -101,7 +101,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xE0 },
                .len     = 1,
                .exp_len = 3,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 3-byte sequence (second byte malformed)
@@ -111,7 +111,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xE0, 0x7F, 0xBF },
                .len     = 3,
                .exp_len = 1,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 3-byte sequence (short string, second byte malformed)
@@ -121,7 +121,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xE0, 0x7F },
                .len     = 2,
                .exp_len = 1,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 3-byte sequence (third byte missing)
@@ -131,7 +131,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xE0, 0xBF },
                .len     = 2,
                .exp_len = 3,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 3-byte sequence (third byte malformed)
@@ -141,7 +141,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xE0, 0xBF, 0x7F },
                .len     = 3,
                .exp_len = 2,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 3-byte sequence (overlong encoded)
@@ -151,7 +151,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xE0, 0x9F, 0xBF },
                .len     = 3,
                .exp_len = 3,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 3-byte sequence (UTF-16 surrogate half)
@@ -161,7 +161,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xED, 0xA0, 0x80 },
                .len     = 3,
                .exp_len = 3,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* valid 4-byte sequence
@@ -181,7 +181,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xF3 },
                .len     = 1,
                .exp_len = 4,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 4-byte sequence (second byte malformed)
@@ -191,7 +191,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF, 0xBF },
                .len     = 4,
                .exp_len = 1,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 4-byte sequence (short string 1, second byte malformed)
@@ -201,7 +201,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xF3, 0x7F },
                .len     = 2,
                .exp_len = 1,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 4-byte sequence (short string 2, second byte malformed)
@@ -211,7 +211,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xF3, 0x7F, 0xBF },
                .len     = 3,
                .exp_len = 1,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
 
        {
@@ -222,7 +222,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xF3, 0xBF },
                .len     = 2,
                .exp_len = 4,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 4-byte sequence (third byte malformed)
@@ -232,7 +232,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F, 0xBF },
                .len     = 4,
                .exp_len = 2,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 4-byte sequence (short string, third byte malformed)
@@ -242,7 +242,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xF3, 0xBF, 0x7F },
                .len     = 3,
                .exp_len = 2,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 4-byte sequence (fourth byte missing)
@@ -252,7 +252,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF },
                .len     = 3,
                .exp_len = 4,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 4-byte sequence (fourth byte malformed)
@@ -262,7 +262,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0x7F },
                .len     = 4,
                .exp_len = 3,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 4-byte sequence (overlong encoded)
@@ -272,7 +272,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xF0, 0x80, 0x81, 0xBF },
                .len     = 4,
                .exp_len = 4,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
        {
                /* invalid 4-byte sequence (UTF-16-unrepresentable)
@@ -282,7 +282,7 @@ static const struct {
                .arr     = (char *)(unsigned char[]){ 0xF4, 0x90, 0x80, 0x80 },
                .len     = 4,
                .exp_len = 4,
-               .exp_cp  = GRAPHEME_INVALID_CODE_POINT,
+               .exp_cp  = GRAPHEME_CODEPOINT_INVALID,
        },
 };
 

Reply via email to