commit 950adad158c79da041c85cbb3773208988ea7477
Author:     Laslo Hunhold <[email protected]>
AuthorDate: Sun Dec 19 01:22:58 2021 +0100
Commit:     Laslo Hunhold <[email protected]>
CommitDate: Sun Dec 19 01:22:58 2021 +0100

    Match function parameters in code and documentation
    
    This always helps readability when you want to check the documentation
    against the implementation.
    
    Signed-off-by: Laslo Hunhold <[email protected]>

diff --git a/grapheme.h b/grapheme.h
index c2def7c..1f08f55 100644
--- a/grapheme.h
+++ b/grapheme.h
@@ -12,8 +12,8 @@ struct grapheme_internal_heisenstate {
 };
 
 typedef struct grapheme_internal_segmentation_state {
-       struct grapheme_internal_heisenstate a;
-       struct grapheme_internal_heisenstate b;
+       struct grapheme_internal_heisenstate cp0;
+       struct grapheme_internal_heisenstate cp1;
        uint_least16_t flags;
 } GRAPHEME_STATE;
 
diff --git a/man/grapheme_encode_utf8.3 b/man/grapheme_encode_utf8.3
index 42dbbe5..5e51ac2 100644
--- a/man/grapheme_encode_utf8.3
+++ b/man/grapheme_encode_utf8.3
@@ -7,7 +7,7 @@
 .Sh SYNOPSIS
 .In grapheme.h
 .Ft size_t
-.Fn grapheme_encode_utf8 "uint_least32_t cp" "char *" "size_t"
+.Fn grapheme_encode_utf8 "uint_least32_t cp" "char *str" "size_t len"
 .Sh DESCRIPTION
 The
 .Fn grapheme_encode_utf8
diff --git a/src/character.c b/src/character.c
index 2215543..ae8f3df 100644
--- a/src/character.c
+++ b/src/character.c
@@ -14,7 +14,7 @@ enum {
 };
 
 bool
-grapheme_is_character_break(uint_least32_t a, uint_least32_t b, GRAPHEME_STATE *state)
+grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, GRAPHEME_STATE *state)
 {
        struct grapheme_internal_heisenstate *p[2] = { 0 };
        uint_least16_t flags = 0;
@@ -22,14 +22,14 @@ grapheme_is_character_break(uint_least32_t a, uint_least32_t b, GRAPHEME_STATE *
 
        /* set state depending on state pointer */
        if (state != NULL) {
-               p[0] = &(state->a);
-               p[1] = &(state->b);
+               p[0] = &(state->cp0);
+               p[1] = &(state->cp1);
                flags = state->flags;
        }
 
        /* skip printable ASCII */
-       if ((a >= 0x20 && a <= 0x7E) &&
-           (b >= 0x20 && b <= 0x7E)) {
+       if ((cp0 >= 0x20 && cp0 <= 0x7E) &&
+           (cp1 >= 0x20 && cp1 <= 0x7E)) {
                goto hasbreak;
        }
 
@@ -41,8 +41,8 @@ grapheme_is_character_break(uint_least32_t a, uint_least32_t b, GRAPHEME_STATE *
        /*
         * update flags, if state-pointer given
         */
-       if (has_property(b, p[1], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR)) {
-               if (has_property(a, p[0], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR)) {
+       if (has_property(cp1, p[1], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR)) {
+               if (has_property(cp0, p[0], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR)) {
                        /* one more RI is on the left side of the seam, flip state */
                        flags ^= CHARACTER_FLAG_RI_ODD;
                } else {
@@ -52,22 +52,22 @@ grapheme_is_character_break(uint_least32_t a, uint_least32_t b, GRAPHEME_STATE *
                }
        }
        if (!(flags & CHARACTER_FLAG_EMOJI) &&
-           ((has_property(a, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
-             has_property(b, p[1], character_prop, CHARACTER_PROP_ZWJ)) ||
-             (has_property(a, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
-             has_property(b, p[1], character_prop, CHARACTER_PROP_EXTEND)))) {
+           ((has_property(cp0, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
+             has_property(cp1, p[1], character_prop, CHARACTER_PROP_ZWJ)) ||
+             (has_property(cp0, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
+             has_property(cp1, p[1], character_prop, CHARACTER_PROP_EXTEND)))) {
                flags |= CHARACTER_FLAG_EMOJI;
        } else if ((flags & CHARACTER_FLAG_EMOJI) &&
-                  ((has_property(a, p[0], character_prop, CHARACTER_PROP_ZWJ) &&
-                    has_property(b, p[1], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC)) ||
-                   (has_property(a, p[0], character_prop, CHARACTER_PROP_EXTEND) &&
-                    has_property(b, p[1], character_prop, CHARACTER_PROP_EXTEND)) ||
-                   (has_property(a, p[0], character_prop, CHARACTER_PROP_EXTEND) &&
-                    has_property(b, p[1], character_prop, CHARACTER_PROP_ZWJ)) ||
-                   (has_property(a, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
-                    has_property(b, p[1], character_prop, CHARACTER_PROP_ZWJ)) ||
-                   (has_property(a, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
-                    has_property(b, p[1], character_prop, CHARACTER_PROP_EXTEND)))) {
+                  ((has_property(cp0, p[0], character_prop, CHARACTER_PROP_ZWJ) &&
+                    has_property(cp1, p[1], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC)) ||
+                   (has_property(cp0, p[0], character_prop, CHARACTER_PROP_EXTEND) &&
+                    has_property(cp1, p[1], character_prop, CHARACTER_PROP_EXTEND)) ||
+                   (has_property(cp0, p[0], character_prop, CHARACTER_PROP_EXTEND) &&
+                    has_property(cp1, p[1], character_prop, CHARACTER_PROP_ZWJ)) ||
+                   (has_property(cp0, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
+                    has_property(cp1, p[1], character_prop, CHARACTER_PROP_ZWJ)) ||
+                   (has_property(cp0, p[0], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC) &&
+                    has_property(cp1, p[1], character_prop, CHARACTER_PROP_EXTEND)))) {
                /* CHARACTER_FLAG_EMOJI remains */
        } else {
                flags &= ~CHARACTER_FLAG_EMOJI;
@@ -85,76 +85,76 @@ grapheme_is_character_break(uint_least32_t a, uint_least32_t b, GRAPHEME_STATE *
        /* skip GB1 and GB2, as they are never satisfied here */
 
        /* GB3 */
-       if (has_property(a, p[0], character_prop, CHARACTER_PROP_CR) &&
-           has_property(b, p[1], character_prop, CHARACTER_PROP_LF)) {
+       if (has_property(cp0, p[0], character_prop, CHARACTER_PROP_CR) &&
+           has_property(cp1, p[1], character_prop, CHARACTER_PROP_LF)) {
                goto nobreak;
        }
 
        /* GB4 */
-       if (has_property(a, p[0], character_prop, CHARACTER_PROP_CONTROL) ||
-           has_property(a, p[0], character_prop, CHARACTER_PROP_CR) ||
-           has_property(a, p[0], character_prop, CHARACTER_PROP_LF)) {
+       if (has_property(cp0, p[0], character_prop, CHARACTER_PROP_CONTROL) ||
+           has_property(cp0, p[0], character_prop, CHARACTER_PROP_CR) ||
+           has_property(cp0, p[0], character_prop, CHARACTER_PROP_LF)) {
                goto hasbreak;
        }
 
        /* GB5 */
-       if (has_property(b, p[1], character_prop, CHARACTER_PROP_CONTROL) ||
-           has_property(b, p[1], character_prop, CHARACTER_PROP_CR) ||
-           has_property(b, p[1], character_prop, CHARACTER_PROP_LF)) {
+       if (has_property(cp1, p[1], character_prop, CHARACTER_PROP_CONTROL) ||
+           has_property(cp1, p[1], character_prop, CHARACTER_PROP_CR) ||
+           has_property(cp1, p[1], character_prop, CHARACTER_PROP_LF)) {
                goto hasbreak;
        }
 
        /* GB6 */
-       if (has_property(a, p[0], character_prop, CHARACTER_PROP_HANGUL_L) &&
-           (has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_L) ||
-            has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_V) ||
-            has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_LV) ||
+       if (has_property(cp0, p[0], character_prop, CHARACTER_PROP_HANGUL_L) &&
+           (has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_L) ||
+            has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_V) ||
+            has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_LV) ||
 
-            has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_LVT))) {
+            has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_LVT))) {
                goto nobreak;
        }
 
        /* GB7 */
-       if ((has_property(a, p[0], character_prop, CHARACTER_PROP_HANGUL_LV) ||
-            has_property(a, p[0], character_prop, CHARACTER_PROP_HANGUL_V)) &&
-           (has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_V) ||
-            has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_T))) {
+       if ((has_property(cp0, p[0], character_prop, CHARACTER_PROP_HANGUL_LV) ||
+            has_property(cp0, p[0], character_prop, CHARACTER_PROP_HANGUL_V)) &&
+           (has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_V) ||
+            has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_T))) {
                goto nobreak;
        }
 
        /* GB8 */
-       if ((has_property(a, p[0], character_prop, CHARACTER_PROP_HANGUL_LVT) ||
-            has_property(a, p[0], character_prop, CHARACTER_PROP_HANGUL_T)) &&
-           has_property(b, p[1], character_prop, CHARACTER_PROP_HANGUL_T)) {
+       if ((has_property(cp0, p[0], character_prop, CHARACTER_PROP_HANGUL_LVT) ||
+            has_property(cp0, p[0], character_prop, CHARACTER_PROP_HANGUL_T)) &&
+           has_property(cp1, p[1], character_prop, CHARACTER_PROP_HANGUL_T)) {
                goto nobreak;
        }
 
        /* GB9 */
-       if (has_property(b, p[1], character_prop, CHARACTER_PROP_EXTEND) ||
-           has_property(b, p[1], character_prop, CHARACTER_PROP_ZWJ)) {
+       if (has_property(cp1, p[1], character_prop, CHARACTER_PROP_EXTEND) ||
+           has_property(cp1, p[1], character_prop, CHARACTER_PROP_ZWJ)) {
                goto nobreak;
        }
 
        /* GB9a */
-       if (has_property(b, p[1], character_prop, CHARACTER_PROP_SPACINGMARK)) {
+       if (has_property(cp1, p[1], character_prop, CHARACTER_PROP_SPACINGMARK)) {
                goto nobreak;
        }
 
        /* GB9b */
-       if (has_property(a, p[0], character_prop, CHARACTER_PROP_PREPEND)) {
+       if (has_property(cp0, p[0], character_prop, CHARACTER_PROP_PREPEND)) {
                goto nobreak;
        }
 
        /* GB11 */
        if ((flags & CHARACTER_FLAG_EMOJI) &&
-           has_property(a, p[0], character_prop, CHARACTER_PROP_ZWJ) &&
-           has_property(b, p[1], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC)) {
+           has_property(cp0, p[0], character_prop, CHARACTER_PROP_ZWJ) &&
+           has_property(cp1, p[1], character_prop, CHARACTER_PROP_EXTENDED_PICTOGRAPHIC)) {
                goto nobreak;
        }
 
        /* GB12/GB13 */
-       if (has_property(a, p[0], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR) &&
-           has_property(b, p[1], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR) &&
+       if (has_property(cp0, p[0], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR) &&
+           has_property(cp1, p[1], character_prop, CHARACTER_PROP_REGIONAL_INDICATOR) &&
            (flags & CHARACTER_FLAG_RI_ODD)) {
                goto nobreak;
        }
@@ -166,8 +166,8 @@ nobreak:
 hasbreak:
        if (state != NULL) {
                /* move b-state to a-state, discard b-state */
-               memcpy(&(state->a), &(state->b), sizeof(state->a));
-               memset(&(state->b), 0, sizeof(state->b));
+               memcpy(&(state->cp0), &(state->cp1), sizeof(state->cp0));
+               memset(&(state->cp1), 0, sizeof(state->cp1));
 
                /* reset flags */
                if (isbreak) {
diff --git a/src/utf8.c b/src/utf8.c
index fe7775c..e01fa37 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -48,11 +48,11 @@ static const struct {
 };
 
 size_t
-grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
+grapheme_decode_utf8(const char *str, size_t len, uint_least32_t *cp)
 {
        size_t off, i;
 
-       if (s == NULL || n == 0) {
+       if (str == NULL || len == 0) {
                /* a sequence must be at least 1 byte long */
                *cp = GRAPHEME_INVALID_CODEPOINT;
                return 0;
@@ -60,14 +60,14 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
 
        /* identify sequence type with the first byte */
        for (off = 0; off < LEN(lut); off++) {
-               if (BETWEEN(((const unsigned char *)s)[0], lut[off].lower,
+               if (BETWEEN(((const unsigned char *)str)[0], lut[off].lower,
                            lut[off].upper)) {
                        /*
                         * first byte is within the bounds; fill
                         * p with the the first bits contained in
                         * the first byte (by subtracting the high bits)
                         */
-                       *cp = ((const unsigned char *)s)[0] - lut[off].lower;
+                       *cp = ((const unsigned char *)str)[0] - lut[off].lower;
                        break;
                }
        }
@@ -82,7 +82,7 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
                *cp = GRAPHEME_INVALID_CODEPOINT;
                return 1;
        }
-       if (1 + off > n) {
+       if (1 + off > len) {
                /*
                 * input is not long enough, set cp as invalid
                 */
@@ -93,8 +93,8 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
                 * else in case we have a "rogue" case where e.g. such a
                 * sequence starter occurs right before a NUL-byte.
                 */
-               for (i = 0; 1 + i < n; i++) {
-                       if(!BETWEEN(((const unsigned char *)s)[1 + i],
+               for (i = 0; 1 + i < len; i++) {
+                       if(!BETWEEN(((const unsigned char *)str)[1 + i],
                                    0x80, 0xBF)) {
                                break;
                        }
@@ -106,7 +106,7 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
                 * Otherwise return the number of bytes we actually
                 * expected, which is larger than n.
                 */
-               return ((1 + i) < n) ? (1 + i) : (1 + off);
+               return ((1 + i) < len) ? (1 + i) : (1 + off);
        }
 
        /*
@@ -114,7 +114,7 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
         * (i.e. between 0x80 (10000000) and 0xBF (10111111))
         */
        for (i = 1; i <= off; i++) {
-               if(!BETWEEN(((const unsigned char *)s)[i], 0x80, 0xBF)) {
+               if(!BETWEEN(((const unsigned char *)str)[i], 0x80, 0xBF)) {
                        /*
                         * byte does not match format; return
                         * number of bytes processed excluding the
@@ -132,7 +132,7 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
                 * shift codepoint by 6 bits and add the 6 stored bits
                 * in s[i] to it using the bitmask 0x3F (00111111)
                 */
-               *cp = (*cp << 6) | (((const unsigned char *)s)[i] & 0x3F);
+               *cp = (*cp << 6) | (((const unsigned char *)str)[i] & 0x3F);
        }
 
        if (*cp < lut[off].mincp ||
@@ -151,7 +151,7 @@ grapheme_decode_utf8(const char *s, size_t n, uint_least32_t *cp)
 }
 
 size_t
-grapheme_encode_utf8(uint_least32_t cp, char *s, size_t n)
+grapheme_encode_utf8(uint_least32_t cp, char *str, size_t len)
 {
        size_t off, i;
 
@@ -171,7 +171,7 @@ grapheme_encode_utf8(uint_least32_t cp, char *s, size_t n)
                        break;
                }
        }
-       if (1 + off > n || s == NULL || n == 0) {
+       if (1 + off > len || str == NULL || len == 0) {
                /*
                 * specified buffer is too small to store sequence or
                 * the caller just wanted to know how many bytes the
@@ -191,7 +191,7 @@ grapheme_encode_utf8(uint_least32_t cp, char *s, size_t n)
         * We do not overwrite the mask because we guaranteed earlier
         * that there are no bits higher than the mask allows.
         */
-       ((unsigned char *)s)[0] = lut[off].lower | (uint8_t)(cp >> (6 * off));
+       ((unsigned char *)str)[0] = lut[off].lower | (uint8_t)(cp >> (6 * off));
 
        for (i = 1; i <= off; i++) {
                /*
@@ -200,8 +200,8 @@ grapheme_encode_utf8(uint_least32_t cp, char *s, size_t n)
                 * extract from the properly-shifted value using the
                 * mask 00111111 (0x3F)
                 */
-               ((unsigned char *)s)[i] = 0x80 |
-                                         ((cp >> (6 * (off - i))) & 0x3F);
+               ((unsigned char *)str)[i] = 0x80 |
+                                           ((cp >> (6 * (off - i))) & 0x3F);
        }
 
        return 1 + off;

Reply via email to