commit f13f39a84bf411748a956f066743a18ec73ab7e6
Author:     Laslo Hunhold <[email protected]>
AuthorDate: Fri Jan 7 17:41:48 2022 +0100
Commit:     Laslo Hunhold <[email protected]>
CommitDate: Fri Jan 7 17:51:27 2022 +0100

    Refactor data-table-generation, use enum-minor-table
    
    For the special case where the property-struct only contains a single
    element it's a bit redundant to store offsets in the minor-array which
    only point into a property-struct. We might as well store the enums
    themselves in the minor-array, saving us one table-lookup.
    
    Given our enum is way below 255 entries, we can store those enums
    uint_least8_t. Previously, the offset-array would at least be an
    uint_least16_t, which was a small oversight on my behalf.
    
    The end-goal is to have separate data-tables for the other
    break-detection-algorithms (word, sentence, line). This is a stark
    contrast to libutf8proc for instance, which has one data-table for
    everything. However, preliminary experiments showed, of course, that the
    larger the data-table gets, the longer the accesses take as the CPU is
    more likely to offload the table from the cache.
    Another factor is static linking: It's unlikely that you use all of the
    algorithms at once. For a minimal overall data-overhead (just a few KB)
    each object file in the library (character.o, etc.) will be
    self-sufficient, allowing easy LTO for the linker.
    
    The aforementioned change leads to a speedup of around 6% and
    data-table-reduction of around 40% (!) to 35K, which easily fits in the
    L1-cache. We are now around half an order of magnitude faster than
    libutf8proc.
    
    Signed-off-by: Laslo Hunhold <[email protected]>

diff --git a/gen/properties.c b/gen/properties.c
index 2edd74e..db7292d 100644
--- a/gen/properties.c
+++ b/gen/properties.c
@@ -12,7 +12,7 @@
 #define FILE_GRAPHEME "data/GraphemeBreakProperty.txt"
 
 struct properties {
-       uint_least8_t break_property;
+       uint_least8_t char_break_property;
 };
 
 struct property_spec {
@@ -21,7 +21,13 @@ struct property_spec {
        const char *ucdname;
 };
 
-static const struct property_spec break_property[] = {
+struct property_payload {
+       struct properties *prop;
+       const struct property_spec *spec;
+       size_t speclen;
+};
+
+static const struct property_spec char_break_property[] = {
        {
                .enumname = "OTHER",
                .file     = NULL,
@@ -104,7 +110,7 @@ break_property_callback(char *file, char **field, size_t 
nfields,
                         char *comment, void *payload)
 {
        /* prop always has the length 0x110000 */
-       struct properties *prop = (struct properties *)payload;
+       struct property_payload *p = (struct property_payload *)payload;
        struct range r;
        size_t i;
        uint_least32_t cp;
@@ -115,11 +121,11 @@ break_property_callback(char *file, char **field, size_t 
nfields,
                return 1;
        }
 
-       for (i = 0; i < LEN(break_property); i++) {
+       for (i = 0; i < p->speclen; i++) {
                /* identify fitting file and identifier */
-               if (break_property[i].file &&
-                   !strcmp(break_property[i].file, file) &&
-                   !strcmp(break_property[i].ucdname, field[1])) {
+               if (p->spec[i].file &&
+                   !strcmp(p->spec[i].file, file) &&
+                   !strcmp(p->spec[i].ucdname, field[1])) {
                        /* parse range in first field */
                        if (range_parse(field[0], &r)) {
                                return 1;
@@ -127,13 +133,18 @@ break_property_callback(char *file, char **field, size_t 
nfields,
 
                        /* apply to all codepoints in the range */
                        for (cp = r.lower; cp <= r.upper; cp++) {
-                               if (prop[cp].break_property != 0) {
+                               if (p->spec == char_break_property) {
+                                       if (p->prop[cp].char_break_property != 
0) {
+                                               fprintf(stderr, 
"break_property_callback: "
+                                                       "Character break 
property overlap.\n");
+                                               exit(1);
+                                       }
+                                       p->prop[cp].char_break_property = i;
+                               } else {
                                        fprintf(stderr, 
"break_property_callback: "
-                                               "property overlap\n");
+                                                       "Unknown 
specification.\n");
                                        exit(1);
                                }
-
-                               prop[cp].break_property = i;
                        }
 
                        break;
@@ -292,13 +303,54 @@ print_lookup_table(char *name, size_t *data, size_t 
datalen, size_t maxval)
        printf("};\n");
 }
 
+/*
+ * this function is for the special case where struct property
+ * only contains a single enum value.
+ */
+static void
+print_enum_lookup_table(char *name, size_t *offset, size_t offsetlen,
+                        const struct properties *prop,
+                        const struct property_spec *spec,
+                        const char *enumprefix)
+{
+       size_t i;
+
+       printf("const uint8_t %s[] = {\n\t", name);
+       for (i = 0; i < offsetlen; i++) {
+               printf("%s_%s", enumprefix,
+                      spec[*((uint_least8_t *)&(prop[offset[i]]))].enumname);
+               if (i + 1 == offsetlen) {
+                       printf("\n");
+               } else if ((i + 1) % 8 != 0) {
+                       printf(", ");
+               } else {
+                       printf(",\n\t");
+               }
+
+       }
+       printf("};\n");
+}
+
+static void
+print_enum(const struct property_spec *spec, size_t speclen,
+           const char *enumname, const char *enumprefix)
+{
+       size_t i;
+
+       printf("enum %s {\n", enumname);
+       for (i = 0; i < speclen; i++) {
+               printf("\t%s_%s,\n", enumprefix, spec[i].enumname);
+       }
+       printf("\tNUM_%sS,\n};\n\n", enumprefix);
+}
+
 int
 main(int argc, char *argv[])
 {
        struct compressed_properties comp;
        struct major_minor_properties mm;
+       struct property_payload payload;
        struct properties *prop;
-       size_t i;
 
        (void)argc;
 
@@ -309,8 +361,12 @@ main(int argc, char *argv[])
        }
 
        /* extract properties */
-       parse_file_with_callback(FILE_EMOJI, break_property_callback, prop);
-       parse_file_with_callback(FILE_GRAPHEME, break_property_callback, prop);
+       payload.prop = prop;
+
+       payload.spec = char_break_property;
+       payload.speclen = LEN(char_break_property);
+       parse_file_with_callback(FILE_EMOJI, break_property_callback, &payload);
+       parse_file_with_callback(FILE_GRAPHEME, break_property_callback, 
&payload);
 
        /*
         * deduplicate by generating an array of offsets into prop where
@@ -323,29 +379,17 @@ main(int argc, char *argv[])
                get_major_minor_properties(&comp, &mm));
 
        /* print data */
+       print_enum(char_break_property, LEN(char_break_property),
+                  "char_break_property", "CHAR_BREAK_PROP");
+
        if (mm.minorlen < 0x100) {
                fprintf(stderr, "minor-array is too short.\n");
        }
        print_lookup_table("major", mm.major, 0x1100, mm.minorlen - 0x100);
        printf("\n");
-       print_lookup_table("minor", mm.minor, mm.minorlen, comp.datalen);
+       print_enum_lookup_table("minor", mm.minor, mm.minorlen, comp.data, 
char_break_property, "CHAR_BREAK_PROP");
        printf("\n");
 
-       printf("enum break_property {\n");
-       for (i = 0; i < LEN(break_property); i++) {
-               printf("\tBREAK_PROP_%s,\n", break_property[i].enumname);
-       }
-       printf("\tNUM_BREAK_PROPS,\n};\n\n");
-       printf("struct properties {\n\tenum break_property 
break_property;\n};\n\n");
-
-       /* properties table */
-       printf("const struct properties prop[] = {\n");
-       for (i = 0; i < comp.datalen; i++) {
-               printf("\t{ BREAK_PROP_%s },\n",
-                      break_property[comp.data[i].break_property].enumname);
-       }
-       printf("};\n");
-
        /* free data */
        free(prop);
        free(comp.data);
diff --git a/src/character.c b/src/character.c
index 27e1cf0..239463d 100644
--- a/src/character.c
+++ b/src/character.c
@@ -8,114 +8,114 @@
 #include "../grapheme.h"
 #include "util.h"
 
-static const uint_least16_t dont_break[NUM_BREAK_PROPS] = {
-       [BREAK_PROP_OTHER] =
-               UINT16_C(1 << BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_SPACINGMARK),   /* GB9a */
-       [BREAK_PROP_CR] =
-               UINT16_C(1 << BREAK_PROP_LF),            /* GB3  */
-       [BREAK_PROP_EXTEND] =
-               UINT16_C(1 << BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_SPACINGMARK),   /* GB9a */
-       [BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
-               UINT16_C(1 << BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_SPACINGMARK),   /* GB9a */
-       [BREAK_PROP_HANGUL_L] =
-               UINT16_C(1 << BREAK_PROP_HANGUL_L)     | /* GB6  */
-               UINT16_C(1 << BREAK_PROP_HANGUL_V)     | /* GB6  */
-               UINT16_C(1 << BREAK_PROP_HANGUL_LV)    | /* GB6  */
-               UINT16_C(1 << BREAK_PROP_HANGUL_LVT)   | /* GB6  */
-               UINT16_C(1 << BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_SPACINGMARK),   /* GB9a */
-       [BREAK_PROP_HANGUL_V] =
-               UINT16_C(1 << BREAK_PROP_HANGUL_V)     | /* GB7  */
-               UINT16_C(1 << BREAK_PROP_HANGUL_T)     | /* GB7  */
-               UINT16_C(1 << BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_SPACINGMARK),   /* GB9a */
-       [BREAK_PROP_HANGUL_T] =
-               UINT16_C(1 << BREAK_PROP_HANGUL_T)     | /* GB8  */
-               UINT16_C(1 << BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_SPACINGMARK),   /* GB9a */
-       [BREAK_PROP_HANGUL_LV] =
-               UINT16_C(1 << BREAK_PROP_HANGUL_V)     | /* GB7  */
-               UINT16_C(1 << BREAK_PROP_HANGUL_T)     | /* GB7  */
-               UINT16_C(1 << BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_SPACINGMARK),   /* GB9a */
-       [BREAK_PROP_HANGUL_LVT] =
-               UINT16_C(1 << BREAK_PROP_HANGUL_T)     | /* GB8  */
-               UINT16_C(1 << BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_SPACINGMARK),   /* GB9a */
-       [BREAK_PROP_PREPEND] =
-               UINT16_C(1 << BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_SPACINGMARK)  | /* GB9a */
+static const uint_least16_t dont_break[NUM_CHAR_BREAK_PROPS] = {
+       [CHAR_BREAK_PROP_OTHER] =
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+       [CHAR_BREAK_PROP_CR] =
+               UINT16_C(1 << CHAR_BREAK_PROP_LF),            /* GB3  */
+       [CHAR_BREAK_PROP_EXTEND] =
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+       [CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+       [CHAR_BREAK_PROP_HANGUL_L] =
+               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_L)     | /* GB6  */
+               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V)     | /* GB6  */
+               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_LV)    | /* GB6  */
+               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_LVT)   | /* GB6  */
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+       [CHAR_BREAK_PROP_HANGUL_V] =
+               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V)     | /* GB7  */
+               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T)     | /* GB7  */
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+       [CHAR_BREAK_PROP_HANGUL_T] =
+               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T)     | /* GB8  */
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+       [CHAR_BREAK_PROP_HANGUL_LV] =
+               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V)     | /* GB7  */
+               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T)     | /* GB7  */
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+       [CHAR_BREAK_PROP_HANGUL_LVT] =
+               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T)     | /* GB8  */
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+       [CHAR_BREAK_PROP_PREPEND] =
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK)  | /* GB9a */
                (UINT16_C(0xFFFF) &
-                ~(UINT16_C(1 << BREAK_PROP_CR)      |
-                  UINT16_C(1 << BREAK_PROP_LF)      |
-                  UINT16_C(1 << BREAK_PROP_CONTROL)
+                ~(UINT16_C(1 << CHAR_BREAK_PROP_CR)      |
+                  UINT16_C(1 << CHAR_BREAK_PROP_LF)      |
+                  UINT16_C(1 << CHAR_BREAK_PROP_CONTROL)
                  )
                ),                                           /* GB9b */
-       [BREAK_PROP_REGIONAL_INDICATOR] =
-               UINT16_C(1 << BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_SPACINGMARK),   /* GB9a */
-       [BREAK_PROP_SPACINGMARK] =
-               UINT16_C(1 << BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_SPACINGMARK),   /* GB9a */
-       [BREAK_PROP_ZWJ] =
-               UINT16_C(1 << BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << BREAK_PROP_SPACINGMARK),   /* GB9a */
+       [CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+       [CHAR_BREAK_PROP_SPACINGMARK] =
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+       [CHAR_BREAK_PROP_ZWJ] =
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
+               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
 };
-static const uint_least16_t flag_update_gb11[2 * NUM_BREAK_PROPS] = {
-       [BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
-               UINT16_C(1 << BREAK_PROP_ZWJ)                   |
-               UINT16_C(1 << BREAK_PROP_EXTEND),
-       [BREAK_PROP_ZWJ + NUM_BREAK_PROPS] =
-               UINT16_C(1 << BREAK_PROP_EXTENDED_PICTOGRAPHIC),
-       [BREAK_PROP_EXTEND + NUM_BREAK_PROPS] =
-               UINT16_C(1 << BREAK_PROP_EXTEND)                |
-               UINT16_C(1 << BREAK_PROP_ZWJ),
-       [BREAK_PROP_EXTENDED_PICTOGRAPHIC + NUM_BREAK_PROPS] =
-               UINT16_C(1 << BREAK_PROP_ZWJ)                   |
-               UINT16_C(1 << BREAK_PROP_EXTEND),
+static const uint_least16_t flag_update_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
+       [CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)                   |
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND),
+       [CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC),
+       [CHAR_BREAK_PROP_EXTEND + NUM_CHAR_BREAK_PROPS] =
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)                |
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ),
+       [CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC + NUM_CHAR_BREAK_PROPS] =
+               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)                   |
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND),
 };
-static const uint_least16_t dont_break_gb11[2 * NUM_BREAK_PROPS] = {
-       [BREAK_PROP_ZWJ + NUM_BREAK_PROPS] =
-               UINT16_C(1 << BREAK_PROP_EXTENDED_PICTOGRAPHIC),
+static const uint_least16_t dont_break_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
+       [CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
+               UINT16_C(1 << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC),
 };
-static const uint_least16_t flag_update_gb12_13[2 * NUM_BREAK_PROPS] = {
-       [BREAK_PROP_REGIONAL_INDICATOR] =
-               UINT16_C(1 << BREAK_PROP_REGIONAL_INDICATOR),
+static const uint_least16_t flag_update_gb12_13[2 * NUM_CHAR_BREAK_PROPS] = {
+       [CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
+               UINT16_C(1 << CHAR_BREAK_PROP_REGIONAL_INDICATOR),
 };
-static const uint_least16_t dont_break_gb12_13[2 * NUM_BREAK_PROPS] = {
-       [BREAK_PROP_REGIONAL_INDICATOR + NUM_BREAK_PROPS] =
-               UINT16_C(1 << BREAK_PROP_REGIONAL_INDICATOR),
+static const uint_least16_t dont_break_gb12_13[2 * NUM_CHAR_BREAK_PROPS] = {
+       [CHAR_BREAK_PROP_REGIONAL_INDICATOR + NUM_CHAR_BREAK_PROPS] =
+               UINT16_C(1 << CHAR_BREAK_PROP_REGIONAL_INDICATOR),
 };
 
-static enum break_property
+static enum char_break_property
 get_break_prop(uint_least32_t cp)
 {
        if (likely(cp <= 0x10FFFF)) {
-               return prop[minor[major[cp >> 8] + (cp & 0xff)]].break_property;
+               return (enum char_break_property)minor[major[cp >> 8] + (cp & 
0xff)];
        } else {
-               return BREAK_PROP_OTHER;
+               return CHAR_BREAK_PROP_OTHER;
        }
 }
 
 bool
 grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, 
GRAPHEME_STATE *state)
 {
-       enum break_property cp0_prop, cp1_prop;
+       enum char_break_property cp0_prop, cp1_prop;
        bool notbreak = false;
 
        if (likely(state)) {
@@ -132,11 +132,11 @@ grapheme_is_character_break(uint_least32_t cp0, 
uint_least32_t cp1, GRAPHEME_STA
 
                /* update flags */
                state->gb11_flag = flag_update_gb11[cp0_prop +
-                                                   NUM_BREAK_PROPS *
+                                                   NUM_CHAR_BREAK_PROPS *
                                                    state->gb11_flag] &
                                   UINT16_C(1 << cp1_prop);
                state->gb12_13_flag = flag_update_gb12_13[cp0_prop +
-                                                         NUM_BREAK_PROPS *
+                                                         NUM_CHAR_BREAK_PROPS *
                                                          state->gb12_13_flag] &
                                      UINT16_C(1 << cp1_prop);
 
@@ -146,10 +146,10 @@ grapheme_is_character_break(uint_least32_t cp0, 
uint_least32_t cp1, GRAPHEME_STA
                 */
                notbreak = (dont_break[cp0_prop] & UINT16_C(1 << cp1_prop)) ||
                           (dont_break_gb11[cp0_prop + state->gb11_flag *
-                                           NUM_BREAK_PROPS] &
+                                           NUM_CHAR_BREAK_PROPS] &
                            UINT16_C(1 << cp1_prop)) ||
                           (dont_break_gb12_13[cp0_prop + state->gb12_13_flag *
-                                              NUM_BREAK_PROPS] &
+                                              NUM_CHAR_BREAK_PROPS] &
                            UINT16_C(1 << cp1_prop));
 
                /* update or reset flags (when we have a break) */

Reply via email to