commit a88c9a176dfc9459c3a7b09f6c9aedda6d06732f
Author:     Laslo Hunhold <[email protected]>
AuthorDate: Mon Jun 1 12:00:50 2020 +0200
Commit:     Laslo Hunhold <[email protected]>
CommitDate: Mon Jun 1 12:00:50 2020 +0200

    Expose grapheme_cp_{en,de}code() and grapheme_boundary()
    
    After the preparation, we can now expose these three functions in
    grapheme.h, as suggested by Mattias.
    In this context, we get rid of the Codepoint-typedef, as there is no
    need to opaquely define uint32_t. A codepoint is just a number, and thus
    let's stop with the "Rune", "Codepoint", etc. naming-nonsense!
    
    Moving everything into grapheme.h, there is also no need for boundary.h
    and codepoint.h, which we reflect in the Makefile.
    
    Signed-off-by: Laslo Hunhold <[email protected]>

diff --git a/Makefile b/Makefile
index 7816aaf..949ef98 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@
 include config.mk
 
 BIN = src/test
-REQ = src/codepoint src/boundary src/grapheme
+REQ = src/boundary src/codepoint src/grapheme
 GBP_URL = https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt
 EMO_URL = https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-data.txt
 GBT_URL = https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
@@ -19,10 +19,10 @@ all: libgrapheme.a libgrapheme.so $(BIN)
 
 src/test: src/test.o $(REQ:=.o)
 
-src/boundary.o: src/boundary.c config.mk src/codepoint.h src/boundary.h
-src/codepoint.o: src/codepoint.c config.mk src/codepoint.h
-src/grapheme.o: src/grapheme.c config.mk src/codepoint.h src/boundary.h
-src/test.o: src/test.c config.mk src/codepoint.h src/boundary.h
+src/boundary.o: src/boundary.c config.mk grapheme.h
+src/codepoint.o: src/codepoint.c config.mk grapheme.h
+src/grapheme.o: src/grapheme.c config.mk grapheme.h
+src/test.o: src/test.c config.mk grapheme.h
 
 .o:
        $(CC) -o $@ $(LDFLAGS) $< $(REQ:=.o)
@@ -42,7 +42,7 @@ test:
 
 src/boundary.c: data/gbt.awk $(GBP) data/emo.awk $(EMO) src/boundary_body.c
        printf "/* Automatically generated by gbp.awk and emo.awk */\n" > $@
-       printf "#include \"codepoint.h\"\n" >> $@
+       printf "#include <stdint.h>\n\n" >> $@
        awk -f data/gbp.awk $(GBP) >> $@
        awk -f data/emo.awk $(EMO) >> $@
        printf "\n" >> $@
@@ -50,8 +50,9 @@ src/boundary.c: data/gbt.awk $(GBP) data/emo.awk $(EMO) src/boundary_body.c
 
 src/test.c: data/gbt.awk $(GBT) src/test_body.c
        printf "/* Automatically generated by gbt.awk */\n" > $@
-       printf "#include <stddef.h>\n\n" >> $@
-       printf "#include \"codepoint.h\"\n\n" >> $@
+       printf "#include <stddef.h>\n" >> $@
+       printf "#include <stdint.h>\n\n" >> $@
+       printf "#include \"../grapheme.h\"\n\n" >> $@
        awk -f data/gbt.awk $(GBT) >> $@
        printf "\n" >> $@
        cat src/test_body.c >> $@
diff --git a/data/emo.awk b/data/emo.awk
index bebec8f..3897b71 100644
--- a/data/emo.awk
+++ b/data/emo.awk
@@ -34,7 +34,7 @@ function hextonum(str) {
 }
 
 function mktable(name, array, arrlen) {
-       printf("\nstatic const Codepoint "name"_table[][2] = {\n");
+       printf("\nstatic const uint32_t "name"_table[][2] = {\n");
 
        for (j = 0; j < arrlen; j++) {
                if (ind = index(array[j], "..")) {
diff --git a/data/gbp.awk b/data/gbp.awk
index c9ebc31..92038ca 100644
--- a/data/gbp.awk
+++ b/data/gbp.awk
@@ -58,7 +58,7 @@ function hextonum(str) {
 }
 
 function mktable(name, array, arrlen) {
-       printf("\nstatic const Codepoint "name"_table[][2] = {\n");
+       printf("static const uint32_t "name"_table[][2] = {\n");
 
        for (j = 0; j < arrlen; j++) {
                if (ind = index(array[j], "..")) {
diff --git a/data/gbt.awk b/data/gbt.awk
index 5fd7c0a..f76e665 100644
--- a/data/gbt.awk
+++ b/data/gbt.awk
@@ -4,7 +4,7 @@
 BEGIN {
        FS = " "
 
-       printf("struct test {\n\tCodepoint *cp;\n\tsize_t cplen;\n");
+       printf("struct test {\n\tuint32_t *cp;\n\tsize_t cplen;\n");
        printf("\tsize_t *len;\n\tsize_t lenlen;\n\tchar *descr;\n};\n\n");
        printf("static const struct test t[] = {\n");
 }
@@ -38,7 +38,7 @@ $0 ~ /^#/ || $0 ~ /^\s*$/ { next }
        len[nlens++] = curlen;
 
        # print code points
-       printf("\t{\n\t\t.cp     = (Codepoint[]){ ");
+       printf("\t{\n\t\t.cp     = (uint32_t[]){ ");
        for (i = 0; i < ncps; i++) {
                printf("0x%s", cp[i]);
                if (i + 1 < ncps) {
diff --git a/grapheme.h b/grapheme.h
index dae667b..3bcbd77 100644
--- a/grapheme.h
+++ b/grapheme.h
@@ -3,6 +3,14 @@
 #define GRAPHEME_H
 
 #include <stddef.h>
+#include <stdint.h>
+
+#define CP_INVALID UINT32_C(0xFFFD)
+
+int grapheme_boundary(uint32_t, uint32_t, int *);
+
+size_t grapheme_cp_decode(uint32_t *, const uint8_t *, size_t);
+size_t grapheme_cp_encode(uint32_t, uint8_t *, size_t);
 
 size_t grapheme_len(const char *);
 
diff --git a/src/boundary.h b/src/boundary.h
deleted file mode 100644
index 77d0054..0000000
--- a/src/boundary.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#ifndef BOUNDARY_H
-#define BOUNDARY_H
-
-#include <stddef.h>
-
-#include "codepoint.h"
-
-int boundary(Codepoint, Codepoint, int *);
-
-#endif /* BOUNDARY_H */
diff --git a/src/boundary_body.c b/src/boundary_body.c
index 3a160cf..b86345b 100644
--- a/src/boundary_body.c
+++ b/src/boundary_body.c
@@ -1,10 +1,8 @@
 /* See LICENSE file for copyright and license details. */
 #include <stddef.h>
+#include <stdint.h>
 #include <stdlib.h>
 
-#include "codepoint.h"
-#include "boundary.h"
-
 #define LEN(x) (sizeof(x) / sizeof(*x))
 
 enum {
@@ -15,8 +13,8 @@ enum {
 static int
 cp_cmp(const void *a, const void *b)
 {
-       Codepoint cp = *(Codepoint *)a;
-       Codepoint *range = (Codepoint *)b;
+       uint32_t cp = *(uint32_t *)a;
+       uint32_t *range = (uint32_t *)b;
 
        return (cp >= range[0] && cp <= range[1]) ? 0 : (cp - range[0]);
 }
@@ -40,7 +38,7 @@ enum property {
 };
 
 struct {
-       const Codepoint (*table)[2];
+       const uint32_t (*table)[2];
        size_t tablelen;
 } tables[] = {
        [PROP_CR] = {
@@ -102,7 +100,7 @@ struct {
 };
 
 static int
-is(Codepoint cp[2], char (*props)[2], int index, enum property p)
+is(uint32_t cp[2], char (*props)[2], int index, enum property p)
 {
        if (props[p][index] == 2) {
                /* need to determine property */
@@ -119,9 +117,9 @@ is(Codepoint cp[2], char (*props)[2], int index, enum property p)
 #define IS(I, PROP) (is(cp, props, I, PROP))
 
 int
-boundary(Codepoint cp0, Codepoint cp1, int *state)
+grapheme_boundary(uint32_t cp0, uint32_t cp1, int *state)
 {
-       Codepoint cp[2] = { cp0, cp1 };
+       uint32_t cp[2] = { cp0, cp1 };
        char props[NUM_PROPS][2];
        size_t i;
 
diff --git a/src/codepoint.c b/src/codepoint.c
index 7e76ca5..b1df4dc 100644
--- a/src/codepoint.c
+++ b/src/codepoint.c
@@ -1,5 +1,5 @@
 /* See LICENSE file for copyright and license details. */
-#include "codepoint.h"
+#include "../grapheme.h"
 #include <stdio.h>
 
 #define BETWEEN(c, l, u) (c >= l && c <= u)
diff --git a/src/codepoint.h b/src/codepoint.h
deleted file mode 100644
index 38292ba..0000000
--- a/src/codepoint.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#ifndef CODEPOINT_H
-#define CODEPOINT_H
-
-#include <stddef.h>
-#include <stdint.h>
-
-typedef uint32_t Codepoint;
-
-#define CP_INVALID 0xFFFD
-
-size_t grapheme_cp_decode(uint32_t *, const uint8_t *, size_t);
-size_t grapheme_cp_encode(uint32_t, uint8_t *, size_t);
-
-#endif /* CODEPOINT_H */
diff --git a/src/grapheme.c b/src/grapheme.c
index 8478f87..5f0ad91 100644
--- a/src/grapheme.c
+++ b/src/grapheme.c
@@ -2,13 +2,12 @@
 #include <stddef.h>
 #include <stdlib.h>
 
-#include "codepoint.h"
-#include "boundary.h"
+#include "../grapheme.h"
 
 size_t
 grapheme_len(const char *str)
 {
-       Codepoint cp0, cp1;
+       uint32_t cp0, cp1;
        size_t ret, len = 0;
        int state = 0;
 
@@ -38,7 +37,7 @@ grapheme_len(const char *str)
                /* get next code point */
                ret = grapheme_cp_decode(&cp1, (uint8_t *)(str + len), 5);
 
-               if (cp1 == CP_INVALID || boundary(cp0, cp1, &state)) {
+               if (cp1 == CP_INVALID || grapheme_boundary(cp0, cp1, &state)) {
                        /* we read an invalid cp or have a breakpoint */
                        break;
                } else {
diff --git a/src/test_body.c b/src/test_body.c
index 5f83d93..8cc2fc2 100644
--- a/src/test_body.c
+++ b/src/test_body.c
@@ -1,10 +1,10 @@
 /* See LICENSE file for copyright and license details. */
 #include <stddef.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 
-#include "boundary.h"
-#include "codepoint.h"
+#include "../grapheme.h"
 
 #define LEN(x) (sizeof(x) / sizeof(*x))
 
@@ -350,7 +350,8 @@ int main(void)
        for (i = 0, failed = 0; i < LEN(t); i++) {
                for (j = 0, k = 0, state = 0, len = 1; j < t[i].cplen; j++) {
                        if ((j + 1) == t[i].cplen ||
-                           boundary(t[i].cp[j], t[i].cp[j + 1], &state)) {
+                           grapheme_boundary(t[i].cp[j], t[i].cp[j + 1],
+                                             &state)) {
                                /* check if our resulting length matches */
                                if (k == t[i].lenlen || len != t[i].len[k++]) {
                                        fprintf(stderr, "Failed \"%s\"\n",

Reply via email to