cp_decode has been renamed to grapheme_decode and
boundary has been renamed to grapheme_boundary.
The purpose of this is to allow faster text rendering
where both individual code points and grapheme cluster
boundaries are of interest, but it also (1) makes it
easy to do online processing of large documents (the user
does not need to search for spaces, but only needs to know
an upper limit on the encoded length of any codepoint)
and (2) makes the library easy to use
with non-UTF-8 text.
This change also eliminates all unnamespaced, non-static
functions that are not exposed to the user.
---
Makefile | 12 ++++++------
grapheme.h | 7 +++++++
src/boundary.h | 11 -----------
src/boundary_body.c | 5 ++---
src/codepoint.c | 5 +++--
src/codepoint.h | 14 --------------
src/grapheme.c | 9 ++++-----
src/test_body.c | 4 ++--
8 files changed, 24 insertions(+), 43 deletions(-)
delete mode 100644 src/boundary.h
delete mode 100644 src/codepoint.h
diff --git a/Makefile b/Makefile
index 5d52598..6b964ed 100644
--- a/Makefile
+++ b/Makefile
@@ -19,10 +19,10 @@ all: libgrapheme.a libgrapheme.so $(BIN)
src/test: src/test.o $(REQ:=.o)
-src/boundary.o: src/boundary.c config.mk src/codepoint.h src/boundary.h
-src/codepoint.o: src/codepoint.c config.mk src/codepoint.h
-src/grapheme.o: src/grapheme.c config.mk src/codepoint.h src/boundary.h
-src/test.o: src/test.c config.mk src/codepoint.h src/boundary.h
+src/boundary.o: src/boundary.c config.mk grapheme.h
+src/codepoint.o: src/codepoint.c config.mk grapheme.h
+src/grapheme.o: src/grapheme.c config.mk grapheme.h
+src/test.o: src/test.c config.mk grapheme.h
.o:
$(CC) -o $@ $(LDFLAGS) $< $(REQ:=.o)
@@ -42,7 +42,7 @@ test:
src/boundary.c: data/gbt.awk $(GBP) data/emo.awk $(EMO) src/boundary_body.c
printf "/* Automatically generated by gbp.awk and emo.awk */\n" > $@
- printf "#include \"codepoint.h\"\n" >> $@
+ printf "#include \"../grapheme.h\"\n" >> $@
awk -f data/gbp.awk $(GBP) >> $@
awk -f data/emo.awk $(EMO) >> $@
printf "\n" >> $@
@@ -51,7 +51,7 @@ src/boundary.c: data/gbt.awk $(GBP) data/emo.awk $(EMO)
src/boundary_body.c
src/test.c: data/gbt.awk $(GBT) src/test_body.c
printf "/* Automatically generated by gbt.awk */\n" > $@
printf "#include <stddef.h>\n\n" >> $@
- printf "#include \"codepoint.h\"\n\n" >> $@
+ printf "#include \"../grapheme.h\"\n\n" >> $@
awk -f data/gbt.awk $(GBT) >> $@
printf "\n" >> $@
cat src/test_body.c >> $@
diff --git a/grapheme.h b/grapheme.h
index dae667b..21e73fe 100644
--- a/grapheme.h
+++ b/grapheme.h
@@ -3,7 +3,14 @@
#define GRAPHEME_H
#include <stddef.h>
+#include <stdint.h>
+
+typedef uint32_t Codepoint;
+
+#define CP_INVALID 0xFFFD
size_t grapheme_len(const char *);
+size_t grapheme_decode(const char *, Codepoint *);
+int grapheme_boundary(Codepoint, Codepoint, int *);
#endif /* GRAPHEME_H */
diff --git a/src/boundary.h b/src/boundary.h
deleted file mode 100644
index 77d0054..0000000
--- a/src/boundary.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#ifndef BOUNDARY_H
-#define BOUNDARY_H
-
-#include <stddef.h>
-
-#include "codepoint.h"
-
-int boundary(Codepoint, Codepoint, int *);
-
-#endif /* BOUNDARY_H */
diff --git a/src/boundary_body.c b/src/boundary_body.c
index 3a160cf..c6dd133 100644
--- a/src/boundary_body.c
+++ b/src/boundary_body.c
@@ -2,8 +2,7 @@
#include <stddef.h>
#include <stdlib.h>
-#include "codepoint.h"
-#include "boundary.h"
+#include "../grapheme.h"
#define LEN(x) (sizeof(x) / sizeof(*x))
@@ -119,7 +118,7 @@ is(Codepoint cp[2], char (*props)[2], int index, enum
property p)
#define IS(I, PROP) (is(cp, props, I, PROP))
int
-boundary(Codepoint cp0, Codepoint cp1, int *state)
+grapheme_boundary(Codepoint cp0, Codepoint cp1, int *state)
{
Codepoint cp[2] = { cp0, cp1 };
char props[NUM_PROPS][2];
diff --git a/src/codepoint.c b/src/codepoint.c
index 0b63184..ed00987 100644
--- a/src/codepoint.c
+++ b/src/codepoint.c
@@ -1,13 +1,14 @@
/* See LICENSE file for copyright and license details. */
-#include "codepoint.h"
+#include "../grapheme.h"
#include <stdio.h>
#define BETWEEN(c, l, u) (c >= l && c <= u)
#define LEN(x) (sizeof(x) / sizeof(*x))
size_t
-cp_decode(const uint8_t *str, Codepoint *p)
+grapheme_decode(const char *str_, Codepoint *p)
{
+ const uint8_t *str = (const uint8_t *)str_;
size_t off, j, k, l;
struct {
diff --git a/src/codepoint.h b/src/codepoint.h
deleted file mode 100644
index dedc2f4..0000000
--- a/src/codepoint.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* See LICENSE file for copyright and license details. */
-#ifndef CODEPOINT_H
-#define CODEPOINT_H
-
-#include <stddef.h>
-#include <stdint.h>
-
-typedef uint32_t Codepoint;
-
-#define CP_INVALID 0xFFFD
-
-size_t cp_decode(const uint8_t *, Codepoint *);
-
-#endif /* CODEPOINT_H */
diff --git a/src/grapheme.c b/src/grapheme.c
index 4ff917f..445fd8a 100644
--- a/src/grapheme.c
+++ b/src/grapheme.c
@@ -2,8 +2,7 @@
#include <stddef.h>
#include <stdlib.h>
-#include "codepoint.h"
-#include "boundary.h"
+#include "../grapheme.h"
size_t
grapheme_len(const char *str)
@@ -13,18 +12,18 @@ grapheme_len(const char *str)
int state = 0;
/* get first code point */
- if ((ret = cp_decode((const uint8_t *)str, &cp0)) == 0) {
+ if ((ret = grapheme_decode(str, &cp0)) == 0) {
return len;
}
len += ret;
while (cp0 != 0) {
/* get next codepoint */
- if ((ret = cp_decode((const uint8_t *)(str + len), &cp1)) == 0)
{
+ if ((ret = grapheme_decode(str + len, &cp1)) == 0) {
break;
}
- if (boundary(cp0, cp1, &state)) {
+ if (grapheme_boundary(cp0, cp1, &state)) {
/* we have a breakpoint */
break;
} else {
diff --git a/src/test_body.c b/src/test_body.c
index 25dedd2..3f73beb 100644
--- a/src/test_body.c
+++ b/src/test_body.c
@@ -2,7 +2,7 @@
#include <stddef.h>
#include <stdio.h>
-#include "boundary.h"
+#include "../grapheme.h"
#define LEN(x) (sizeof(x) / sizeof(*x))
@@ -14,7 +14,7 @@ int main(void)
for (i = 0; i < LEN(t); i++) {
for (j = 0, k = 0, state = 0, len = 1; j < t[i].cplen; j++) {
if ((j + 1) == t[i].cplen ||
- boundary(t[i].cp[j], t[i].cp[j + 1], &state)) {
+ grapheme_boundary(t[i].cp[j], t[i].cp[j + 1],
&state)) {
/* check if our resulting length matches */
if (k == t[i].lenlen || len != t[i].len[k++]) {
fprintf(stderr, "Failed \"%s\"\n",
--
2.26.2