commit f4408163156a468667ae24d3b7eb33b5b8346cf0
Author: Laslo Hunhold <[email protected]>
AuthorDate: Sat Aug 27 02:09:10 2022 +0200
Commit: Laslo Hunhold <[email protected]>
CommitDate: Sat Aug 27 02:09:10 2022 +0200
Add UNICODE_VERSION variable to Makefile and add to manual-templates
This simplifies updating to new Unicode versions a bit. The variable is
deliberately not added to config.mk, though, because changing between
Unicode versions is not as simple as downloading new files: it is also
necessary to check all the implemented algorithms for changes.
Signed-off-by: Laslo Hunhold <[email protected]>
diff --git a/Makefile b/Makefile
index 9cdb406..f61e50e 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,8 @@
include config.mk
+UNICODE_VERSION = 14.0.0
+
BENCHMARK =\
benchmark/case\
benchmark/character\
@@ -81,46 +83,46 @@ MAN7 =\
all: data/LICENSE $(MAN3:=.3) $(MAN7:=.7) libgrapheme.a libgrapheme.so
data/DerivedCoreProperties.txt:
- wget -O $@
https://www.unicode.org/Public/14.0.0/ucd/DerivedCoreProperties.txt
+ wget -O $@
https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/DerivedCoreProperties.txt
data/EastAsianWidth.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/EastAsianWidth.txt
+ wget -O $@
https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/EastAsianWidth.txt
data/emoji-data.txt:
- wget -O $@
https://www.unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt
+ wget -O $@
https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/emoji/emoji-data.txt
data/GraphemeBreakProperty.txt:
- wget -O $@
https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakProperty.txt
+ wget -O $@
https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakProperty.txt
data/GraphemeBreakTest.txt:
- wget -O $@
https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakTest.txt
+ wget -O $@
https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/GraphemeBreakTest.txt
data/LICENSE:
wget -O $@ https://www.unicode.org/license.txt
data/LineBreak.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/LineBreak.txt
+ wget -O $@
https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/LineBreak.txt
data/LineBreakTest.txt:
- wget -O $@
https://www.unicode.org/Public/14.0.0/ucd/auxiliary/LineBreakTest.txt
+ wget -O $@
https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/LineBreakTest.txt
data/SentenceBreakProperty.txt:
- wget -O $@
https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakProperty.txt
+ wget -O $@
https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/SentenceBreakProperty.txt
data/SentenceBreakTest.txt:
- wget -O $@
https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakTest.txt
+ wget -O $@
https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/SentenceBreakTest.txt
data/SpecialCasing.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/SpecialCasing.txt
+ wget -O $@
https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/SpecialCasing.txt
data/UnicodeData.txt:
- wget -O $@ https://www.unicode.org/Public/14.0.0/ucd/UnicodeData.txt
+ wget -O $@
https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt
data/WordBreakProperty.txt:
- wget -O $@
https://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakProperty.txt
+ wget -O $@
https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/WordBreakProperty.txt
data/WordBreakTest.txt:
- wget -O $@
https://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakTest.txt
+ wget -O $@
https://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/auxiliary/WordBreakTest.txt
benchmark/case.o: benchmark/case.c config.mk gen/word-test.h grapheme.h
benchmark/util.h
benchmark/character.o: benchmark/character.c config.mk gen/character-test.h
grapheme.h benchmark/util.h
@@ -232,10 +234,10 @@ libgrapheme.so: $(SRC:=.o)
$(CC) -o $@ $(SOFLAGS) $(LDFLAGS) $(SRC:=.o)
$(MAN3:=.3):
- SH=$(SH) $(SH) $(@:.3=.sh) > $@
+ SH=$(SH) UNICODE_VERSION=$(UNICODE_VERSION) $(SH) $(@:.3=.sh) > $@
$(MAN7:=.7):
- SH=$(SH) $(SH) $(@:.7=.sh) > $@
+ SH=$(SH) UNICODE_VERSION=$(UNICODE_VERSION) $(SH) $(@:.7=.sh) > $@
benchmark: $(BENCHMARK)
for m in $(BENCHMARK); do ./$$m; done
diff --git a/man/libgrapheme.sh b/man/libgrapheme.sh
index 040a659..37c2d7a 100644
--- a/man/libgrapheme.sh
+++ b/man/libgrapheme.sh
@@ -51,7 +51,7 @@ example illustrating the possible usage.
.Xr grapheme_next_word_break_utf8 3
.Sh STANDARDS
.Nm
-is compliant with the Unicode 14.0.0 specification.
+is compliant with the Unicode $UNICODE_VERSION specification.
.Sh MOTIVATION
The idea behind every character encoding scheme like ASCII or Unicode
is to express abstract characters (which can be thought of as shapes
diff --git a/man/template/to_case.sh b/man/template/to_case.sh
index eb12764..30951bb 100644
--- a/man/template/to_case.sh
+++ b/man/template/to_case.sh
@@ -52,7 +52,7 @@ is
.Xr libgrapheme 7
.Sh STANDARDS
.Fn grapheme_to_$CASE
-is compliant with the Unicode 14.0.0 specification.
+is compliant with the Unicode $UNICODE_VERSION specification.
.Sh AUTHORS
.An Laslo Hunhold Aq Mt [email protected]
EOF