commit c31ca4f0d107e505602fc746dd09001b33dd1811
Author: Laslo Hunhold <[email protected]>
AuthorDate: Sun Aug 28 14:59:24 2022 +0200
Commit: Laslo Hunhold <[email protected]>
CommitDate: Sun Aug 28 15:01:21 2022 +0200
Convert grapheme_next*() manuals to being template-based
This removes a lot of redundancy and makes them much easier to
maintain.
Signed-off-by: Laslo Hunhold <[email protected]>
diff --git a/Makefile b/Makefile
index f61e50e..bbf6714 100644
--- a/Makefile
+++ b/Makefile
@@ -191,14 +191,14 @@ gen/word-test.h: data/WordBreakTest.txt gen/word-test
man/grapheme_decode_utf8.3: man/grapheme_decode_utf8.sh config.mk
man/grapheme_encode_utf8.3: man/grapheme_encode_utf8.sh config.mk
man/grapheme_is_character_break.3: man/grapheme_is_character_break.sh config.mk
-man/grapheme_next_character_break.3: man/grapheme_next_character_break.sh
config.mk
-man/grapheme_next_line_break.3: man/grapheme_next_line_break.sh config.mk
-man/grapheme_next_sentence_break.3: man/grapheme_next_sentence_break.sh
config.mk
-man/grapheme_next_word_break.3: man/grapheme_next_word_break.sh config.mk
-man/grapheme_next_character_break_utf8.3:
man/grapheme_next_character_break_utf8.sh config.mk
-man/grapheme_next_line_break_utf8.3: man/grapheme_next_line_break_utf8.sh
config.mk
-man/grapheme_next_sentence_break_utf8.3:
man/grapheme_next_sentence_break_utf8.sh config.mk
-man/grapheme_next_word_break_utf8.3: man/grapheme_next_word_break_utf8.sh
config.mk
+man/grapheme_next_character_break.3: man/grapheme_next_character_break.sh
man/template/next_break.sh config.mk
+man/grapheme_next_line_break.3: man/grapheme_next_line_break.sh
man/template/next_break.sh config.mk
+man/grapheme_next_sentence_break.3: man/grapheme_next_sentence_break.sh
man/template/next_break.sh config.mk
+man/grapheme_next_word_break.3: man/grapheme_next_word_break.sh
man/template/next_break.sh config.mk
+man/grapheme_next_character_break_utf8.3:
man/grapheme_next_character_break_utf8.sh man/template/next_break_utf8.sh
config.mk
+man/grapheme_next_line_break_utf8.3: man/grapheme_next_line_break_utf8.sh
man/template/next_break_utf8.sh config.mk
+man/grapheme_next_sentence_break_utf8.3:
man/grapheme_next_sentence_break_utf8.sh man/template/next_break_utf8.sh
config.mk
+man/grapheme_next_word_break_utf8.3: man/grapheme_next_word_break_utf8.sh
man/template/next_break_utf8.sh config.mk
man/grapheme_to_uppercase.3: man/grapheme_to_uppercase.sh
man/template/to_case.sh config.mk
man/grapheme_to_lowercase.3: man/grapheme_to_lowercase.sh
man/template/to_case.sh config.mk
man/grapheme_to_titlecase.3: man/grapheme_to_titlecase.sh
man/template/to_case.sh config.mk
diff --git a/man/grapheme_next_character_break.sh
b/man/grapheme_next_character_break.sh
index c3135e7..0d91dcc 100644
--- a/man/grapheme_next_character_break.sh
+++ b/man/grapheme_next_character_break.sh
@@ -1,57 +1,3 @@
-cat << EOF
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_CHARACTER_BREAK 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_character_break
-.Nd determine codepoint-offset to next grapheme cluster break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_character_break "const uint_least32_t *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_character_break
-function computes the offset (in codepoints) to the next grapheme
-cluster break (see
-.Xr libgrapheme 7 )
-in the codepoint array
-.Va str
-of length
-.Va len .
-If a grapheme cluster begins at
-.Va str
-this offset is equal to the length of said grapheme cluster.
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For UTF-8-encoded input data
-.Xr grapheme_next_character_break_utf8 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_character_break
-function returns the offset (in codepoints) to the next grapheme cluster
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh SEE ALSO
-.Xr grapheme_is_character_break 3 ,
-.Xr grapheme_next_character_break_utf8 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_character_break
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt [email protected]
-EOF
+TYPE="character" \
+REALTYPE="grapheme cluster" \
+ $SH man/template/next_break.sh
diff --git a/man/grapheme_next_character_break_utf8.sh
b/man/grapheme_next_character_break_utf8.sh
index 0bcf9ce..5a2ecb2 100644
--- a/man/grapheme_next_character_break_utf8.sh
+++ b/man/grapheme_next_character_break_utf8.sh
@@ -1,97 +1,3 @@
-cat << EOF
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_CHARACTER_BREAK_UTF8 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_character_break_utf8
-.Nd determine byte-offset to next grapheme cluster break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_character_break_utf8 "const char *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_character_break_utf8
-function computes the offset (in bytes) to the next grapheme
-cluster break (see
-.Xr libgrapheme 7 )
-in the UTF-8-encoded string
-.Va str
-of length
-.Va len .
-If a grapheme cluster begins at
-.Va str
-this offset is equal to the length of said grapheme cluster.
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For non-UTF-8 input data
-.Xr grapheme_is_character_break 3
-and
-.Xr grapheme_next_character_break 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_character_break_utf8
-function returns the offset (in bytes) to the next grapheme cluster
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh EXAMPLES
-.Bd -literal
-/* cc (-static) -o example example.c -lgrapheme */
-#include <grapheme.h>
-#include <stdint.h>
-#include <stdio.h>
-
-int
-main(void)
-{
- /* UTF-8 encoded input */
- char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
- "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
- "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
- "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
- size_t ret, len, off;
-
- printf("Input: \\"%s\\"\\n", s);
-
- /* print each grapheme cluster with byte-length */
- printf("Grapheme clusters in NUL-delimited input:\\n");
- for (off = 0; s[off] != '\\0'; off += ret) {
- ret = grapheme_next_character_break_utf8(s + off, SIZE_MAX);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
- printf("\\n");
-
- /* do the same, but this time string is length-delimited */
- len = 17;
- printf("Grapheme clusters in input delimited to %zu bytes:\\n", len);
- for (off = 0; off < len; off += ret) {
- ret = grapheme_next_character_break_utf8(s + off, len - off);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
-
- return 0;
-}
-.Ed
-.Sh SEE ALSO
-.Xr grapheme_is_character_break 3 ,
-.Xr grapheme_next_character_break 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_character_break_utf8
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt [email protected]
-EOF
+TYPE="character" \
+REALTYPE="grapheme cluster" \
+ $SH man/template/next_break_utf8.sh
diff --git a/man/grapheme_next_line_break.sh b/man/grapheme_next_line_break.sh
index 7ef5fe8..fcd84e1 100644
--- a/man/grapheme_next_line_break.sh
+++ b/man/grapheme_next_line_break.sh
@@ -1,53 +1,3 @@
-cat << EOF
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_LINE_BREAK 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_line_break
-.Nd determine codepoint-offset to next grapheme cluster break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_line_break "const uint_least32_t *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_line_break
-function computes the offset (in codepoints) to the next possible line
-break (see
-.Xr libgrapheme 7 )
-in the codepoint array
-.Va str
-of length
-.Va len .
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For UTF-8-encoded input data
-.Xr grapheme_next_line_break_utf8 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_line_break
-function returns the offset (in codepoints) to the next possible line
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh SEE ALSO
-.Xr grapheme_next_line_break_utf8 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_line_break
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt [email protected]
-EOF
+TYPE="line" \
+REALTYPE="possible line" \
+ $SH man/template/next_break.sh
diff --git a/man/grapheme_next_line_break_utf8.sh
b/man/grapheme_next_line_break_utf8.sh
index c4a1d42..c666f96 100644
--- a/man/grapheme_next_line_break_utf8.sh
+++ b/man/grapheme_next_line_break_utf8.sh
@@ -1,91 +1,3 @@
-cat << EOF
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_LINE_BREAK_UTF8 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_line_break_utf8
-.Nd determine byte-offset to next possible line break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_line_break_utf8 "const char *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_line_break_utf8
-function computes the offset (in bytes) to the next possible line
-break (see
-.Xr libgrapheme 7 )
-in the UTF-8-encoded string
-.Va str
-of length
-.Va len .
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For non-UTF-8 input data
-.Xr grapheme_next_line_break 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_line_break_utf8
-function returns the offset (in bytes) to the next possible line
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh EXAMPLES
-.Bd -literal
-/* cc (-static) -o example example.c -lgrapheme */
-#include <grapheme.h>
-#include <stdint.h>
-#include <stdio.h>
-
-int
-main(void)
-{
- /* UTF-8 encoded input */
- char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
- "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
- "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
- "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
- size_t ret, len, off;
-
- printf("Input: \\"%s\\"\\n", s);
-
- /* print each grapheme cluster with byte-length */
- printf("Grapheme clusters in NUL-delimited input:\\n");
- for (off = 0; s[off] != '\\0'; off += ret) {
- ret = grapheme_next_line_break_utf8(s + off, SIZE_MAX);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
- printf("\\n");
-
- /* do the same, but this time string is length-delimited */
- len = 17;
- printf("Grapheme clusters in input delimited to %zu bytes:\\n", len);
- for (off = 0; off < len; off += ret) {
- ret = grapheme_next_line_break_utf8(s + off, len - off);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
-
- return 0;
-}
-.Ed
-.Sh SEE ALSO
-.Xr grapheme_next_line_break 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_line_break_utf8
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt [email protected]
-EOF
+TYPE="line" \
+REALTYPE="possible line" \
+ $SH man/template/next_break_utf8.sh
diff --git a/man/grapheme_next_sentence_break.sh
b/man/grapheme_next_sentence_break.sh
index 0366db6..2325a94 100644
--- a/man/grapheme_next_sentence_break.sh
+++ b/man/grapheme_next_sentence_break.sh
@@ -1,56 +1,3 @@
-cat << EOF
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_SENTENCE_BREAK 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_sentence_break
-.Nd determine codepoint-offset to next sentence break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_sentence_break "const uint_least32_t *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_sentence_break
-function computes the offset (in codepoints) to the next sentence
-break (see
-.Xr libgrapheme 7 )
-in the codepoint array
-.Va str
-of length
-.Va len .
-If a sentence begins at
-.Va str
-this offset is equal to the length of said sentence.
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For UTF-8-encoded input data
-.Xr grapheme_next_sentence_break_utf8 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_sentence_break
-function returns the offset (in codepoints) to the next sentence
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh SEE ALSO
-.Xr grapheme_next_sentence_break_utf8 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_sentence_break
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt [email protected]
-EOF
+TYPE="sentence" \
+REALTYPE="sentence" \
+ $SH man/template/next_break.sh
diff --git a/man/grapheme_next_sentence_break_utf8.sh
b/man/grapheme_next_sentence_break_utf8.sh
index b9fc5de..1d8b3e7 100644
--- a/man/grapheme_next_sentence_break_utf8.sh
+++ b/man/grapheme_next_sentence_break_utf8.sh
@@ -1,94 +1,3 @@
-cat << EOF
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_SENTENCE_BREAK_UTF8 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_sentence_break_utf8
-.Nd determine byte-offset to next sentence break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_sentence_break_utf8 "const char *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_sentence_break_utf8
-function computes the offset (in bytes) to the next sentence
-break (see
-.Xr libgrapheme 7 )
-in the UTF-8-encoded string
-.Va str
-of length
-.Va len .
-If a sentence begins at
-.Va str
-this offset is equal to the length of said sentence.
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For non-UTF-8 input data
-.Xr grapheme_next_sentence_break 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_sentence_break_utf8
-function returns the offset (in bytes) to the next sentence
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh EXAMPLES
-.Bd -literal
-/* cc (-static) -o example example.c -lgrapheme */
-#include <grapheme.h>
-#include <stdint.h>
-#include <stdio.h>
-
-int
-main(void)
-{
- /* UTF-8 encoded input */
- char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
- "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
- "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
- "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
- size_t ret, len, off;
-
- printf("Input: \\"%s\\"\\n", s);
-
- /* print each grapheme cluster with byte-length */
- printf("Grapheme clusters in NUL-delimited input:\\n");
- for (off = 0; s[off] != '\\0'; off += ret) {
- ret = grapheme_next_sentence_break_utf8(s + off, SIZE_MAX);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
- printf("\\n");
-
- /* do the same, but this time string is length-delimited */
- len = 17;
- printf("Grapheme clusters in input delimited to %zu bytes:\\n", len);
- for (off = 0; off < len; off += ret) {
- ret = grapheme_next_sentence_break_utf8(s + off, len - off);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
-
- return 0;
-}
-.Ed
-.Sh SEE ALSO
-.Xr grapheme_next_sentence_break 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_sentence_break_utf8
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt [email protected]
-EOF
+TYPE="sentence" \
+REALTYPE="sentence" \
+ $SH man/template/next_break_utf8.sh
diff --git a/man/grapheme_next_word_break.sh b/man/grapheme_next_word_break.sh
index a72092a..d1ea725 100644
--- a/man/grapheme_next_word_break.sh
+++ b/man/grapheme_next_word_break.sh
@@ -1,56 +1,3 @@
-cat << EOF
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_WORD_BREAK 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_word_break
-.Nd determine codepoint-offset to next word break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_word_break "const uint_least32_t *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_word_break
-function computes the offset (in codepoints) to the next word
-break (see
-.Xr libgrapheme 7 )
-in the codepoint array
-.Va str
-of length
-.Va len .
-If a word begins at
-.Va str
-this offset is equal to the length of said word.
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For UTF-8-encoded input data
-.Xr grapheme_next_word_break_utf8 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_word_break
-function returns the offset (in codepoints) to the next word
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh SEE ALSO
-.Xr grapheme_next_word_break_utf8 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_word_break
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt [email protected]
-EOF
+TYPE="word" \
+REALTYPE="word" \
+ $SH man/template/next_break.sh
diff --git a/man/grapheme_next_word_break_utf8.sh
b/man/grapheme_next_word_break_utf8.sh
index 772fe71..15643bf 100644
--- a/man/grapheme_next_word_break_utf8.sh
+++ b/man/grapheme_next_word_break_utf8.sh
@@ -1,94 +1,3 @@
-cat << EOF
-.Dd 2022-08-26
-.Dt GRAPHEME_NEXT_WORD_BREAK_UTF8 3
-.Os suckless.org
-.Sh NAME
-.Nm grapheme_next_word_break_utf8
-.Nd determine byte-offset to next word break
-.Sh SYNOPSIS
-.In grapheme.h
-.Ft size_t
-.Fn grapheme_next_word_break_utf8 "const char *str" "size_t len"
-.Sh DESCRIPTION
-The
-.Fn grapheme_next_word_break_utf8
-function computes the offset (in bytes) to the next word
-break (see
-.Xr libgrapheme 7 )
-in the UTF-8-encoded string
-.Va str
-of length
-.Va len .
-If a word begins at
-.Va str
-this offset is equal to the length of said word.
-.Pp
-If
-.Va len
-is set to
-.Dv SIZE_MAX
-(stdint.h is already included by grapheme.h) the string
-.Va str
-is interpreted to be NUL-terminated and processing stops when a
-NUL-byte is encountered.
-.Pp
-For non-UTF-8 input data
-.Xr grapheme_next_word_break 3
-can be used instead.
-.Sh RETURN VALUES
-The
-.Fn grapheme_next_word_break_utf8
-function returns the offset (in bytes) to the next word
-break in
-.Va str
-or 0 if
-.Va str
-is
-.Dv NULL .
-.Sh EXAMPLES
-.Bd -literal
-/* cc (-static) -o example example.c -lgrapheme */
-#include <grapheme.h>
-#include <stdint.h>
-#include <stdio.h>
-
-int
-main(void)
-{
- /* UTF-8 encoded input */
- char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
- "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
- "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
- "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
- size_t ret, len, off;
-
- printf("Input: \\"%s\\"\\n", s);
-
- /* print each grapheme cluster with byte-length */
- printf("Grapheme clusters in NUL-delimited input:\\n");
- for (off = 0; s[off] != '\\0'; off += ret) {
- ret = grapheme_next_word_break_utf8(s + off, SIZE_MAX);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
- printf("\\n");
-
- /* do the same, but this time string is length-delimited */
- len = 17;
- printf("Grapheme clusters in input delimited to %zu bytes:\\n", len);
- for (off = 0; off < len; off += ret) {
- ret = grapheme_next_word_break_utf8(s + off, len - off);
- printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off, ret);
- }
-
- return 0;
-}
-.Ed
-.Sh SEE ALSO
-.Xr grapheme_next_word_break 3 ,
-.Xr libgrapheme 7
-.Sh STANDARDS
-.Fn grapheme_next_word_break_utf8
-is compliant with the Unicode 14.0.0 specification.
-.Sh AUTHORS
-.An Laslo Hunhold Aq Mt [email protected]
-EOF
+TYPE="word" \
+REALTYPE="word" \
+ $SH man/template/next_break_utf8.sh
diff --git a/man/template/next_break.sh b/man/template/next_break.sh
new file mode 100644
index 0000000..143d8e6
--- /dev/null
+++ b/man/template/next_break.sh
@@ -0,0 +1,65 @@
+# Template for the grapheme_next_<TYPE>_break(3) manual pages; writes the
+# mdoc source to standard output.
+#
+# Environment read by this script:
+#   TYPE             break type as used in the function name (e.g. "word")
+#   REALTYPE         break type as worded in prose (e.g. "grapheme cluster")
+#   UNICODE_VERSION  Unicode version named in the STANDARDS section
+# NOTE(review): the wrapper scripts added alongside this template set only
+# TYPE and REALTYPE -- confirm the build exports UNICODE_VERSION.
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_$(printf '%s' "$TYPE" | tr '[:lower:]' '[:upper:]')_BREAK 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_${TYPE}_break
+.Nd determine codepoint-offset to next $REALTYPE break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_${TYPE}_break "const uint_least32_t *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_${TYPE}_break
+function computes the offset (in codepoints) to the next $REALTYPE
+break (see
+.Xr libgrapheme 7 )
+in the codepoint array
+.Va str
+of length
+.Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a $REALTYPE begins at
+.Va str
+this offset is equal to the length of said $REALTYPE."; fi)
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For UTF-8-encoded input data
+.Xr grapheme_next_${TYPE}_break_utf8 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_${TYPE}_break
+function returns the offset (in codepoints) to the next $REALTYPE
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh SEE ALSO$(if [ "$TYPE" = "character" ];
+then printf "\n.Xr grapheme_is_character_break 3 ,"; fi)
+.Xr grapheme_next_${TYPE}_break_utf8 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_${TYPE}_break
+is compliant with the Unicode $UNICODE_VERSION specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt [email protected]
+EOF
diff --git a/man/template/next_break_utf8.sh b/man/template/next_break_utf8.sh
new file mode 100644
index 0000000..cc14965
--- /dev/null
+++ b/man/template/next_break_utf8.sh
@@ -0,0 +1,109 @@
+# Template for the grapheme_next_<TYPE>_break_utf8(3) manual pages; writes
+# the mdoc source to standard output.
+#
+# Environment read by this script:
+#   TYPE      break type as used in the function name (e.g. "word")
+#   REALTYPE  break type as worded in prose (e.g. "grapheme cluster")
+#
+# The here-document is unquoted, so every "\\" pair below reaches the
+# generated mdoc source as a single backslash.
+#
+# NOTE(review): STANDARDS hard-codes Unicode 14.0.0 while
+# man/template/next_break.sh reads $UNICODE_VERSION -- confirm which is
+# intended.
+cat << EOF
+.Dd 2022-08-26
+.Dt GRAPHEME_NEXT_$(printf '%s' "$TYPE" | tr '[:lower:]' '[:upper:]')_BREAK_UTF8 3
+.Os suckless.org
+.Sh NAME
+.Nm grapheme_next_${TYPE}_break_utf8
+.Nd determine byte-offset to next $REALTYPE break
+.Sh SYNOPSIS
+.In grapheme.h
+.Ft size_t
+.Fn grapheme_next_${TYPE}_break_utf8 "const char *str" "size_t len"
+.Sh DESCRIPTION
+The
+.Fn grapheme_next_${TYPE}_break_utf8
+function computes the offset (in bytes) to the next $REALTYPE
+break (see
+.Xr libgrapheme 7 )
+in the UTF-8-encoded string
+.Va str
+of length
+.Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a $REALTYPE begins at
+.Va str
+this offset is equal to the length of said $REALTYPE."; fi)
+.Pp
+If
+.Va len
+is set to
+.Dv SIZE_MAX
+(stdint.h is already included by grapheme.h) the string
+.Va str
+is interpreted to be NUL-terminated and processing stops when a
+NUL-byte is encountered.
+.Pp
+For non-UTF-8 input data$(if [ "$TYPE" = "character" ];
+then printf "\n.Xr grapheme_is_character_break 3
+and"; fi)
+.Xr grapheme_next_${TYPE}_break 3
+can be used instead.
+.Sh RETURN VALUES
+The
+.Fn grapheme_next_${TYPE}_break_utf8
+function returns the offset (in bytes) to the next $REALTYPE
+break in
+.Va str
+or 0 if
+.Va str
+is
+.Dv NULL .
+.Sh EXAMPLES
+.Bd -literal
+/* cc (-static) -o example example.c -lgrapheme */
+#include <grapheme.h>
+#include <stdint.h>
+#include <stdio.h>
+
+int
+main(void)
+{
+	/* UTF-8 encoded input */
+	char *s = "T\\\\xC3\\\\xABst \\\\xF0\\\\x9F\\\\x91\\\\xA8\\\\xE2\\\\x80\\\\x8D\\\\xF0"
+	          "\\\\x9F\\\\x91\\\\xA9\\\\xE2\\\\x80\\\\x8D\\\\xF0\\\\x9F\\\\x91\\\\xA6 \\\\xF0"
+	          "\\\\x9F\\\\x87\\\\xBA\\\\xF0\\\\x9F\\\\x87\\\\xB8 \\\\xE0\\\\xA4\\\\xA8\\\\xE0"
+	          "\\\\xA5\\\\x80 \\\\xE0\\\\xAE\\\\xA8\\\\xE0\\\\xAE\\\\xBF!";
+	size_t ret, len, off;
+
+	printf("Input: \\\\"%s\\\\"\\\\n", s);
+
+	/* print each $REALTYPE with byte-length */
+	printf("${REALTYPE}s in NUL-delimited input:\\\\n");
+	for (off = 0; s[off] != '\\\\0'; off += ret) {
+		ret = grapheme_next_${TYPE}_break_utf8(s + off, SIZE_MAX);
+		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
+	}
+	printf("\\\\n");
+
+	/* do the same, but this time string is length-delimited */
+	len = 17;
+	printf("${REALTYPE}s in input delimited to %zu bytes:\\\\n", len);
+	for (off = 0; off < len; off += ret) {
+		ret = grapheme_next_${TYPE}_break_utf8(s + off, len - off);
+		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
+	}
+
+	return 0;
+}
+.Ed
+.Sh SEE ALSO$(if [ "$TYPE" = "character" ];
+then printf "\n.Xr grapheme_is_character_break 3 ,"; fi)
+.Xr grapheme_next_${TYPE}_break 3 ,
+.Xr libgrapheme 7
+.Sh STANDARDS
+.Fn grapheme_next_${TYPE}_break_utf8
+is compliant with the Unicode 14.0.0 specification.
+.Sh AUTHORS
+.An Laslo Hunhold Aq Mt [email protected]
+EOF