commit aa5dda2687c4907d6a47e57b1d7973b8f9d158ae
Author:     Laslo Hunhold <[email protected]>
AuthorDate: Tue Aug 16 16:25:31 2022 +0200
Commit:     Laslo Hunhold <[email protected]>
CommitDate: Tue Aug 16 16:25:31 2022 +0200

    Move get_codepoint_*()-util-functions to src/util.c
    
    Signed-off-by: Laslo Hunhold <[email protected]>

diff --git a/src/case.c b/src/case.c
index abafe78..21ec5af 100644
--- a/src/case.c
+++ b/src/case.c
@@ -32,68 +32,6 @@ get_case_offset(uint_least32_t cp, const uint_least16_t *major,
        }
 }
 
-static inline size_t
-get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
-       if (offset < len) {
-               *cp = ((const uint_least32_t *)str)[offset];
-               return 1;
-       } else {
-               *cp = GRAPHEME_INVALID_CODEPOINT;
-               return 0;
-       }
-}
-
-static inline size_t
-get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
-       size_t ret;
-
-       if (offset < len) {
-               ret = grapheme_decode_utf8((const char *)str + offset,
-                                          len - offset, cp);
-
-               if (unlikely(len == SIZE_MAX && cp == 0)) {
-                       return 0;
-               } else {
-                       return ret;
-               }
-       } else {
-               *cp = GRAPHEME_INVALID_CODEPOINT;
-               return 0;
-       }
-}
-
-static inline size_t
-set_codepoint(uint_least32_t cp, void *str, size_t len, size_t offset)
-{
-       if (str == NULL || len == 0) {
-               return 1;
-       }
-
-       if (offset < len) {
-               ((uint_least32_t *)str)[offset] = cp;
-               return 1;
-       } else {
-               return 0;
-       }
-}
-
-static inline size_t
-set_codepoint_utf8(uint_least32_t cp, void *str, size_t len, size_t offset)
-{
-       if (str == NULL || len == 0) {
-               return grapheme_encode_utf8(cp, NULL, 0);
-       }
-
-       if (offset < len) {
-               return grapheme_encode_utf8(cp, (char *)str + offset,
-                                           len - offset);
-       } else {
-               return grapheme_encode_utf8(cp, NULL, 0);
-       }
-}
-
 static inline size_t
 to_case(const void *src, size_t srclen, void *dest, size_t destlen,
         size_t srcnumprocess, uint_least8_t final_sigma_level,
diff --git a/src/line.c b/src/line.c
index 2a5623d..bced2f2 100644
--- a/src/line.c
+++ b/src/line.c
@@ -19,30 +19,6 @@ get_break_prop(uint_least32_t cp)
        }
 }
 
-static inline size_t
-get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
-       if (offset < len) {
-               *cp = ((const uint_least32_t *)str)[offset];
-               return 1;
-       } else {
-               *cp = GRAPHEME_INVALID_CODEPOINT;
-               return 0;
-       }
-}
-
-static inline size_t
-get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
-       if (offset < len) {
-               return grapheme_decode_utf8((const char *)str + offset,
-                                           len - offset, cp);
-       } else {
-               *cp = GRAPHEME_INVALID_CODEPOINT;
-               return 0;
-       }
-}
-
 static size_t
 next_line_break(const void *str, size_t len, size_t (*get_codepoint)
                 (const void *, size_t, size_t, uint_least32_t *))
@@ -152,7 +128,9 @@ next_line_break(const void *str, size_t len, size_t (*get_codepoint)
                         *     and one (CL | CP) to the left of the middle
                         *     spot
                         */
-                       if (lb25_level == 0 && cp0_prop == LINE_BREAK_PROP_NU) {
+                       if ((lb25_level == 0 ||
+                            lb25_level == 1) &&
+                           cp0_prop == LINE_BREAK_PROP_NU) {
                                /* sequence has begun */
                                lb25_level = 1;
                        } else if ((lb25_level == 1 || lb25_level == 2) &&
diff --git a/src/sentence.c b/src/sentence.c
index b26e42a..a5850ec 100644
--- a/src/sentence.c
+++ b/src/sentence.c
@@ -20,30 +20,6 @@ get_break_prop(uint_least32_t cp)
        }
 }
 
-static inline size_t
-get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
-       if (offset < len) {
-               *cp = ((const uint_least32_t *)str)[offset];
-               return 1;
-       } else {
-               *cp = GRAPHEME_INVALID_CODEPOINT;
-               return 0;
-       }
-}
-
-static inline size_t
-get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
-       if (offset < len) {
-               return grapheme_decode_utf8((const char *)str + offset,
-                                           len - offset, cp);
-       } else {
-               *cp = GRAPHEME_INVALID_CODEPOINT;
-               return 0;
-       }
-}
-
 static size_t
 next_sentence_break(const void *str, size_t len, size_t (*get_codepoint)
                     (const void *, size_t, size_t, uint_least32_t *))
@@ -142,7 +118,8 @@ next_sentence_break(const void *str, size_t len, size_t (*get_codepoint)
                         *     left of the middle spot.
                         *
                         */
-                       if (aterm_close_sp_level == 0 &&
+                       if ((aterm_close_sp_level == 0 ||
+                            aterm_close_sp_level == 1) &&
                            skip.b == SENTENCE_BREAK_PROP_ATERM) {
                                /* sequence has begun */
                                aterm_close_sp_level = 1;
@@ -162,7 +139,8 @@ next_sentence_break(const void *str, size_t len, size_t 
(*get_codepoint)
                                aterm_close_sp_level = 0;
                        }
 
-                       if (saterm_close_sp_parasep_level == 0 &&
+                       if ((saterm_close_sp_parasep_level == 0 ||
+                            saterm_close_sp_parasep_level == 1) &&
                            (skip.b == SENTENCE_BREAK_PROP_STERM ||
                             skip.b == SENTENCE_BREAK_PROP_ATERM)) {
                                /* sequence has begun */
diff --git a/src/util.c b/src/util.c
index 7b8e176..8a1dfc9 100644
--- a/src/util.c
+++ b/src/util.c
@@ -6,3 +6,98 @@
 #include "../gen/types.h"
 #include "../grapheme.h"
 #include "util.h"
+
+/*
+ * Read the element at index offset from str, which is accessed as an
+ * array of uint_least32_t with len elements. On success the element
+ * is stored in *cp and 1 is returned. If offset is not below len,
+ * GRAPHEME_INVALID_CODEPOINT is stored in *cp and 0 is returned.
+ */
+inline size_t
+get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
+{
+       if (offset < len) {
+               *cp = ((const uint_least32_t *)str)[offset];
+               return 1;
+       } else {
+               *cp = GRAPHEME_INVALID_CODEPOINT;
+               return 0;
+       }
+}
+
+/*
+ * Decode one code point from str, accessed as a char array of length
+ * len, starting at byte index offset, using grapheme_decode_utf8().
+ * The decoded value is stored in *cp and the decoder's return value
+ * is passed on, except that 0 is returned when len is SIZE_MAX and
+ * the decoded code point is 0 (presumably the terminator of a
+ * NUL-terminated string; confirm against the API documentation).
+ * If offset is not below len, GRAPHEME_INVALID_CODEPOINT is stored in
+ * *cp and 0 is returned.
+ */
+inline size_t
+get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
+{
+       size_t ret;
+
+       if (offset < len) {
+               ret = grapheme_decode_utf8((const char *)str + offset,
+                                          len - offset, cp);
+
+               /*
+                * check the decoded code point; comparing the pointer
+                * cp itself against 0 would only test it for NULL
+                */
+               if (unlikely(len == SIZE_MAX && *cp == 0)) {
+                       return 0;
+               } else {
+                       return ret;
+               }
+       } else {
+               *cp = GRAPHEME_INVALID_CODEPOINT;
+               return 0;
+       }
+}
+
+/*
+ * Store cp at index offset of str, which is accessed as an array of
+ * uint_least32_t with len elements, and return 1. If str is NULL or
+ * len is 0, nothing is stored and 1 is returned as well. If offset is
+ * not below len, nothing is stored and 0 is returned.
+ */
+inline size_t
+set_codepoint(uint_least32_t cp, void *str, size_t len, size_t offset)
+{
+       if (str == NULL || len == 0) {
+               return 1;
+       }
+
+       if (offset < len) {
+               ((uint_least32_t *)str)[offset] = cp;
+               return 1;
+       } else {
+               return 0;
+       }
+}
+
+/*
+ * Encode cp with grapheme_encode_utf8() into str, accessed as a char
+ * array, starting at byte index offset and passing len - offset as the
+ * remaining size; the encoder's return value is passed on. If str is
+ * NULL, len is 0 or offset is not below len, the encoder is called
+ * with a NULL buffer of size 0 instead and its result is returned.
+ */
+inline size_t
+set_codepoint_utf8(uint_least32_t cp, void *str, size_t len, size_t offset)
+{
+       if (str == NULL || len == 0) {
+               return grapheme_encode_utf8(cp, NULL, 0);
+       }
+
+       if (offset < len) {
+               return grapheme_encode_utf8(cp, (char *)str + offset,
+                                           len - offset);
+       } else {
+               return grapheme_encode_utf8(cp, NULL, 0);
+       }
+}
diff --git a/src/util.h b/src/util.h
index b61a026..edac8b5 100644
--- a/src/util.h
+++ b/src/util.h
@@ -25,4 +25,10 @@
        #define unlikely(expr) (expr)
 #endif
 
+size_t get_codepoint(const void *, size_t, size_t, uint_least32_t *);
+size_t get_codepoint_utf8(const void *, size_t, size_t, uint_least32_t *);
+
+size_t set_codepoint(uint_least32_t, void *, size_t, size_t);
+size_t set_codepoint_utf8(uint_least32_t, void *, size_t, size_t);
+
 #endif /* UTIL_H */
diff --git a/src/word.c b/src/word.c
index 4e7c411..e8e9b44 100644
--- a/src/word.c
+++ b/src/word.c
@@ -19,30 +19,6 @@ get_break_prop(uint_least32_t cp)
        }
 }
 
-static inline size_t
-get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
-       if (offset < len) {
-               *cp = ((const uint_least32_t *)str)[offset];
-               return 1;
-       } else {
-               *cp = GRAPHEME_INVALID_CODEPOINT;
-               return 0;
-       }
-}
-
-static inline size_t
-get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
-{
-       if (offset < len) {
-               return grapheme_decode_utf8((const char *)str + offset,
-                                           len - offset, cp);
-       } else {
-               *cp = GRAPHEME_INVALID_CODEPOINT;
-               return 0;
-       }
-}
-
 static size_t
 next_word_break(const void *str, size_t len, size_t (*get_codepoint)
                 (const void *, size_t, size_t, uint_least32_t *))

Reply via email to