commit 8a7e2ee85f0a2824e48e85e57534c5b18113cf07
Author:     Laslo Hunhold <[email protected]>
AuthorDate: Sat Sep 24 01:54:52 2022 +0200
Commit:     Laslo Hunhold <[email protected]>
CommitDate: Sat Sep 24 01:54:52 2022 +0200

    Compile the library in freestanding mode
    
    Looking closely, we never explicitly depend on the standard library
    within the actual library code. This can be explicitly expressed by
    setting -ffreestanding during object-compilation and -nostdlib during
    linking. The result is a clean library with zero libc-symbols, allowing
    it to be used even without an operating system (kernel code, ELF,
    etc.), by making use of the freestanding implementation form defined
    in the standard[0].
    
    To be freestanding, the code may only include <float.h>, <iso646.h>,
    <limits.h>, <stdalign.h>, <stdarg.h>, <stdbool.h>, <stddef.h>,
    <stdint.h> and <stdnoreturn.h>. We satisfy this condition implicitly,
    but there are some erroneous supplementary includes that are removed
    in this commit. Additionally, a strict compiler implementation simply
    appends the U-suffix to the argument of UINT16_C (et al.), which is why
    calls to it have to be changed to really include only constants.
    
    [0]:https://www.iso-9899.info/n1570.html#4.p6
    
    Signed-off-by: Laslo Hunhold <[email protected]>

diff --git a/config.mk b/config.mk
index 38b0b4b..14aedd3 100644
--- a/config.mk
+++ b/config.mk
@@ -15,8 +15,8 @@ BUILD_CPPFLAGS = $(CPPFLAGS)
 BUILD_CFLAGS   = $(CFLAGS)
 BUILD_LDFLAGS  = $(LDFLAGS)
 
-SHFLAGS  = -fPIC
-SOFLAGS  = -shared -Wl,--soname=libgrapheme.so
+SHFLAGS  = -fPIC -ffreestanding
+SOFLAGS  = -shared -nostdlib -Wl,--soname=libgrapheme.so
 
 # tools
 CC       = cc
diff --git a/src/character.c b/src/character.c
index 78da33f..4a0a05e 100644
--- a/src/character.c
+++ b/src/character.c
@@ -1,8 +1,7 @@
 /* See LICENSE file for copyright and license details. */
+#include <limits.h>
 #include <stdbool.h>
 #include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
 
 #include "../gen/character.h"
 #include "../grapheme.h"
@@ -10,96 +9,96 @@
 
 static const uint_least16_t dont_break[NUM_CHAR_BREAK_PROPS] = {
        [CHAR_BREAK_PROP_OTHER] =
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
        [CHAR_BREAK_PROP_CR] =
-               UINT16_C(1 << CHAR_BREAK_PROP_LF),            /* GB3  */
+               UINT16_C(1) << CHAR_BREAK_PROP_LF,            /* GB3  */
        [CHAR_BREAK_PROP_EXTEND] =
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
        [CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
        [CHAR_BREAK_PROP_HANGUL_L] =
-               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_L)     | /* GB6  */
-               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V)     | /* GB6  */
-               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_LV)    | /* GB6  */
-               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_LVT)   | /* GB6  */
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+               UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L     | /* GB6  */
+               UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V     | /* GB6  */
+               UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV    | /* GB6  */
+               UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT   | /* GB6  */
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
        [CHAR_BREAK_PROP_HANGUL_V] =
-               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V)     | /* GB7  */
-               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T)     | /* GB7  */
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+               UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V     | /* GB7  */
+               UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T     | /* GB7  */
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
        [CHAR_BREAK_PROP_HANGUL_T] =
-               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T)     | /* GB8  */
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+               UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T     | /* GB8  */
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
        [CHAR_BREAK_PROP_HANGUL_LV] =
-               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V)     | /* GB7  */
-               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T)     | /* GB7  */
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+               UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V     | /* GB7  */
+               UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T     | /* GB7  */
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
        [CHAR_BREAK_PROP_HANGUL_LVT] =
-               UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T)     | /* GB8  */
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+               UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T     | /* GB8  */
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
        [CHAR_BREAK_PROP_PREPEND] =
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK)  | /* GB9a */
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK  | /* GB9a */
                (UINT16_C(0xFFFF) &
-                ~(UINT16_C(1 << CHAR_BREAK_PROP_CR)      |
-                  UINT16_C(1 << CHAR_BREAK_PROP_LF)      |
-                  UINT16_C(1 << CHAR_BREAK_PROP_CONTROL)
+                ~(UINT16_C(1) << CHAR_BREAK_PROP_CR      |
+                  UINT16_C(1) << CHAR_BREAK_PROP_LF      |
+                  UINT16_C(1) << CHAR_BREAK_PROP_CONTROL
                  )
                ),                                           /* GB9b */
        [CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
        [CHAR_BREAK_PROP_SPACINGMARK] =
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
        [CHAR_BREAK_PROP_ZWJ] =
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
-               UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
+               UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
 };
 static const uint_least16_t flag_update_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
        [CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)                   |
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND),
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ                   |
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
        [CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC),
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC,
        [CHAR_BREAK_PROP_EXTEND + NUM_CHAR_BREAK_PROPS] =
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)                |
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ),
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND                |
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ,
        [CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC + NUM_CHAR_BREAK_PROPS] =
-               UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)                   |
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTEND),
+               UINT16_C(1) << CHAR_BREAK_PROP_ZWJ                   |
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
 };
 static const uint_least16_t dont_break_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
        [CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
-               UINT16_C(1 << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC),
+               UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC,
 };
 static const uint_least16_t flag_update_gb12_13[2 * NUM_CHAR_BREAK_PROPS] = {
        [CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
-               UINT16_C(1 << CHAR_BREAK_PROP_REGIONAL_INDICATOR),
+               UINT16_C(1) << CHAR_BREAK_PROP_REGIONAL_INDICATOR,
 };
 static const uint_least16_t dont_break_gb12_13[2 * NUM_CHAR_BREAK_PROPS] = {
        [CHAR_BREAK_PROP_REGIONAL_INDICATOR + NUM_CHAR_BREAK_PROPS] =
-               UINT16_C(1 << CHAR_BREAK_PROP_REGIONAL_INDICATOR),
+               UINT16_C(1) << CHAR_BREAK_PROP_REGIONAL_INDICATOR,
 };
 
 static inline enum char_break_property
@@ -135,23 +134,23 @@ grapheme_is_character_break(uint_least32_t cp0, 
uint_least32_t cp1, GRAPHEME_STA
                state->gb11_flag =
                        flag_update_gb11[cp0_prop + NUM_CHAR_BREAK_PROPS *
                                         state->gb11_flag] &
-                       UINT16_C(1 << cp1_prop);
+                       UINT16_C(1) << cp1_prop;
                state->gb12_13_flag =
                        flag_update_gb12_13[cp0_prop + NUM_CHAR_BREAK_PROPS *
                                            state->gb12_13_flag] &
-                       UINT16_C(1 << cp1_prop);
+                       UINT16_C(1) << cp1_prop;
 
                /*
                 * Apply grapheme cluster breaking algorithm (UAX #29), see
                 * 
http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
                 */
-               notbreak = (dont_break[cp0_prop] & UINT16_C(1 << cp1_prop)) ||
+               notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
                           (dont_break_gb11[cp0_prop + state->gb11_flag *
                                            NUM_CHAR_BREAK_PROPS] &
-                           UINT16_C(1 << cp1_prop)) ||
+                           (UINT16_C(1) << cp1_prop)) ||
                           (dont_break_gb12_13[cp0_prop + state->gb12_13_flag *
                                               NUM_CHAR_BREAK_PROPS] &
-                           UINT16_C(1 << cp1_prop));
+                           (UINT16_C(1) << cp1_prop));
 
                /* update or reset flags (when we have a break) */
                if (likely(!notbreak)) {
@@ -168,9 +167,9 @@ grapheme_is_character_break(uint_least32_t cp0, 
uint_least32_t cp1, GRAPHEME_STA
                 * Given we have no state, this behaves as if the state-booleans
                 * were all set to false
                 */
-               notbreak = (dont_break[cp0_prop] & UINT16_C(1 << cp1_prop)) ||
-                          (dont_break_gb11[cp0_prop] & UINT16_C(1 << 
cp1_prop)) ||
-                          (dont_break_gb12_13[cp0_prop] & UINT16_C(1 << 
cp1_prop));
+               notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
+                          (dont_break_gb11[cp0_prop] & (UINT16_C(1) << 
cp1_prop)) ||
+                          (dont_break_gb12_13[cp0_prop] & (UINT16_C(1) << 
cp1_prop));
        }
 
        return !notbreak;
diff --git a/src/line.c b/src/line.c
index 051e152..6dbb217 100644
--- a/src/line.c
+++ b/src/line.c
@@ -1,8 +1,6 @@
 /* See LICENSE file for copyright and license details. */
 #include <stdbool.h>
 #include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
 
 #include "../gen/line.h"
 #include "../grapheme.h"
diff --git a/src/sentence.c b/src/sentence.c
index d464edc..c302747 100644
--- a/src/sentence.c
+++ b/src/sentence.c
@@ -1,8 +1,6 @@
 /* See LICENSE file for copyright and license details. */
 #include <stdbool.h>
 #include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
 
 #include "../gen/sentence.h"
 #include "../grapheme.h"
diff --git a/src/utf8.c b/src/utf8.c
index 46db763..0cada62 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -1,5 +1,6 @@
 /* See LICENSE file for copyright and license details. */
-#include <stdio.h>
+#include <stddef.h>
+#include <stdint.h>
 
 #include "../grapheme.h"
 #include "util.h"
diff --git a/src/util.c b/src/util.c
index 373d91e..2a2b7d0 100644
--- a/src/util.c
+++ b/src/util.c
@@ -1,7 +1,8 @@
 /* See LICENSE file for copyright and license details. */
+#include <limits.h>
 #include <stdbool.h>
+#include <stddef.h>
 #include <stdint.h>
-#include <stdlib.h>
 
 #include "../gen/types.h"
 #include "../grapheme.h"
@@ -88,6 +89,12 @@ herodotus_reader_next_codepoint_break(const HERODOTUS_READER 
*r)
        }
 }
 
+size_t
+herodotus_reader_number_read(const HERODOTUS_READER *r)
+{
+       return r->off;
+}
+
 enum herodotus_status
 herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32_t *cp)
 {
@@ -202,7 +209,7 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
 }
 
 size_t
-herodotus_writer_number_written(HERODOTUS_WRITER *w)
+herodotus_writer_number_written(const HERODOTUS_WRITER *w)
 {
        return w->off;
 }
diff --git a/src/word.c b/src/word.c
index dffb5b5..91e1c31 100644
--- a/src/word.c
+++ b/src/word.c
@@ -1,8 +1,6 @@
 /* See LICENSE file for copyright and license details. */
 #include <stdbool.h>
 #include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
 
 #include "../gen/word.h"
 #include "../grapheme.h"

Reply via email to