Gitweb links:

...log 
http://git.netsurf-browser.org/librufl.git/shortlog/38ed0277db3d0f20a1ff677e76eef8dece770dbc
...commit 
http://git.netsurf-browser.org/librufl.git/commit/38ed0277db3d0f20a1ff677e76eef8dece770dbc
...tree 
http://git.netsurf-browser.org/librufl.git/tree/38ed0277db3d0f20a1ff677e76eef8dece770dbc

The branch, jmb/ac, has been created
        at  38ed0277db3d0f20a1ff677e76eef8dece770dbc (commit)

- Log -----------------------------------------------------------------
commitdiff 
http://git.netsurf-browser.org/librufl.git/commit/?id=38ed0277db3d0f20a1ff677e76eef8dece770dbc
commit 38ed0277db3d0f20a1ff677e76eef8dece770dbc
Author: John-Mark Bell <[email protected]>
Commit: John-Mark Bell <[email protected]>

    Pave the way for astral character support.
    
    No functional change, but redefine the meaning of the old "size"
    member of the rufl_character_set structure to allow for the
    addition of extension structures in future. This change is
    backwards compatible as it is reusing previously unused bits in
    the size field (which will be set to zero in all existing
    RUfl_caches). Rename the "size" field to "metadata" which better
    reflects its new usage.
    
    Update rufl_character_set_test to follow this change (and fix up
    its parameter types while we're here).

diff --git a/src/rufl_character_set_test.c b/src/rufl_character_set_test.c
index 45fbcaf..2e97894 100644
--- a/src/rufl_character_set_test.c
+++ b/src/rufl_character_set_test.c
@@ -12,18 +12,28 @@
  * Test if a character set contains a character.
  *
  * \param  charset  character set
- * \param  c        character code
+ * \param  u        Unicode codepoint
  * \return  true if present, false if absent
  */
 
-bool rufl_character_set_test(struct rufl_character_set *charset,
-               unsigned int c)
+bool rufl_character_set_test(const struct rufl_character_set *charset,
+               uint32_t u)
 {
-       unsigned int block = c >> 8;
-       unsigned int byte = (c >> 3) & 31;
-       unsigned int bit = c & 7;
+       unsigned int plane = u >> 16;
+       unsigned int block = (u >> 8) & 0xff;
+       unsigned int byte = (u >> 3) & 31;
+       unsigned int bit = u & 7;
 
-       if (256 <= block)
+       if (17 <= plane)
+               return false;
+
+       /* Look for the plane we want */
+       while (PLANE_ID(charset->metadata) != plane &&
+                       EXTENSION_FOLLOWS(charset->metadata)) {
+               charset = (void *)(((uint8_t *)charset) +
+                               PLANE_SIZE(charset->metadata));
+       }
+       if (PLANE_ID(charset->metadata) != plane)
                return false;
 
        if (charset->index[block] == BLOCK_EMPTY)
@@ -31,7 +41,7 @@ bool rufl_character_set_test(struct rufl_character_set *charset,
        else if (charset->index[block] == BLOCK_FULL)
                return true;
        else {
-               unsigned char z = charset->block[charset->index[block]][byte];
+               uint8_t z = charset->block[charset->index[block]][byte];
                return z & (1 << bit);
        }
 }
diff --git a/src/rufl_init.c b/src/rufl_init.c
index b441edc..3ae4ffa 100644
--- a/src/rufl_init.c
+++ b/src/rufl_init.c
@@ -575,9 +575,9 @@ rufl_code rufl_init_scan_font(unsigned int font_index)
        }
 
        /* shrink-wrap */
-       charset->size = offsetof(struct rufl_character_set, block) +
+       charset->metadata = offsetof(struct rufl_character_set, block) +
                        32 * last_used;
-       charset2 = realloc(charset, charset->size);
+       charset2 = realloc(charset, PLANE_SIZE(charset->metadata));
        if (!charset2) {
                free(charset);
                return rufl_OUT_OF_MEMORY;
@@ -696,9 +696,9 @@ rufl_code rufl_init_scan_font_no_enumerate(unsigned int font_index)
        }
 
        /* shrink-wrap */
-       charset->size = offsetof(struct rufl_character_set, block) +
+       charset->metadata = offsetof(struct rufl_character_set, block) +
                        32 * last_used;
-       charset2 = realloc(charset, charset->size);
+       charset2 = realloc(charset, PLANE_SIZE(charset->metadata));
        if (!charset2) {
                free(charset);
                return rufl_OUT_OF_MEMORY;
@@ -885,9 +885,9 @@ rufl_code rufl_init_scan_font_old(unsigned int font_index)
        }
 
        /* shrink-wrap */
-       charset->size = offsetof(struct rufl_character_set, block) +
+       charset->metadata = offsetof(struct rufl_character_set, block) +
                        32 * last_used;
-       charset2 = realloc(charset, charset->size);
+       charset2 = realloc(charset, PLANE_SIZE(charset->metadata));
        if (!charset2) {
                for (i = 0; i < num_umaps; i++)
                        free((umap + i)->encoding);
@@ -1255,7 +1255,8 @@ rufl_code rufl_save_cache(void)
 
                /* character set */
                if (fwrite(rufl_font_list[i].charset,
-                               rufl_font_list[i].charset->size, 1, fp) != 1) {
+                               PLANE_SIZE(rufl_font_list[i].charset->metadata),
+                               1, fp) != 1) {
                        LOG("fwrite: 0x%x: %s", errno, strerror(errno));
                        fclose(fp);
                        return rufl_OK;
@@ -1430,7 +1431,7 @@ rufl_code rufl_load_cache(void)
                        return rufl_OUT_OF_MEMORY;
                }
 
-               charset->size = size;
+               charset->metadata = size;
                if (fread(charset->index, size - sizeof size, 1, fp) != 1) {
                        if (feof(fp))
                                LOG("fread: %s", "unexpected eof");
diff --git a/src/rufl_internal.h b/src/rufl_internal.h
index 7d793a1..9c7d46e 100644
--- a/src/rufl_internal.h
+++ b/src/rufl_internal.h
@@ -14,20 +14,78 @@
 #endif
 
 
-/** The available characters in a font. The range which can be represented is
- * 0x0000 to 0xffff. The size of the structure is 4 + 256 + 32 * blocks. A
- * typical * 200 glyph font might have characters in 10 blocks, giving 580
- * bytes. The maximum possible size of the structure is 8388 bytes. Note that
- * since two index values are reserved, fonts with 65280-65024 glyphs may be
- * unrepresentable, if there are no full blocks. This is unlikely. The primary
- * aim of this structure is to make lookup fast. */
+/**
+ * The available Unicode codepoints represented by a font. The entire Unicode
+ * range (U+0000 - U+10FFFF) may be covered by the font, but only codepoints
+ * in the Basic Multilingual Plane (i.e. U+0000 - U+FFFF) can be represented
+ * without the need for extension structures.
+ *
+ * Fonts which provide glyphs for astral characters will set the extension
+ * bit in the structure metadata. If set, this indicates that an additional
+ * character set structure follows immediately after this one. The plane id
+ * field in the structure metadata indicates which plane the structure relates
+ * to. Planes are specified in ascending order (as the most commonly used
+ * codepoints occur in earlier planes). Planes for which the font has no
+ * glyphs are omitted entirely.
+ *
+ * Each plane is subdivided into 256 codepoint blocks (each block representing
+ * 256 contiguous codepoints). Note, however, that two index values are
+ * reserved (to indicate full or empty blocks) so only 254 partial blocks may
+ * be represented. As of Unicode 13, all planes have at least two blocks
+ * unused (or, in the case of the surrogate ranges in the Basic Multilingual
+ * Plane, defined as containing no characters), so all valid codepoints should
+ * be representable using this scheme.
+ *
+ * The size of the structure is 4 + 256 + 32 * blocks. A typical 200 glyph
+ * font might represent codepoints in 10 blocks, using 580 bytes of storage.
+ * A plane with glyphs in every block (but no block fully populated) requires
+ * the maximum possible structure size of (4 + 256 + 32 * 254 =) 8388 bytes.
+ * The maximum storage required for (the unlikely scenario of) a font
+ * providing glyphs in every block in each of the 17 Unicode planes is
+ * 17 * 8388 = 142596 bytes.
+ *
+ * The primary aim of this structure is to make lookup fast.
+ */
 struct rufl_character_set {
-       /** Size of structure / bytes. */
-       size_t size;
+       /** Structure metadata.
+        *
+        * This field contains metadata about the structure in the form:
+        *
+        *    3                   2                   1                   0
+        *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+        * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+        * |E|   PID   |     Reserved      |             Size              |
+        * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+        *
+        * where:
+        *
+        *   extension (E): 1 bit
+        *     If set, another character set covering a different plane
+        *     follows.
+        *
+        *   plane id (PID): 5 bits
+        *     The 0-based index of the Unicode plane this structure relates
+        *     to. Valid values are in the range [0, 16], where 0 represents
+        *     the Basic Multilingual Plane, and 16 represents the
+        *     Supplementary Private Use Area - B.
+        *
+        *   reserved: 10 bits
+        *     These bits are currently unused and must be set to 0.
+        *
+        *   size: 16 bits
+        *     The total size of this structure, in bytes.
+        */
+       uint32_t metadata;
+#      define EXTENSION_FOLLOWS(x) ((x) & (1u<<31))
+#      define PLANE_ID(x) (((x) >> 26) & 0x1f)
+#      define PLANE_SIZE(x) ((x) & 0xffff)
 
-       /** Index table. Each entry represents a block of 256 characters, so
-        * i[k] refers to characters [256*k, 256*(k+1)). The value is either
-        * BLOCK_EMPTY, BLOCK_FULL, or an offset into the block table. */
+       /** Index table.
+        *
+        * Each entry represents a block of 256 codepoints, so i[k] refers
+        * to codepoints [256*k, 256*(k+1)). The value is either BLOCK_EMPTY,
+        * BLOCK_FULL, or an offset into the block table.
+        * */
        uint8_t index[256];
        /** The block has no characters present. */
 #      define BLOCK_EMPTY 254
@@ -142,8 +200,8 @@ rufl_code rufl_find_font_family(const char *family, rufl_style font_style,
                struct rufl_character_set **charset);
 rufl_code rufl_find_font(unsigned int font, unsigned int font_size,
                const char *encoding, font_f *fhandle);
-bool rufl_character_set_test(struct rufl_character_set *charset,
-               unsigned int c);
+bool rufl_character_set_test(const struct rufl_character_set *charset,
+               uint32_t u);
 
 
 #define rufl_utf8_read(s, l, u)                                                \


commitdiff 
http://git.netsurf-browser.org/librufl.git/commit/?id=b7d315249f56dffa626354a4feaea5135f930e8a
commit b7d315249f56dffa626354a4feaea5135f930e8a
Author: John-Mark Bell <[email protected]>
Commit: John-Mark Bell <[email protected]>

    Use types with explicit sizes

diff --git a/src/rufl_internal.h b/src/rufl_internal.h
index 711c5ae..7d793a1 100644
--- a/src/rufl_internal.h
+++ b/src/rufl_internal.h
@@ -5,6 +5,7 @@
  * Copyright 2006 James Bursa <[email protected]>
  */
 
+#include <inttypes.h>
 #include <limits.h>
 #include <oslib/font.h>
 #include "rufl.h"
@@ -27,7 +28,7 @@ struct rufl_character_set {
        /** Index table. Each entry represents a block of 256 characters, so
         * i[k] refers to characters [256*k, 256*(k+1)). The value is either
         * BLOCK_EMPTY, BLOCK_FULL, or an offset into the block table. */
-       unsigned char index[256];
+       uint8_t index[256];
        /** The block has no characters present. */
 #      define BLOCK_EMPTY 254
        /** All characters in the block are present. */
@@ -35,16 +36,16 @@ struct rufl_character_set {
 
        /** Block table. Each entry is a 256-bit bitmap indicating which
         * characters in the block are present and absent. */
-       unsigned char block[254][32];
+       uint8_t block[254][32];
 };
 
 
 /** Part of struct rufl_unicode_map. */
 struct rufl_unicode_map_entry {
-       /** Unicode value. */
-       unsigned short u;
+       /** Unicode value (must be in Basic Multilingual Plane). */
+       uint16_t u;
        /** Corresponding character. */
-       unsigned char c;
+       uint8_t c;
 };
 
 
@@ -181,7 +182,8 @@ bool rufl_character_set_test(struct rufl_character_set *charset,
 
 struct rufl_glyph_map_entry {
        const char *glyph_name;
-       unsigned short u;
+       /* The glyph map contains codepoints in the BMP only */
+       uint16_t u;
 };
 
 extern const struct rufl_glyph_map_entry rufl_glyph_map[];


commitdiff 
http://git.netsurf-browser.org/librufl.git/commit/?id=d8a8a3cc929986ed793b24f7cac1727f1f356bc9
commit d8a8a3cc929986ed793b24f7cac1727f1f356bc9
Author: John-Mark Bell <[email protected]>
Commit: John-Mark Bell <[email protected]>

    Need OSLib when building tests for RISC OS
    
    More fallout from the ancient BUILD/HOST confusion

diff --git a/Makefile b/Makefile
index 1e44ea3..562cc5c 100644
--- a/Makefile
+++ b/Makefile
@@ -31,7 +31,7 @@ endif
 
 # OSLib
 ifneq ($(findstring clean,$(MAKECMDGOALS)),clean)
-  ifeq ($(BUILD),arm-unknown-riscos)
+  ifeq ($(HOST),arm-unknown-riscos)
     CFLAGS := $(CFLAGS) -I$(PREFIX)/include
     LDFLAGS := $(LDFLAGS) -lOSLib32
   endif


commitdiff 
http://git.netsurf-browser.org/librufl.git/commit/?id=7b1430ad00add849dcbaa97472b5f71a9b9d5699
commit 7b1430ad00add849dcbaa97472b5f71a9b9d5699
Author: John-Mark Bell <[email protected]>
Commit: John-Mark Bell <[email protected]>

    Detect overlong and invalid UTF-8 sequences

diff --git a/src/rufl_internal.h b/src/rufl_internal.h
index 0868571..711c5ae 100644
--- a/src/rufl_internal.h
+++ b/src/rufl_internal.h
@@ -151,22 +151,28 @@ bool rufl_character_set_test(struct rufl_character_set *charset,
                u = ((s[0] & 0x7) << 18) | ((s[1] & 0x3f) << 12) |             \
                                ((s[2] & 0x3f) << 6) | (s[3] & 0x3f);          \
                s += 4; l -= 4;                                                \
+               if (u < 0x10000) u = 0xfffd;                                   \
        } else if (3 <= l && ((s[0] & 0xf0) == 0xe0) &&                        \
                        ((s[1] & 0xc0) == 0x80) &&                             \
                        ((s[2] & 0xc0) == 0x80)) {                             \
                u = ((s[0] & 0xf) << 12) | ((s[1] & 0x3f) << 6) |              \
                                (s[2] & 0x3f);                                 \
                s += 3; l -= 3;                                                \
+               if (u < 0x800) u = 0xfffd;                                     \
        } else if (2 <= l && ((s[0] & 0xe0) == 0xc0) &&                        \
                        ((s[1] & 0xc0) == 0x80)) {                             \
                u = ((s[0] & 0x3f) << 6) | (s[1] & 0x3f);                      \
                s += 2; l -= 2;                                                \
+               if (u < 0x80) u = 0xfffd;                                      \
        } else if ((s[0] & 0x80) == 0) {                                       \
                u = s[0];                                                      \
                s++; l--;                                                      \
        } else {                                                               \
                u = 0xfffd;                                                    \
                s++; l--;                                                      \
+       }                                                                      \
+       if ((u >= 0xd800 && u <= 0xdfff) || u == 0xfffe || u == 0xffff) {      \
+               u = 0xfffd;                                                    \
        }
 
 #define rufl_CACHE "<Wimp$ScrapDir>.RUfl_cache"


-----------------------------------------------------------------------


-- 
RISC OS Unicode Font Library
_______________________________________________
netsurf-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to