src/Makefile.am | 32 +++ src/Makefile.sources | 2 src/dump-indic-data.cc | 43 +++++ src/dump-khmer-data.cc | 43 +++++ src/dump-myanmar-data.cc | 43 +++++ src/dump-use-data.cc | 38 ++++ src/gen-use-table.py | 2 src/hb-ot-shape-complex-indic-private.hh | 244 +++++++++++++++++++++++++++-- src/hb-ot-shape-complex-indic.cc | 214 ------------------------- src/hb-ot-shape-complex-khmer-private.hh | 124 ++++++++++++++ src/hb-ot-shape-complex-khmer.cc | 118 -------------- src/hb-ot-shape-complex-myanmar-private.hh | 171 ++++++++++++++++++++ src/hb-ot-shape-complex-myanmar.cc | 153 ------------------ src/hb-ot-shape-complex-use-private.hh | 2 src/hb-ot-shape-complex-use-table.cc | 2 src/hb-ot-shape-complex-use.cc | 4 16 files changed, 728 insertions(+), 507 deletions(-)
New commits: commit 507804479a81672ec0c089313adad9fa308d78fd Author: Behdad Esfahbod <beh...@behdad.org> Date: Tue Feb 13 21:46:28 2018 -0800 [use] Add dump-use-data diff --git a/src/Makefile.am b/src/Makefile.am index f90b2ac2..1ccd8438 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -355,6 +355,7 @@ check_PROGRAMS += \ dump-indic-data \ dump-khmer-data \ dump-myanmar-data \ + dump-use-data \ $(NULL) dump_indic_data_SOURCES = dump-indic-data.cc hb-ot-shape-complex-indic-table.cc dump_indic_data_CPPFLAGS = $(HBCFLAGS) @@ -365,6 +366,9 @@ dump_khmer_data_LDADD = libharfbuzz.la $(HBLIBS) dump_myanmar_data_SOURCES = dump-myanmar-data.cc hb-ot-shape-complex-indic-table.cc dump_myanmar_data_CPPFLAGS = $(HBCFLAGS) dump_myanmar_data_LDADD = libharfbuzz.la $(HBLIBS) +dump_use_data_SOURCES = dump-use-data.cc hb-ot-shape-complex-use-table.cc +dump_use_data_CPPFLAGS = $(HBCFLAGS) +dump_use_data_LDADD = libharfbuzz.la $(HBLIBS) check_PROGRAMS += test-ot-tag TESTS += test-ot-tag diff --git a/src/dump-use-data.cc b/src/dump-use-data.cc new file mode 100644 index 00000000..0e64688f --- /dev/null +++ b/src/dump-use-data.cc @@ -0,0 +1,38 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-ot-shape-complex-use-private.hh" + +int +main (void) +{ + for (hb_codepoint_t u = 0; u <= 0x10FFFF; u++) + { + unsigned int category = hb_use_get_category (u); + if (category != USE_O) + printf("U+%04X %u\n", u, category); + } +} diff --git a/src/gen-use-table.py b/src/gen-use-table.py index 5391f27e..06817255 100755 --- a/src/gen-use-table.py +++ b/src/gen-use-table.py @@ -449,7 +449,7 @@ page_bits = 12 print "}; /* Table items: %d; occupancy: %d%% */" % (offset, occupancy) print print "USE_TABLE_ELEMENT_TYPE" -print "hb_use_get_categories (hb_codepoint_t u)" +print "hb_use_get_category (hb_codepoint_t u)" print "{" print " switch (u >> %d)" % page_bits print " {" diff --git a/src/hb-ot-shape-complex-use-private.hh b/src/hb-ot-shape-complex-use-private.hh index 3e763ae3..f7ded133 100644 --- a/src/hb-ot-shape-complex-use-private.hh +++ b/src/hb-ot-shape-complex-use-private.hh @@ -92,6 +92,6 @@ enum use_category_t { }; HB_INTERNAL USE_TABLE_ELEMENT_TYPE -hb_use_get_categories (hb_codepoint_t u); +hb_use_get_category (hb_codepoint_t u); #endif /* HB_OT_SHAPE_COMPLEX_USE_PRIVATE_HH */ diff --git a/src/hb-ot-shape-complex-use-table.cc b/src/hb-ot-shape-complex-use-table.cc index a67ef061..6823392f 100644 --- a/src/hb-ot-shape-complex-use-table.cc +++ b/src/hb-ot-shape-complex-use-table.cc @@ -690,7 +690,7 @@ static const USE_TABLE_ELEMENT_TYPE use_table[] = { }; /* Table items: 5424; occupancy: 73% */ USE_TABLE_ELEMENT_TYPE -hb_use_get_categories (hb_codepoint_t u) +hb_use_get_category (hb_codepoint_t u) { switch (u >> 12) { diff --git a/src/hb-ot-shape-complex-use.cc b/src/hb-ot-shape-complex-use.cc index 62acd697..ee7653b5 100644 --- a/src/hb-ot-shape-complex-use.cc +++ b/src/hb-ot-shape-complex-use.cc @@ -262,7 +262,7 @@ setup_masks_use (const hb_ot_shape_plan_t *plan, unsigned int count = buffer->len; hb_glyph_info_t *info = buffer->info; for (unsigned int i = 0; i < count; i++) - info[i].use_category() = hb_use_get_categories (info[i].codepoint); + info[i].use_category() = hb_use_get_category (info[i].codepoint); } static void @@ -505,7 +505,7 @@ insert_dotted_circles (const hb_ot_shape_plan_t *plan HB_UNUSED, hb_glyph_info_t dottedcircle = {0}; if (!font->get_nominal_glyph (0x25CCu, &dottedcircle.codepoint)) return; - dottedcircle.use_category() = hb_use_get_categories (0x25CC); + dottedcircle.use_category() = hb_use_get_category (0x25CC); buffer->clear_output (); commit 15ba4fbe01433c8627f9e6a60106ca77d3e1ad4c Author: Behdad Esfahbod <beh...@behdad.org> Date: Tue Feb 13 21:41:51 2018 -0800 [khmer] Add dump-khmer-data diff --git a/src/Makefile.am b/src/Makefile.am index b3be138d..f90b2ac2 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -353,11 +353,15 @@ endif check_PROGRAMS += \ dump-indic-data \ + dump-khmer-data \ dump-myanmar-data \ $(NULL) dump_indic_data_SOURCES = dump-indic-data.cc hb-ot-shape-complex-indic-table.cc dump_indic_data_CPPFLAGS = $(HBCFLAGS) dump_indic_data_LDADD = libharfbuzz.la $(HBLIBS) +dump_khmer_data_SOURCES = dump-khmer-data.cc hb-ot-shape-complex-indic-table.cc +dump_khmer_data_CPPFLAGS = $(HBCFLAGS) +dump_khmer_data_LDADD = libharfbuzz.la $(HBLIBS) dump_myanmar_data_SOURCES = dump-myanmar-data.cc hb-ot-shape-complex-indic-table.cc dump_myanmar_data_CPPFLAGS = $(HBCFLAGS) dump_myanmar_data_LDADD = libharfbuzz.la $(HBLIBS) diff --git a/src/Makefile.sources b/src/Makefile.sources index 376d543a..ec60ec0a 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -109,6 +109,7 @@ HB_OT_sources = \ hb-ot-shape-complex-indic.cc \ hb-ot-shape-complex-indic-private.hh \ hb-ot-shape-complex-indic-table.cc \ + hb-ot-shape-complex-khmer-private.hh \ hb-ot-shape-complex-khmer.cc \ hb-ot-shape-complex-myanmar-private.hh \ hb-ot-shape-complex-myanmar.cc \ diff --git a/src/dump-khmer-data.cc b/src/dump-khmer-data.cc new file mode 100644 index 00000000..7dd09b2b --- /dev/null +++ b/src/dump-khmer-data.cc @@ -0,0 +1,43 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-ot-shape-complex-khmer-private.hh" + +int +main (void) +{ + for (hb_codepoint_t u = 0; u <= 0x10FFFF; u++) + { + hb_glyph_info_t info; + info.codepoint = u; + set_khmer_properties (info); + if (info.khmer_category() != INDIC_SYLLABIC_CATEGORY_OTHER || + info.khmer_position() != INDIC_MATRA_CATEGORY_NOT_APPLICABLE) + printf("U+%04X %u %u\n", u, + info.khmer_category(), + info.khmer_position()); + } +} diff --git a/src/hb-ot-shape-complex-khmer-private.hh b/src/hb-ot-shape-complex-khmer-private.hh new file mode 100644 index 00000000..f90ef967 --- /dev/null +++ b/src/hb-ot-shape-complex-khmer-private.hh @@ -0,0 +1,124 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_KHMER_PRIVATE_HH +#define HB_OT_SHAPE_COMPLEX_KHMER_PRIVATE_HH + +#include "hb-private.hh" + +#include "hb-ot-shape-complex-indic-private.hh" + + +/* buffer var allocations */ +#define khmer_category() indic_category() /* khmer_category_t */ +#define khmer_position() indic_position() /* khmer_position_t */ + + +typedef indic_category_t khmer_category_t; +typedef indic_position_t khmer_position_t; + + +static inline khmer_position_t +matra_position_khmer (khmer_position_t side) +{ + switch ((int) side) + { + case POS_PRE_C: + return POS_PRE_M; + + case POS_POST_C: + case POS_ABOVE_C: + case POS_BELOW_C: + return POS_AFTER_POST; + + default: + return side; + }; +} + +static inline bool +is_consonant_or_vowel (const hb_glyph_info_t &info) +{ + return is_one_of (info, CONSONANT_FLAGS | FLAG (OT_V)); +} + +static inline bool +is_coeng (const hb_glyph_info_t &info) +{ + return is_one_of (info, FLAG (OT_Coeng)); +} + +static inline void +set_khmer_properties (hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + unsigned int type = hb_indic_get_categories (u); + khmer_category_t cat = (khmer_category_t) (type & 0x7Fu); + khmer_position_t pos = (khmer_position_t) (type >> 8); + + + /* + * Re-assign category + */ + + if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */ + else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CDu, 0x17D1u) || + u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */ + { + /* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier. + * https://github.com/roozbehp/unicode-data/issues/5 */ + cat = OT_M; + pos = POS_ABOVE_C; + } + else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) cat = OT_PLACEHOLDER; + else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; + + + /* + * Re-assign position. + */ + + if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS)) + { + pos = POS_BASE_C; + if (u == 0x179Au) + cat = OT_Ra; + } + else if (cat == OT_M) + { + pos = matra_position_khmer (pos); + } + else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_A) | FLAG (OT_Symbol)))) + { + pos = POS_SMVD; + } + + info.khmer_category() = cat; + info.khmer_position() = pos; +} + + +#endif /* HB_OT_SHAPE_COMPLEX_KHMER_PRIVATE_HH */ diff --git a/src/hb-ot-shape-complex-khmer.cc b/src/hb-ot-shape-complex-khmer.cc index 0e2ca88c..304879d8 100644 --- a/src/hb-ot-shape-complex-khmer.cc +++ b/src/hb-ot-shape-complex-khmer.cc @@ -24,105 +24,9 @@ * Google Author(s): Behdad Esfahbod */ -#include "hb-ot-shape-complex-indic-private.hh" +#include "hb-ot-shape-complex-khmer-private.hh" #include "hb-ot-layout-private.hh" -/* buffer var allocations */ -#define khmer_category() indic_category() /* khmer_category_t */ -#define khmer_position() indic_position() /* khmer_position_t */ - - -/* - * Khmer shaper. - */ - -typedef indic_category_t khmer_category_t; -typedef indic_position_t khmer_position_t; - - -static inline khmer_position_t -matra_position_khmer (khmer_position_t side) -{ - switch ((int) side) - { - case POS_PRE_C: - return POS_PRE_M; - - case POS_POST_C: - case POS_ABOVE_C: - case POS_BELOW_C: - return POS_AFTER_POST; - - default: - return side; - }; -} - -static inline bool -is_consonant_or_vowel (const hb_glyph_info_t &info) -{ - return is_one_of (info, CONSONANT_FLAGS | FLAG (OT_V)); -} - -static inline bool -is_coeng (const hb_glyph_info_t &info) -{ - return is_one_of (info, FLAG (OT_Coeng)); -} - -static inline void -set_khmer_properties (hb_glyph_info_t &info) -{ - hb_codepoint_t u = info.codepoint; - unsigned int type = hb_indic_get_categories (u); - khmer_category_t cat = (khmer_category_t) (type & 0x7Fu); - khmer_position_t pos = (khmer_position_t) (type >> 8); - - - /* - * Re-assign category - */ - - if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */ - else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CDu, 0x17D1u) || - u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */ - { - /* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier. - * https://github.com/roozbehp/unicode-data/issues/5 */ - cat = OT_M; - pos = POS_ABOVE_C; - } - else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) cat = OT_PLACEHOLDER; - else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; - - - /* - * Re-assign position. - */ - - if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS)) - { - pos = POS_BASE_C; - if (u == 0x179Au) - cat = OT_Ra; - } - else if (cat == OT_M) - { - pos = matra_position_khmer (pos); - } - else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_A) | FLAG (OT_Symbol)))) - { - pos = POS_SMVD; - } - - info.khmer_category() = cat; - info.khmer_position() = pos; -} - -/* - * Things above this line should ideally be moved to the Indic table itself. - */ - /* * Khmer shaper. commit effddd03bd6fb0aac14b46a16b281f3749e44780 Author: Behdad Esfahbod <beh...@behdad.org> Date: Tue Feb 13 21:38:15 2018 -0800 [myanmar] Add dump-myanmar-data diff --git a/src/Makefile.am b/src/Makefile.am index 571ec113..b3be138d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -353,10 +353,14 @@ endif check_PROGRAMS += \ dump-indic-data \ + dump-myanmar-data \ $(NULL) dump_indic_data_SOURCES = dump-indic-data.cc hb-ot-shape-complex-indic-table.cc dump_indic_data_CPPFLAGS = $(HBCFLAGS) dump_indic_data_LDADD = libharfbuzz.la $(HBLIBS) +dump_myanmar_data_SOURCES = dump-myanmar-data.cc hb-ot-shape-complex-indic-table.cc +dump_myanmar_data_CPPFLAGS = $(HBCFLAGS) +dump_myanmar_data_LDADD = libharfbuzz.la $(HBLIBS) check_PROGRAMS += test-ot-tag TESTS += test-ot-tag diff --git a/src/Makefile.sources b/src/Makefile.sources index f412a65d..376d543a 100644 --- a/src/Makefile.sources +++ b/src/Makefile.sources @@ -110,6 +110,7 @@ HB_OT_sources = \ hb-ot-shape-complex-indic-private.hh \ hb-ot-shape-complex-indic-table.cc \ hb-ot-shape-complex-khmer.cc \ + hb-ot-shape-complex-myanmar-private.hh \ hb-ot-shape-complex-myanmar.cc \ hb-ot-shape-complex-thai.cc \ hb-ot-shape-complex-tibetan.cc \ diff --git a/src/dump-myanmar-data.cc b/src/dump-myanmar-data.cc new file mode 100644 index 00000000..2df9cd98 --- /dev/null +++ b/src/dump-myanmar-data.cc @@ -0,0 +1,43 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-ot-shape-complex-myanmar-private.hh" + +int +main (void) +{ + for (hb_codepoint_t u = 0; u <= 0x10FFFF; u++) + { + hb_glyph_info_t info; + info.codepoint = u; + set_myanmar_properties (info); + if (info.myanmar_category() != INDIC_SYLLABIC_CATEGORY_OTHER || + info.myanmar_position() != INDIC_MATRA_CATEGORY_NOT_APPLICABLE) + printf("U+%04X %u %u\n", u, + info.myanmar_category(), + info.myanmar_position()); + } +} diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh index 34a9653d..867b9362 100644 --- a/src/hb-ot-shape-complex-indic-private.hh +++ b/src/hb-ot-shape-complex-indic-private.hh @@ -34,6 +34,11 @@ #include "hb-ot-shape-private.hh" /* XXX Remove */ +/* buffer var allocations */ +#define indic_category() complex_var_u8_0() /* indic_category_t */ +#define indic_position() complex_var_u8_1() /* indic_position_t */ + + #define INDIC_TABLE_ELEMENT_TYPE uint16_t /* Cateories used in the OpenType spec: @@ -186,9 +191,6 @@ enum indic_matra_category_t { HB_INTERNAL INDIC_TABLE_ELEMENT_TYPE hb_indic_get_categories (hb_codepoint_t u); -/* buffer var allocations */ -#define indic_category() complex_var_u8_0() /* indic_category_t */ -#define indic_position() complex_var_u8_1() /* indic_position_t */ static inline bool is_one_of (const hb_glyph_info_t &info, unsigned int flags) diff --git a/src/hb-ot-shape-complex-myanmar-private.hh b/src/hb-ot-shape-complex-myanmar-private.hh new file mode 100644 index 00000000..04f81bd1 --- /dev/null +++ b/src/hb-ot-shape-complex-myanmar-private.hh @@ -0,0 +1,171 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#ifndef HB_OT_SHAPE_COMPLEX_MYANMAR_PRIVATE_HH +#define HB_OT_SHAPE_COMPLEX_MYANMAR_PRIVATE_HH + +#include "hb-private.hh" + +#include "hb-ot-shape-complex-indic-private.hh" + + +/* buffer var allocations */ +#define myanmar_category() indic_category() /* myanmar_category_t */ +#define myanmar_position() indic_position() /* myanmar_position_t */ + + +/* Note: This enum is duplicated in the -machine.rl source file. + * Not sure how to avoid duplication. */ +enum myanmar_category_t { + OT_As = 18, /* Asat */ + OT_D0 = 20, /* Digit zero */ + OT_DB = OT_N, /* Dot below */ + OT_GB = OT_PLACEHOLDER, + OT_MH = 21, /* Various consonant medial types */ + OT_MR = 22, /* Various consonant medial types */ + OT_MW = 23, /* Various consonant medial types */ + OT_MY = 24, /* Various consonant medial types */ + OT_PT = 25, /* Pwo and other tones */ + OT_VAbv = 26, + OT_VBlw = 27, + OT_VPre = 28, + OT_VPst = 29, + OT_VS = 30, /* Variation selectors */ + OT_P = 31, /* Punctuation */ + OT_D = 32, /* Digits except zero */ +}; + + +static inline void +set_myanmar_properties (hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + unsigned int type = hb_indic_get_categories (u); + indic_category_t cat = (indic_category_t) (type & 0x7Fu); + indic_position_t pos = (indic_position_t) (type >> 8); + + /* Myanmar + * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm#analyze + */ + if (unlikely (hb_in_range<hb_codepoint_t> (u, 0xFE00u, 0xFE0Fu))) + cat = (indic_category_t) OT_VS; + + switch (u) + { + case 0x104Eu: + cat = (indic_category_t) OT_C; /* The spec says C, IndicSyllableCategory doesn't have. */ + break; + + case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u: + case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u: + case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu: + case 0x25FEu: + cat = (indic_category_t) OT_GB; + break; + + case 0x1004u: case 0x101Bu: case 0x105Au: + cat = (indic_category_t) OT_Ra; + break; + + case 0x1032u: case 0x1036u: + cat = (indic_category_t) OT_A; + break; + + case 0x1039u: + cat = (indic_category_t) OT_H; + break; + + case 0x103Au: + cat = (indic_category_t) OT_As; + break; + + case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u: + case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u: + case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u: + case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u: + case 0x1097u: case 0x1098u: case 0x1099u: + cat = (indic_category_t) OT_D; + break; + + case 0x1040u: + cat = (indic_category_t) OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */ + break; + + case 0x103Eu: case 0x1060u: + cat = (indic_category_t) OT_MH; + break; + + case 0x103Cu: + cat = (indic_category_t) OT_MR; + break; + + case 0x103Du: case 0x1082u: + cat = (indic_category_t) OT_MW; + break; + + case 0x103Bu: case 0x105Eu: case 0x105Fu: + cat = (indic_category_t) OT_MY; + break; + + case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au: + case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu: + cat = (indic_category_t) OT_PT; + break; + + case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u: + case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du: + case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu: + cat = (indic_category_t) OT_SM; + break; + + case 0x104Au: case 0x104Bu: + cat = (indic_category_t) OT_P; + break; + + case 0xAA74u: case 0xAA75u: case 0xAA76u: + /* https://github.com/roozbehp/unicode-data/issues/3 */ + cat = (indic_category_t) OT_C; + break; + } + + if (cat == OT_M) + { + switch ((int) pos) + { + case POS_PRE_C: cat = (indic_category_t) OT_VPre; + pos = POS_PRE_M; break; + case POS_ABOVE_C: cat = (indic_category_t) OT_VAbv; break; + case POS_BELOW_C: cat = (indic_category_t) OT_VBlw; break; + case POS_POST_C: cat = (indic_category_t) OT_VPst; break; + } + } + + info.myanmar_category() = (myanmar_category_t) cat; + info.myanmar_position() = pos; +} + + +#endif /* HB_OT_SHAPE_COMPLEX_MYANMAR_PRIVATE_HH */ diff --git a/src/hb-ot-shape-complex-myanmar.cc b/src/hb-ot-shape-complex-myanmar.cc index dacb1abe..3c57bc1f 100644 --- a/src/hb-ot-shape-complex-myanmar.cc +++ b/src/hb-ot-shape-complex-myanmar.cc @@ -24,11 +24,7 @@ * Google Author(s): Behdad Esfahbod */ -#include "hb-ot-shape-complex-indic-private.hh" - -/* buffer var allocations */ -#define myanmar_category() indic_category() /* myanmar_category_t */ -#define myanmar_position() indic_position() /* myanmar_position_t */ +#include "hb-ot-shape-complex-myanmar-private.hh" /* @@ -127,138 +123,6 @@ enum syllable_type_t { #include "hb-ot-shape-complex-myanmar-machine.hh" -/* Note: This enum is duplicated in the -machine.rl source file. - * Not sure how to avoid duplication. */ -enum myanmar_category_t { - OT_As = 18, /* Asat */ - OT_D0 = 20, /* Digit zero */ - OT_DB = OT_N, /* Dot below */ - OT_GB = OT_PLACEHOLDER, - OT_MH = 21, /* Various consonant medial types */ - OT_MR = 22, /* Various consonant medial types */ - OT_MW = 23, /* Various consonant medial types */ - OT_MY = 24, /* Various consonant medial types */ - OT_PT = 25, /* Pwo and other tones */ - OT_VAbv = 26, - OT_VBlw = 27, - OT_VPre = 28, - OT_VPst = 29, - OT_VS = 30, /* Variation selectors */ - OT_P = 31, /* Punctuation */ - OT_D = 32, /* Digits except zero */ -}; - - -static inline void -set_myanmar_properties (hb_glyph_info_t &info) -{ - hb_codepoint_t u = info.codepoint; - unsigned int type = hb_indic_get_categories (u); - indic_category_t cat = (indic_category_t) (type & 0x7Fu); - indic_position_t pos = (indic_position_t) (type >> 8); - - /* Myanmar - * http://www.microsoft.com/typography/OpenTypeDev/myanmar/intro.htm#analyze - */ - if (unlikely (hb_in_range<hb_codepoint_t> (u, 0xFE00u, 0xFE0Fu))) - cat = (indic_category_t) OT_VS; - - switch (u) - { - case 0x104Eu: - cat = (indic_category_t) OT_C; /* The spec says C, IndicSyllableCategory doesn't have. */ - break; - - case 0x002Du: case 0x00A0u: case 0x00D7u: case 0x2012u: - case 0x2013u: case 0x2014u: case 0x2015u: case 0x2022u: - case 0x25CCu: case 0x25FBu: case 0x25FCu: case 0x25FDu: - case 0x25FEu: - cat = (indic_category_t) OT_GB; - break; - - case 0x1004u: case 0x101Bu: case 0x105Au: - cat = (indic_category_t) OT_Ra; - break; - - case 0x1032u: case 0x1036u: - cat = (indic_category_t) OT_A; - break; - - case 0x1039u: - cat = (indic_category_t) OT_H; - break; - - case 0x103Au: - cat = (indic_category_t) OT_As; - break; - - case 0x1041u: case 0x1042u: case 0x1043u: case 0x1044u: - case 0x1045u: case 0x1046u: case 0x1047u: case 0x1048u: - case 0x1049u: case 0x1090u: case 0x1091u: case 0x1092u: - case 0x1093u: case 0x1094u: case 0x1095u: case 0x1096u: - case 0x1097u: case 0x1098u: case 0x1099u: - cat = (indic_category_t) OT_D; - break; - - case 0x1040u: - cat = (indic_category_t) OT_D; /* XXX The spec says D0, but Uniscribe doesn't seem to do. */ - break; - - case 0x103Eu: case 0x1060u: - cat = (indic_category_t) OT_MH; - break; - - case 0x103Cu: - cat = (indic_category_t) OT_MR; - break; - - case 0x103Du: case 0x1082u: - cat = (indic_category_t) OT_MW; - break; - - case 0x103Bu: case 0x105Eu: case 0x105Fu: - cat = (indic_category_t) OT_MY; - break; - - case 0x1063u: case 0x1064u: case 0x1069u: case 0x106Au: - case 0x106Bu: case 0x106Cu: case 0x106Du: case 0xAA7Bu: - cat = (indic_category_t) OT_PT; - break; - - case 0x1038u: case 0x1087u: case 0x1088u: case 0x1089u: - case 0x108Au: case 0x108Bu: case 0x108Cu: case 0x108Du: - case 0x108Fu: case 0x109Au: case 0x109Bu: case 0x109Cu: - cat = (indic_category_t) OT_SM; - break; - - case 0x104Au: case 0x104Bu: - cat = (indic_category_t) OT_P; - break; - - case 0xAA74u: case 0xAA75u: case 0xAA76u: - /* https://github.com/roozbehp/unicode-data/issues/3 */ - cat = (indic_category_t) OT_C; - break; - } - - if (cat == OT_M) - { - switch ((int) pos) - { - case POS_PRE_C: cat = (indic_category_t) OT_VPre; - pos = POS_PRE_M; break; - case POS_ABOVE_C: cat = (indic_category_t) OT_VAbv; break; - case POS_BELOW_C: cat = (indic_category_t) OT_VBlw; break; - case POS_POST_C: cat = (indic_category_t) OT_VPst; break; - } - } - - info.myanmar_category() = (myanmar_category_t) cat; - info.myanmar_position() = pos; -} - - - static void setup_masks_myanmar (const hb_ot_shape_plan_t *plan HB_UNUSED, hb_buffer_t *buffer, commit c0e45b60a001a45c8ed1f12af8f2f222161eca6a Author: Behdad Esfahbod <beh...@behdad.org> Date: Tue Feb 13 21:23:38 2018 -0800 [indic] Rename print-indic-table to dump-indic-data Makes more sense given what this prints, err, dumps. diff --git a/src/Makefile.am b/src/Makefile.am index 67686d8b..571ec113 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -352,11 +352,11 @@ dist_check_SCRIPTS += \ endif check_PROGRAMS += \ - print-indic-table \ + dump-indic-data \ $(NULL) -print_indic_table_SOURCES = print-indic-table.cc hb-ot-shape-complex-indic-table.cc -print_indic_table_CPPFLAGS = $(HBCFLAGS) -DMAIN -print_indic_table_LDADD = libharfbuzz.la $(HBLIBS) +dump_indic_data_SOURCES = dump-indic-data.cc hb-ot-shape-complex-indic-table.cc +dump_indic_data_CPPFLAGS = $(HBCFLAGS) +dump_indic_data_LDADD = libharfbuzz.la $(HBLIBS) check_PROGRAMS += test-ot-tag TESTS += test-ot-tag diff --git a/src/print-indic-table.cc b/src/dump-indic-data.cc similarity index 100% rename from src/print-indic-table.cc rename to src/dump-indic-data.cc commit 9b878bd2165236b067d59410673cf5cc54968fa7 Author: Behdad Esfahbod <beh...@behdad.org> Date: Tue Feb 13 21:22:47 2018 -0800 [indic] Explicitly number indic_position_t So it's easier to cross-reference from output of print-indic-table. diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh index 8c152c92..34a9653d 100644 --- a/src/hb-ot-shape-complex-indic-private.hh +++ b/src/hb-ot-shape-complex-indic-private.hh @@ -77,29 +77,29 @@ enum indic_category_t { /* Visual positions in a syllable from left to right. */ enum indic_position_t { - POS_START, + POS_START = 0, - POS_RA_TO_BECOME_REPH, - POS_PRE_M, - POS_PRE_C, + POS_RA_TO_BECOME_REPH = 1, + POS_PRE_M = 2, + POS_PRE_C = 3, - POS_BASE_C, - POS_AFTER_MAIN, + POS_BASE_C = 4, + POS_AFTER_MAIN = 5, - POS_ABOVE_C, + POS_ABOVE_C = 6, - POS_BEFORE_SUB, - POS_BELOW_C, - POS_AFTER_SUB, + POS_BEFORE_SUB = 7, + POS_BELOW_C = 8, + POS_AFTER_SUB = 9, - POS_BEFORE_POST, - POS_POST_C, - POS_AFTER_POST, + POS_BEFORE_POST = 10, + POS_POST_C = 11, + POS_AFTER_POST = 12, - POS_FINAL_C, - POS_SMVD, + POS_FINAL_C = 13, + POS_SMVD = 14, - POS_END + POS_END = 15 }; /* Categories used in IndicSyllabicCategory.txt from UCD. */ commit 8634846a0a1049c42ea84ca34402957c981b4c73 Author: Behdad Esfahbod <beh...@behdad.org> Date: Tue Feb 13 21:18:00 2018 -0800 More tweaking linker and linker flags diff --git a/src/Makefile.am b/src/Makefile.am index 7616ac6e..67686d8b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -178,7 +178,7 @@ FUZZING_CPPFLAGS = \ -DHB_BUFFER_MAX_OPS_DEFAULT=1024 \ $(NULL) EXTRA_LTLIBRARIES = libharfbuzz-fuzzing.la -libharfbuzz_fuzzing_la_LINK = $(libharfbuzz_la_LINK) +libharfbuzz_fuzzing_la_LINK = $(chosen_linker) $(libharfbuzz_fuzzing_la_LDFLAGS) libharfbuzz_fuzzing_la_SOURCES = $(libharfbuzz_la_SOURCES) libharfbuzz_fuzzing_la_CPPFLAGS = $(HBCFLAGS) $(HBLIBCXXFLAGS) $(FUZZING_CPPFLAGS) libharfbuzz_fuzzing_la_LDFLAGS = $(AM_LDFLAGS) @@ -207,11 +207,11 @@ EXTRA_DIST += harfbuzz-icu.pc.in if HAVE_GOBJECT lib_LTLIBRARIES += libharfbuzz-gobject.la -libharfbuzz_gobject_la_LINK = $(libharfbuzz_la_LINK) +libharfbuzz_gobject_la_LINK = $(chosen_linker) $(libharfbuzz_gobject_la_LDFLAGS) libharfbuzz_gobject_la_SOURCES = $(HB_GOBJECT_sources) nodist_libharfbuzz_gobject_la_SOURCES = $(HB_GOBJECT_ENUM_sources) libharfbuzz_gobject_la_CPPFLAGS = $(HBCFLAGS) $(HBNOLIBCXXFLAGS) $(GOBJECT_CFLAGS) -libharfbuzz_gobject_la_LDFLAGS = $(libharfbuzz_la_LDFLAGS) +libharfbuzz_gobject_la_LDFLAGS = $(base_link_flags) libharfbuzz_gobject_la_LIBADD = $(GOBJECT_LIBS) libharfbuzz.la pkginclude_HEADERS += $(HB_GOBJECT_headers) nodist_pkginclude_HEADERS += $(HB_GOBJECT_ENUM_headers) commit d830d3d2448b2248dff642c701b442e1260e2d85 Author: Behdad Esfahbod <beh...@behdad.org> Date: Tue Feb 13 19:01:03 2018 -0800 [indic] Share some copy/pasta code diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh index 4d700f74..8c152c92 100644 --- a/src/hb-ot-shape-complex-indic-private.hh +++ b/src/hb-ot-shape-complex-indic-private.hh @@ -190,6 +190,31 @@ hb_indic_get_categories (hb_codepoint_t u); #define indic_category() complex_var_u8_0() /* indic_category_t */ #define indic_position() complex_var_u8_1() /* indic_position_t */ +static inline bool +is_one_of (const hb_glyph_info_t &info, unsigned int flags) +{ + /* If it ligated, all bets are off. */ + if (_hb_glyph_info_ligated (&info)) return false; + return !!(FLAG_UNSAFE (info.indic_category()) & flags); +} + +static inline bool +is_joiner (const hb_glyph_info_t &info) +{ + return is_one_of (info, JOINER_FLAGS); +} + +static inline bool +is_consonant (const hb_glyph_info_t &info) +{ + return is_one_of (info, CONSONANT_FLAGS); +} + +static inline bool +is_halant (const hb_glyph_info_t &info) +{ + return is_one_of (info, FLAG (OT_H)); +} #define IN_HALF_BLOCK(u, Base) (((u) & ~0x7Fu) == (Base)) @@ -245,7 +270,7 @@ hb_indic_get_categories (hb_codepoint_t u); ) static inline indic_position_t -matra_position (hb_codepoint_t u, indic_position_t side) +matra_position_indic (hb_codepoint_t u, indic_position_t side) { switch ((int) side) { @@ -355,7 +380,7 @@ set_indic_properties (hb_glyph_info_t &info) } else if (cat == OT_M) { - pos = matra_position (u, pos); + pos = matra_position_indic (u, pos); } else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) /* | FLAG (OT_VD) */ | FLAG (OT_A) | FLAG (OT_Symbol)))) { diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 5594f8bd..32ad86a5 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -33,33 +33,6 @@ */ -static inline bool -is_one_of (const hb_glyph_info_t &info, unsigned int flags) -{ - /* If it ligated, all bets are off. */ - if (_hb_glyph_info_ligated (&info)) return false; - return !!(FLAG_UNSAFE (info.indic_category()) & flags); -} - -static inline bool -is_joiner (const hb_glyph_info_t &info) -{ - return is_one_of (info, JOINER_FLAGS); -} - -static inline bool -is_consonant (const hb_glyph_info_t &info) -{ - return is_one_of (info, CONSONANT_FLAGS); -} - -static inline bool -is_halant (const hb_glyph_info_t &info) -{ - return is_one_of (info, FLAG (OT_H)); -} - - /* * Indic configurations. Note that we do not want to keep every single script-specific * behavior in these tables necessarily. This should mainly be used for per-script diff --git a/src/hb-ot-shape-complex-khmer.cc b/src/hb-ot-shape-complex-khmer.cc index 2db4f609..0e2ca88c 100644 --- a/src/hb-ot-shape-complex-khmer.cc +++ b/src/hb-ot-shape-complex-khmer.cc @@ -28,8 +28,8 @@ #include "hb-ot-layout-private.hh" /* buffer var allocations */ -#define khmer_category() complex_var_u8_0() /* khmer_category_t */ -#define khmer_position() complex_var_u8_1() /* khmer_position_t */ +#define khmer_category() indic_category() /* khmer_category_t */ +#define khmer_position() indic_position() /* khmer_position_t */ /* @@ -41,7 +41,7 @@ typedef indic_position_t khmer_position_t; static inline khmer_position_t -matra_position (khmer_position_t side) +matra_position_khmer (khmer_position_t side) { switch ((int) side) { @@ -59,21 +59,7 @@ matra_position (khmer_position_t side) } static inline bool -is_one_of (const hb_glyph_info_t &info, unsigned int flags) -{ - /* If it ligated, all bets are off. */ - if (_hb_glyph_info_ligated (&info)) return false; - return !!(FLAG_UNSAFE (info.khmer_category()) & flags); -} - -static inline bool -is_joiner (const hb_glyph_info_t &info) -{ - return is_one_of (info, JOINER_FLAGS); -} - -static inline bool -is_consonant (const hb_glyph_info_t &info) +is_consonant_or_vowel (const hb_glyph_info_t &info) { return is_one_of (info, CONSONANT_FLAGS | FLAG (OT_V)); } @@ -122,7 +108,7 @@ set_khmer_properties (hb_glyph_info_t &info) } else if (cat == OT_M) { - pos = matra_position (pos); + pos = matra_position_khmer (pos); } else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_A) | FLAG (OT_Symbol)))) { @@ -404,7 +390,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, /* Mark all subsequent consonants as below. */ for (unsigned int i = base + 1; i < end; i++) - if (is_consonant (info[i])) + if (is_consonant_or_vowel (info[i])) info[i].khmer_position() = POS_BELOW_C; /* Mark final consonants. A final consonant is one appearing after a matra, @@ -412,7 +398,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, for (unsigned int i = base + 1; i < end; i++) if (info[i].khmer_category() == OT_M) { for (unsigned int j = i + 1; j < end; j++) - if (is_consonant (info[j])) { + if (is_consonant_or_vowel (info[j])) { info[j].khmer_position() = POS_FINAL_C; break; } @@ -455,7 +441,7 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan, { unsigned int last = base; for (unsigned int i = base + 1; i < end; i++) - if (is_consonant (info[i])) + if (is_consonant_or_vowel (info[i])) { for (unsigned int j = last + 1; j < i; j++) if (info[j].khmer_position() < POS_SMVD) diff --git a/src/hb-ot-shape-complex-myanmar.cc b/src/hb-ot-shape-complex-myanmar.cc index 31bb976b..dacb1abe 100644 --- a/src/hb-ot-shape-complex-myanmar.cc +++ b/src/hb-ot-shape-complex-myanmar.cc @@ -27,8 +27,8 @@ #include "hb-ot-shape-complex-indic-private.hh" /* buffer var allocations */ -#define myanmar_category() complex_var_u8_0() /* myanmar_category_t */ -#define myanmar_position() complex_var_u8_1() /* myanmar_position_t */ +#define myanmar_category() indic_category() /* myanmar_category_t */ +#define myanmar_position() indic_position() /* myanmar_position_t */ /* @@ -149,21 +149,6 @@ enum myanmar_category_t { }; -static inline bool -is_one_of (const hb_glyph_info_t &info, unsigned int flags) -{ - /* If it ligated, all bets are off. */ - if (_hb_glyph_info_ligated (&info)) return false; - return !!(FLAG_UNSAFE (info.myanmar_category()) & flags); -} - -static inline bool -is_consonant (const hb_glyph_info_t &info) -{ - return is_one_of (info, CONSONANT_FLAGS); -} - - static inline void set_myanmar_properties (hb_glyph_info_t &info) { commit 397ed53e55b9450742867a43d164b498ec735f50 Author: Behdad Esfahbod <beh...@behdad.org> Date: Tue Feb 13 18:54:26 2018 -0800 [indic] Add print-indic-table diff --git a/src/Makefile.am b/src/Makefile.am index ee8e1d56..7616ac6e 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -9,6 +9,8 @@ CLEANFILES = DISTCLEANFILES = MAINTAINERCLEANFILES = DISTCHECK_CONFIGURE_FLAGS = --enable-introspection +TESTS = +check_PROGRAMS = # The following warning options are useful for debugging: -Wpadded #AM_CXXFLAGS = @@ -341,6 +343,7 @@ dist_check_SCRIPTS = \ check-static-inits.sh \ check-symbols.sh \ $(NULL) +TESTS += $(dist_check_SCRIPTS) if !WITH_LIBSTDCXX dist_check_SCRIPTS += \ @@ -348,14 +351,19 @@ dist_check_SCRIPTS += \ $(NULL) endif -check_PROGRAMS = \ - test-ot-tag \ +check_PROGRAMS += \ + print-indic-table \ $(NULL) +print_indic_table_SOURCES = print-indic-table.cc hb-ot-shape-complex-indic-table.cc +print_indic_table_CPPFLAGS = $(HBCFLAGS) -DMAIN +print_indic_table_LDADD = libharfbuzz.la $(HBLIBS) + +check_PROGRAMS += test-ot-tag +TESTS += test-ot-tag test_ot_tag_SOURCES = hb-ot-tag.cc test_ot_tag_CPPFLAGS = $(HBCFLAGS) -DMAIN test_ot_tag_LDADD = libharfbuzz.la $(HBLIBS) -TESTS = $(dist_check_SCRIPTS) $(check_PROGRAMS) TESTS_ENVIRONMENT = \ srcdir="$(srcdir)" \ MAKE="$(MAKE) $(AM_MAKEFLAGS)" \ diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh index a64e9d03..4d700f74 100644 --- a/src/hb-ot-shape-complex-indic-private.hh +++ b/src/hb-ot-shape-complex-indic-private.hh @@ -186,4 +186,189 @@ enum indic_matra_category_t { HB_INTERNAL INDIC_TABLE_ELEMENT_TYPE hb_indic_get_categories (hb_codepoint_t u); +/* buffer var allocations */ +#define indic_category() complex_var_u8_0() /* indic_category_t */ +#define indic_position() complex_var_u8_1() /* indic_position_t */ + + +#define IN_HALF_BLOCK(u, Base) (((u) & ~0x7Fu) == (Base)) + +#define IS_DEVA(u) (IN_HALF_BLOCK (u, 0x0900u)) +#define IS_BENG(u) (IN_HALF_BLOCK (u, 0x0980u)) +#define IS_GURU(u) (IN_HALF_BLOCK (u, 0x0A00u)) +#define IS_GUJR(u) (IN_HALF_BLOCK (u, 0x0A80u)) +#define IS_ORYA(u) (IN_HALF_BLOCK (u, 0x0B00u)) +#define IS_TAML(u) (IN_HALF_BLOCK (u, 0x0B80u)) +#define IS_TELU(u) (IN_HALF_BLOCK (u, 0x0C00u)) +#define IS_KNDA(u) (IN_HALF_BLOCK (u, 0x0C80u)) +#define IS_MLYM(u) (IN_HALF_BLOCK (u, 0x0D00u)) +#define IS_SINH(u) (IN_HALF_BLOCK (u, 0x0D80u)) + + +#define MATRA_POS_LEFT(u) POS_PRE_M +#define MATRA_POS_RIGHT(u) ( \ + IS_DEVA(u) ? POS_AFTER_SUB : \ + IS_BENG(u) ? POS_AFTER_POST : \ + IS_GURU(u) ? POS_AFTER_POST : \ + IS_GUJR(u) ? POS_AFTER_POST : \ + IS_ORYA(u) ? POS_AFTER_POST : \ + IS_TAML(u) ? POS_AFTER_POST : \ + IS_TELU(u) ? (u <= 0x0C42u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ + IS_KNDA(u) ? (u < 0x0CC3u || u > 0xCD6u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ + IS_MLYM(u) ? POS_AFTER_POST : \ + IS_SINH(u) ? POS_AFTER_SUB : \ + /*default*/ POS_AFTER_SUB \ + ) +#define MATRA_POS_TOP(u) ( /* BENG and MLYM don't have top matras. */ \ + IS_DEVA(u) ? POS_AFTER_SUB : \ + IS_GURU(u) ? POS_AFTER_POST : /* Deviate from spec */ \ + IS_GUJR(u) ? POS_AFTER_SUB : \ + IS_ORYA(u) ? POS_AFTER_MAIN : \ + IS_TAML(u) ? POS_AFTER_SUB : \ + IS_TELU(u) ? POS_BEFORE_SUB : \ + IS_KNDA(u) ? POS_BEFORE_SUB : \ + IS_SINH(u) ? POS_AFTER_SUB : \ + /*default*/ POS_AFTER_SUB \ + ) +#define MATRA_POS_BOTTOM(u) ( \ + IS_DEVA(u) ? POS_AFTER_SUB : \ + IS_BENG(u) ? POS_AFTER_SUB : \ + IS_GURU(u) ? POS_AFTER_POST : \ + IS_GUJR(u) ? POS_AFTER_POST : \ + IS_ORYA(u) ? POS_AFTER_SUB : \ + IS_TAML(u) ? POS_AFTER_POST : \ + IS_TELU(u) ? POS_BEFORE_SUB : \ + IS_KNDA(u) ? POS_BEFORE_SUB : \ + IS_MLYM(u) ? POS_AFTER_POST : \ + IS_SINH(u) ? POS_AFTER_SUB : \ + /*default*/ POS_AFTER_SUB \ + ) + +static inline indic_position_t +matra_position (hb_codepoint_t u, indic_position_t side) +{ + switch ((int) side) + { + case POS_PRE_C: return MATRA_POS_LEFT (u); + case POS_POST_C: return MATRA_POS_RIGHT (u); + case POS_ABOVE_C: return MATRA_POS_TOP (u); + case POS_BELOW_C: return MATRA_POS_BOTTOM (u); + }; + return side; +} + +/* XXX + * This is a hack for now. We should move this data into the main Indic table. + * Or completely remove it and just check in the tables. + */ +static const hb_codepoint_t ra_chars[] = { + 0x0930u, /* Devanagari */ + 0x09B0u, /* Bengali */ + 0x09F0u, /* Bengali */ + 0x0A30u, /* Gurmukhi */ /* No Reph */ + 0x0AB0u, /* Gujarati */ + 0x0B30u, /* Oriya */ + 0x0BB0u, /* Tamil */ /* No Reph */ + 0x0C30u, /* Telugu */ /* Reph formed only with ZWJ */ + 0x0CB0u, /* Kannada */ + 0x0D30u, /* Malayalam */ /* No Reph, Logical Repha */ + + 0x0DBBu, /* Sinhala */ /* Reph formed only with ZWJ */ +}; + +static inline bool +is_ra (hb_codepoint_t u) +{ + for (unsigned int i = 0; i < ARRAY_LENGTH (ra_chars); i++) + if (u == ra_chars[i]) + return true; + return false; +} + +static inline void +set_indic_properties (hb_glyph_info_t &info) +{ + hb_codepoint_t u = info.codepoint; + unsigned int type = hb_indic_get_categories (u); + indic_category_t cat = (indic_category_t) (type & 0x7Fu); + indic_position_t pos = (indic_position_t) (type >> 8); + + + /* + * Re-assign category + */ + + /* The following act more like the Bindus. */ + if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0953u, 0x0954u))) + cat = OT_SM; + /* The following act like consonants. */ + else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0x0A72u, 0x0A73u, + 0x1CF5u, 0x1CF6u))) + cat = OT_C; + /* TODO: The following should only be allowed after a Visarga. + * For now, just treat them like regular tone marks. */ + else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CE2u, 0x1CE8u))) + cat = OT_A; + /* TODO: The following should only be allowed after some of + * the nasalization marks, maybe only for U+1CE9..U+1CF1. + * For now, just treat them like tone marks. */ + else if (unlikely (u == 0x1CEDu)) + cat = OT_A; + /* The following take marks in standalone clusters, similar to Avagraha. */ + else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0xA8F2u, 0xA8F7u, + 0x1CE9u, 0x1CECu, + 0x1CEEu, 0x1CF1u))) + { + cat = OT_Symbol; + static_assert (((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol), ""); + } + else if (unlikely (u == 0x0A51u)) + { + /* https://github.com/harfbuzz/harfbuzz/issues/524 */ + cat = OT_M; + pos = POS_BELOW_C; + } + + /* According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil, + * so the Indic shaper needs to know their categories. */ + else if (unlikely (u == 0x11301u || u == 0x11303u)) cat = OT_SM; + else if (unlikely (u == 0x1133cu)) cat = OT_N; + + else if (unlikely (u == 0x0AFBu)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/552 */ + + else if (unlikely (u == 0x0980u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/issues/538 */ + else if (unlikely (u == 0x0C80u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/623 */ + else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) + cat = OT_PLACEHOLDER; + else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; + + + /* + * Re-assign position. + */ + + if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS)) + { + pos = POS_BASE_C; + if (is_ra (u)) + cat = OT_Ra; + } + else if (cat == OT_M) + { + pos = matra_position (u, pos); + } + else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) /* | FLAG (OT_VD) */ | FLAG (OT_A) | FLAG (OT_Symbol)))) + { + pos = POS_SMVD; + } + + if (unlikely (u == 0x0B01u)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */ + + + + info.indic_category() = cat; + info.indic_position() = pos; +} + + #endif /* HB_OT_SHAPE_COMPLEX_INDIC_PRIVATE_HH */ diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 9d150e0d..5594f8bd 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -27,110 +27,12 @@ #include "hb-ot-shape-complex-indic-private.hh" #include "hb-ot-layout-private.hh" -/* buffer var allocations */ -#define indic_category() complex_var_u8_0() /* indic_category_t */ -#define indic_position() complex_var_u8_1() /* indic_position_t */ - /* * Indic shaper. */ -#define IN_HALF_BLOCK(u, Base) (((u) & ~0x7Fu) == (Base)) - -#define IS_DEVA(u) (IN_HALF_BLOCK (u, 0x0900u)) -#define IS_BENG(u) (IN_HALF_BLOCK (u, 0x0980u)) -#define IS_GURU(u) (IN_HALF_BLOCK (u, 0x0A00u)) -#define IS_GUJR(u) (IN_HALF_BLOCK (u, 0x0A80u)) -#define IS_ORYA(u) (IN_HALF_BLOCK (u, 0x0B00u)) -#define IS_TAML(u) (IN_HALF_BLOCK (u, 0x0B80u)) -#define IS_TELU(u) (IN_HALF_BLOCK (u, 0x0C00u)) -#define IS_KNDA(u) (IN_HALF_BLOCK (u, 0x0C80u)) -#define IS_MLYM(u) (IN_HALF_BLOCK (u, 0x0D00u)) -#define IS_SINH(u) (IN_HALF_BLOCK (u, 0x0D80u)) - - -#define MATRA_POS_LEFT(u) POS_PRE_M -#define MATRA_POS_RIGHT(u) ( \ - IS_DEVA(u) ? POS_AFTER_SUB : \ - IS_BENG(u) ? POS_AFTER_POST : \ - IS_GURU(u) ? POS_AFTER_POST : \ - IS_GUJR(u) ? POS_AFTER_POST : \ - IS_ORYA(u) ? POS_AFTER_POST : \ - IS_TAML(u) ? POS_AFTER_POST : \ - IS_TELU(u) ? (u <= 0x0C42u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ - IS_KNDA(u) ? (u < 0x0CC3u || u > 0xCD6u ? POS_BEFORE_SUB : POS_AFTER_SUB) : \ - IS_MLYM(u) ? POS_AFTER_POST : \ - IS_SINH(u) ? POS_AFTER_SUB : \ - /*default*/ POS_AFTER_SUB \ - ) -#define MATRA_POS_TOP(u) ( /* BENG and MLYM don't have top matras. */ \ - IS_DEVA(u) ? POS_AFTER_SUB : \ - IS_GURU(u) ? POS_AFTER_POST : /* Deviate from spec */ \ - IS_GUJR(u) ? POS_AFTER_SUB : \ - IS_ORYA(u) ? POS_AFTER_MAIN : \ - IS_TAML(u) ? POS_AFTER_SUB : \ - IS_TELU(u) ? POS_BEFORE_SUB : \ - IS_KNDA(u) ? POS_BEFORE_SUB : \ - IS_SINH(u) ? POS_AFTER_SUB : \ - /*default*/ POS_AFTER_SUB \ - ) -#define MATRA_POS_BOTTOM(u) ( \ - IS_DEVA(u) ? POS_AFTER_SUB : \ - IS_BENG(u) ? POS_AFTER_SUB : \ - IS_GURU(u) ? POS_AFTER_POST : \ - IS_GUJR(u) ? POS_AFTER_POST : \ - IS_ORYA(u) ? POS_AFTER_SUB : \ - IS_TAML(u) ? POS_AFTER_POST : \ - IS_TELU(u) ? POS_BEFORE_SUB : \ - IS_KNDA(u) ? POS_BEFORE_SUB : \ - IS_MLYM(u) ? POS_AFTER_POST : \ - IS_SINH(u) ? POS_AFTER_SUB : \ - /*default*/ POS_AFTER_SUB \ - ) - -static inline indic_position_t -matra_position (hb_codepoint_t u, indic_position_t side) -{ - switch ((int) side) - { - case POS_PRE_C: return MATRA_POS_LEFT (u); - case POS_POST_C: return MATRA_POS_RIGHT (u); - case POS_ABOVE_C: return MATRA_POS_TOP (u); - case POS_BELOW_C: return MATRA_POS_BOTTOM (u); - }; - return side; -} - -/* XXX - * This is a hack for now. We should move this data into the main Indic table. - * Or completely remove it and just check in the tables. - */ -static const hb_codepoint_t ra_chars[] = { - 0x0930u, /* Devanagari */ - 0x09B0u, /* Bengali */ - 0x09F0u, /* Bengali */ - 0x0A30u, /* Gurmukhi */ /* No Reph */ - 0x0AB0u, /* Gujarati */ - 0x0B30u, /* Oriya */ - 0x0BB0u, /* Tamil */ /* No Reph */ - 0x0C30u, /* Telugu */ /* Reph formed only with ZWJ */ - 0x0CB0u, /* Kannada */ - 0x0D30u, /* Malayalam */ /* No Reph, Logical Repha */ - - 0x0DBBu, /* Sinhala */ /* Reph formed only with ZWJ */ -}; - -static inline bool -is_ra (hb_codepoint_t u) -{ - for (unsigned int i = 0; i < ARRAY_LENGTH (ra_chars); i++) - if (u == ra_chars[i]) - return true; - return false; -} - static inline bool is_one_of (const hb_glyph_info_t &info, unsigned int flags) { @@ -157,95 +59,6 @@ is_halant (const hb_glyph_info_t &info) return is_one_of (info, FLAG (OT_H)); } -static inline void -set_indic_properties (hb_glyph_info_t &info) -{ - hb_codepoint_t u = info.codepoint; - unsigned int type = hb_indic_get_categories (u); - indic_category_t cat = (indic_category_t) (type & 0x7Fu); - indic_position_t pos = (indic_position_t) (type >> 8); - - - /* - * Re-assign category - */ - - /* The following act more like the Bindus. */ - if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x0953u, 0x0954u))) - cat = OT_SM; - /* The following act like consonants. */ - else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0x0A72u, 0x0A73u, - 0x1CF5u, 0x1CF6u))) - cat = OT_C; - /* TODO: The following should only be allowed after a Visarga. - * For now, just treat them like regular tone marks. */ - else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x1CE2u, 0x1CE8u))) - cat = OT_A; - /* TODO: The following should only be allowed after some of - * the nasalization marks, maybe only for U+1CE9..U+1CF1. - * For now, just treat them like tone marks. */ - else if (unlikely (u == 0x1CEDu)) - cat = OT_A; - /* The following take marks in standalone clusters, similar to Avagraha. */ - else if (unlikely (hb_in_ranges<hb_codepoint_t> (u, 0xA8F2u, 0xA8F7u, - 0x1CE9u, 0x1CECu, - 0x1CEEu, 0x1CF1u))) - { - cat = OT_Symbol; - static_assert (((int) INDIC_SYLLABIC_CATEGORY_AVAGRAHA == OT_Symbol), ""); - } - else if (unlikely (u == 0x0A51u)) - { - /* https://github.com/harfbuzz/harfbuzz/issues/524 */ - cat = OT_M; - pos = POS_BELOW_C; - } - - /* According to ScriptExtensions.txt, these Grantha marks may also be used in Tamil, - * so the Indic shaper needs to know their categories. */ - else if (unlikely (u == 0x11301u || u == 0x11303u)) cat = OT_SM; - else if (unlikely (u == 0x1133cu)) cat = OT_N; - - else if (unlikely (u == 0x0AFBu)) cat = OT_N; /* https://github.com/harfbuzz/harfbuzz/issues/552 */ - - else if (unlikely (u == 0x0980u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/issues/538 */ - else if (unlikely (u == 0x0C80u)) cat = OT_PLACEHOLDER; /* https://github.com/harfbuzz/harfbuzz/pull/623 */ - else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) - cat = OT_PLACEHOLDER; - else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE; - - - /* - * Re-assign position. - */ - - if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS)) - { - pos = POS_BASE_C; - if (is_ra (u)) - cat = OT_Ra; - } - else if (cat == OT_M) - { - pos = matra_position (u, pos); - } - else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) /* | FLAG (OT_VD) */ | FLAG (OT_A) | FLAG (OT_Symbol)))) - { - pos = POS_SMVD; - } - - if (unlikely (u == 0x0B01u)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */ - - - - info.indic_category() = cat; - info.indic_position() = pos; -} - -/* - * Things above this line should ideally be moved to the Indic table itself. - */ - /* * Indic configurations. Note that we do not want to keep every single script-specific diff --git a/src/print-indic-table.cc b/src/print-indic-table.cc new file mode 100644 index 00000000..d5741388 --- /dev/null +++ b/src/print-indic-table.cc @@ -0,0 +1,43 @@ +/* + * Copyright © 2018 Google, Inc. + * + * This is part of HarfBuzz, a text shaping library. + * + * Permission is hereby granted, without written agreement and without + * license or royalty fees, to use, copy, modify, and distribute this + * software and its documentation for any purpose, provided that the + * above copyright notice and the following two paragraphs appear in + * all copies of this software. + * + * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR + * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES + * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN + * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS + * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO + * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + * + * Google Author(s): Behdad Esfahbod + */ + +#include "hb-ot-shape-complex-indic-private.hh" + +int +main (void) +{ + for (hb_codepoint_t u = 0; u <= 0x10FFFF; u++) + { + hb_glyph_info_t info; + info.codepoint = u; + set_indic_properties (info); + if (info.indic_category() != INDIC_SYLLABIC_CATEGORY_OTHER || + info.indic_position() != INDIC_MATRA_CATEGORY_NOT_APPLICABLE) + printf("U+%04X %u %u\n", u, + info.indic_category(), + info.indic_position()); + } +} _______________________________________________ HarfBuzz mailing list HarfBuzz@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/harfbuzz