src/hb-buffer-private.hh | 17 ++++- src/hb-buffer.cc | 25 ++----- src/hb-ot-shape-complex-indic-machine.rl | 23 +++--- src/hb-ot-shape-complex-indic-private.hh | 2 src/hb-ot-shape-complex-indic.cc | 105 +++++++++++++++++++++++++++++-- src/hb-ot-shape-normalize-private.hh | 1 src/hb-ot-shape-normalize.cc | 11 +-- src/hb-ot-shape.cc | 12 ++- 8 files changed, 153 insertions(+), 43 deletions(-)
New commits: commit b85800f9de8976a7418ef9df467d3080c6ab0199 Author: Behdad Esfahbod <beh...@behdad.org> Date: Fri Aug 31 18:12:01 2012 -0400 [Indic] Implement dotted-circle insertion for broken clusters No panic, we really insert a dotted circle when it's absolutely broken. Fixes most of the dotted-circle cases against Uniscribe. (for Devanagari fixes 80% of them, for Khmer 70%; the rest look like Uniscribe being really bogus...) I had to make a decision. Apparently Uniscribe adds one dotted circle to each broken character. I tried that, but that goes wrong easily with split matras. So I made it add only one dotted circle to an entire broken syllable tail. As in: "if there was a dotted circle here, this would have formed a correct cluster." That works better for split stuff, and I like it more. diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh index 9fa1e4b..456e1b8 100644 --- a/src/hb-buffer-private.hh +++ b/src/hb-buffer-private.hh @@ -152,6 +152,7 @@ struct hb_buffer_t { HB_INTERNAL void replace_glyph (hb_codepoint_t glyph_index); /* Makes a copy of the glyph at idx to output and replace glyph_index */ HB_INTERNAL void output_glyph (hb_codepoint_t glyph_index); + HB_INTERNAL void output_info (hb_glyph_info_t &glyph_info); /* Copies glyph at idx to output but doesn't advance idx */ HB_INTERNAL void copy_glyph (void); /* Copies glyph at idx to output and advance idx. 
diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index 3f039d0..f25a8bc 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -268,6 +268,16 @@ hb_buffer_t::output_glyph (hb_codepoint_t glyph_index) } void +hb_buffer_t::output_info (hb_glyph_info_t &glyph_info) +{ + if (unlikely (!make_room_for (0, 1))) return; + + out_info[out_len] = glyph_info; + + out_len++; +} + +void hb_buffer_t::copy_glyph (void) { if (unlikely (!make_room_for (0, 1))) return; diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl index 6c76d24..c9309e9 100644 --- a/src/hb-ot-shape-complex-indic-machine.rl +++ b/src/hb-ot-shape-complex-indic-machine.rl @@ -75,12 +75,14 @@ halant_or_matra_group = (final_halant_group | matra_group{0,4}); consonant_syllable = Repha? (cn.halant_group){0,4} cn A? halant_or_matra_group? syllable_tail; vowel_syllable = reph? V.n? (ZWJ | (halant_group.cn){0,4} halant_or_matra_group? syllable_tail); standalone_cluster = reph? place_holder.n? (halant_group.cn){0,4} halant_or_matra_group? syllable_tail; +broken_cluster = n? 
(halant_group.cn){0,4} halant_or_matra_group syllable_tail; other = any; main := |* consonant_syllable => { found_syllable (consonant_syllable); }; vowel_syllable => { found_syllable (vowel_syllable); }; standalone_cluster => { found_syllable (standalone_cluster); }; + broken_cluster => { found_syllable (broken_cluster); *had_broken_cluster = true; }; other => { found_syllable (non_indic_cluster); }; *|; @@ -98,7 +100,7 @@ main := |* } HB_STMT_END static void -find_syllables (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer) +find_syllables (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer, bool *had_broken_cluster) { unsigned int p, pe, eof, ts, te, act; int cs; diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh index 79daba5..91b0be5 100644 --- a/src/hb-ot-shape-complex-indic-private.hh +++ b/src/hb-ot-shape-complex-indic-private.hh @@ -300,7 +300,7 @@ is_halant_or_coeng (const hb_glyph_info_t &info) } static inline void -set_indic_properties (hb_glyph_info_t &info) +set_indic_properties (hb_glyph_info_t &info) { hb_codepoint_t u = info.codepoint; unsigned int type = get_indic_categories (u); diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 15b00b0..2417ab7 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -770,6 +770,15 @@ initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan, } static void +initial_reordering_broken_cluster (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + unsigned int start, unsigned int end) +{ + /* We already inserted dotted-circles, so just call the standalone_cluster. 
*/ + initial_reordering_standalone_cluster (plan, buffer, start, end); +} + +static void initial_reordering_non_indic_cluster (const hb_ot_shape_plan_t *plan HB_UNUSED, hb_buffer_t *buffer HB_UNUSED, unsigned int start HB_UNUSED, unsigned int end HB_UNUSED) @@ -799,23 +808,63 @@ initial_reordering_syllable (const hb_ot_shape_plan_t *plan, case consonant_syllable: initial_reordering_consonant_syllable (plan, buffer, start, end); return; case vowel_syllable: initial_reordering_vowel_syllable (plan, buffer, start, end); return; case standalone_cluster: initial_reordering_standalone_cluster (plan, buffer, start, end); return; - case broken_cluster: initial_reordering_non_indic_cluster (plan, buffer, start, end); return; + case broken_cluster: initial_reordering_broken_cluster (plan, buffer, start, end); return; case non_indic_cluster: initial_reordering_non_indic_cluster (plan, buffer, start, end); return; } } static void +insert_dotted_circles (const hb_ot_shape_plan_t *plan, + hb_font_t *font, + hb_buffer_t *buffer) +{ + hb_codepoint_t dottedcircle_glyph; + if (!font->get_glyph (0x25CC, 0, &dottedcircle_glyph)) + return; + + hb_glyph_info_t dottedcircle; + dottedcircle.codepoint = 0x25CC; + set_indic_properties (dottedcircle); + dottedcircle.codepoint = dottedcircle_glyph; + + buffer->clear_output (); + + buffer->idx = 0; + unsigned int last_syllable = 0; + while (buffer->idx < buffer->len) + { + unsigned int syllable = buffer->cur().syllable(); + syllable_type_t syllable_type = (syllable_type_t) (syllable & 0x0F); + if (unlikely (last_syllable != syllable && syllable_type == broken_cluster)) + { + hb_glyph_info_t info = dottedcircle; + info.cluster = buffer->cur().cluster; + info.mask = buffer->cur().mask; + info.syllable() = buffer->cur().syllable(); + buffer->output_info (info); + last_syllable = syllable; + } + buffer->next_glyph (); + } + + buffer->swap_buffers (); +} + +static void initial_reordering (const hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t 
*buffer) { - unsigned int count = buffer->len; - if (unlikely (!count)) return; - update_consonant_positions (plan, font, buffer); - find_syllables (plan, buffer); + + bool had_broken_clusters = false; + find_syllables (plan, buffer, &had_broken_clusters); + if (unlikely (had_broken_clusters)) + insert_dotted_circles (plan, font, buffer); hb_glyph_info_t *info = buffer->info; + unsigned int count = buffer->len; + if (unlikely (!count)) return; unsigned int last = 0; unsigned int last_syllable = info[0].syllable(); for (unsigned int i = 1; i < count; i++) @@ -1170,6 +1219,12 @@ final_reordering (const hb_ot_shape_plan_t *plan, } +static hb_ot_shape_normalization_mode_t +normalization_preference_indic (const hb_ot_shape_plan_t *plan) +{ + return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT; +} + const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic = { "indic", @@ -1178,7 +1233,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_indic = data_create_indic, data_destroy_indic, NULL, /* preprocess_text */ - NULL, /* normalization_preference */ + normalization_preference_indic, setup_masks_indic, false, /* zero_width_attached_marks */ }; diff --git a/src/hb-ot-shape-normalize-private.hh b/src/hb-ot-shape-normalize-private.hh index 462b87d..c5fcbea 100644 --- a/src/hb-ot-shape-normalize-private.hh +++ b/src/hb-ot-shape-normalize-private.hh @@ -38,6 +38,7 @@ enum hb_ot_shape_normalization_mode_t { HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED, HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS, /* never composes base-to-base */ + HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT, /* always fully decomposes and then recompose back */ HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_FULL, /* including base-to-base composition */ HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT = HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS diff --git a/src/hb-ot-shape-normalize.cc b/src/hb-ot-shape-normalize.cc index 93dd00c..f4d8330 100644 --- 
a/src/hb-ot-shape-normalize.cc +++ b/src/hb-ot-shape-normalize.cc @@ -414,10 +414,10 @@ decompose_multi_char_cluster (hb_font_t *font, hb_buffer_t *buffer, unsigned int } static inline bool -decompose_cluster (hb_font_t *font, hb_buffer_t *buffer, bool recompose, unsigned int end) +decompose_cluster (hb_font_t *font, hb_buffer_t *buffer, bool short_circuit, unsigned int end) { if (likely (buffer->idx + 1 == end)) - return decompose_current_character (font, buffer, recompose); + return decompose_current_character (font, buffer, short_circuit); else return decompose_multi_char_cluster (font, buffer, end); } @@ -437,7 +437,8 @@ void _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer, hb_ot_shape_normalization_mode_t mode) { - bool recompose = mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED; + bool short_circuit = mode != HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED && + mode != HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS_NO_SHORT_CIRCUIT; bool can_use_recompose = false; unsigned int count; @@ -459,7 +460,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer, if (buffer->cur().cluster != buffer->info[end].cluster) break; - can_use_recompose = decompose_cluster (font, buffer, recompose, end) || can_use_recompose; + can_use_recompose = decompose_cluster (font, buffer, short_circuit, end) || can_use_recompose; } buffer->swap_buffers (); @@ -495,7 +496,7 @@ _hb_ot_shape_normalize (hb_font_t *font, hb_buffer_t *buffer, } - if (!recompose) + if (mode == HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED) return; /* Third round, recompose */ commit 327d14ef188396006d54af976506ab6f8bb2869a Author: Behdad Esfahbod <beh...@behdad.org> Date: Fri Aug 31 16:49:34 2012 -0400 [Indic] Start adding dotted-circle infrastructure diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl index b6a372e..6c76d24 100644 --- a/src/hb-ot-shape-complex-indic-machine.rl +++ b/src/hb-ot-shape-complex-indic-machine.rl @@ -78,24 +78,23 @@ 
standalone_cluster = reph? place_holder.n? (halant_group.cn){0,4} halant_or_matr other = any; main := |* - consonant_syllable => { process_syllable (consonant_syllable); }; - vowel_syllable => { process_syllable (vowel_syllable); }; - standalone_cluster => { process_syllable (standalone_cluster); }; - other => { process_syllable (non_indic); }; + consonant_syllable => { found_syllable (consonant_syllable); }; + vowel_syllable => { found_syllable (vowel_syllable); }; + standalone_cluster => { found_syllable (standalone_cluster); }; + other => { found_syllable (non_indic_cluster); }; *|; }%% -#define process_syllable(func) \ +#define found_syllable(syllable_type) \ HB_STMT_START { \ - if (0) fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #func); \ + if (0) fprintf (stderr, "syllable %d..%d %s\n", last, p+1, #syllable_type); \ for (unsigned int i = last; i < p+1; i++) \ - info[i].syllable() = syllable_serial; \ - PASTE (initial_reordering_, func) (plan, buffer, last, p+1); \ + info[i].syllable() = (syllable_serial << 4) | syllable_type; \ last = p+1; \ syllable_serial++; \ - if (unlikely (!syllable_serial)) syllable_serial++; \ + if (unlikely (syllable_serial == 16)) syllable_serial = 1; \ } HB_STMT_END static void @@ -113,7 +112,7 @@ find_syllables (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer) pe = eof = buffer->len; unsigned int last = 0; - uint8_t syllable_serial = 1; + unsigned int syllable_serial = 1; %%{ write exec; }%% diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index e39629f..15b00b0 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -770,23 +770,61 @@ initial_reordering_standalone_cluster (const hb_ot_shape_plan_t *plan, } static void -initial_reordering_non_indic (const hb_ot_shape_plan_t *plan HB_UNUSED, - hb_buffer_t *buffer HB_UNUSED, - unsigned int start HB_UNUSED, unsigned int end HB_UNUSED) +initial_reordering_non_indic_cluster (const hb_ot_shape_plan_t *plan 
HB_UNUSED, + hb_buffer_t *buffer HB_UNUSED, + unsigned int start HB_UNUSED, unsigned int end HB_UNUSED) { /* Nothing to do right now. If we ever switch to using the output * buffer in the reordering process, we'd need to next_glyph() here. */ } + +enum syllable_type_t { + consonant_syllable, + vowel_syllable, + standalone_cluster, + broken_cluster, + non_indic_cluster, +}; + #include "hb-ot-shape-complex-indic-machine.hh" static void +initial_reordering_syllable (const hb_ot_shape_plan_t *plan, + hb_buffer_t *buffer, + unsigned int start, unsigned int end) +{ + syllable_type_t syllable_type = (syllable_type_t) (buffer->info[start].syllable() & 0x0F); + switch (syllable_type) { + case consonant_syllable: initial_reordering_consonant_syllable (plan, buffer, start, end); return; + case vowel_syllable: initial_reordering_vowel_syllable (plan, buffer, start, end); return; + case standalone_cluster: initial_reordering_standalone_cluster (plan, buffer, start, end); return; + case broken_cluster: initial_reordering_non_indic_cluster (plan, buffer, start, end); return; + case non_indic_cluster: initial_reordering_non_indic_cluster (plan, buffer, start, end); return; + } +} + +static void initial_reordering (const hb_ot_shape_plan_t *plan, hb_font_t *font, hb_buffer_t *buffer) { + unsigned int count = buffer->len; + if (unlikely (!count)) return; + update_consonant_positions (plan, font, buffer); find_syllables (plan, buffer); + + hb_glyph_info_t *info = buffer->info; + unsigned int last = 0; + unsigned int last_syllable = info[0].syllable(); + for (unsigned int i = 1; i < count; i++) + if (last_syllable != info[i].syllable()) { + initial_reordering_syllable (plan, buffer, last, i); + last = i; + last_syllable = info[last].syllable(); + } + initial_reordering_syllable (plan, buffer, last, count); } static void @@ -1110,7 +1148,7 @@ final_reordering (const hb_ot_shape_plan_t *plan, hb_buffer_t *buffer) { unsigned int count = buffer->len; - if (!count) return; + if (unlikely 
(!count)) return; hb_glyph_info_t *info = buffer->info; unsigned int last = 0; commit 1be368e96fb7de8c77bf992874e0d5bd6b272ebe Author: Behdad Esfahbod <beh...@behdad.org> Date: Fri Aug 31 16:29:17 2012 -0400 Minor diff --git a/src/hb-buffer-private.hh b/src/hb-buffer-private.hh index 91e7560..9fa1e4b 100644 --- a/src/hb-buffer-private.hh +++ b/src/hb-buffer-private.hh @@ -156,7 +156,21 @@ struct hb_buffer_t { HB_INTERNAL void copy_glyph (void); /* Copies glyph at idx to output and advance idx. * If there's no output, just advance idx. */ - HB_INTERNAL void next_glyph (void); + inline void + next_glyph (void) + { + if (have_output) + { + if (unlikely (out_info != info || out_len != idx)) { + if (unlikely (!make_room_for (1, 1))) return; + out_info[out_len] = info[idx]; + } + out_len++; + } + + idx++; + } + /* Advance idx without copying to output. */ inline void skip_glyph (void) { idx++; } diff --git a/src/hb-buffer.cc b/src/hb-buffer.cc index e9bb15e..3f039d0 100644 --- a/src/hb-buffer.cc +++ b/src/hb-buffer.cc @@ -290,21 +290,6 @@ hb_buffer_t::replace_glyph (hb_codepoint_t glyph_index) out_len++; } -void -hb_buffer_t::next_glyph (void) -{ - if (have_output) - { - if (unlikely (out_info != info || out_len != idx)) { - if (unlikely (!make_room_for (1, 1))) return; - out_info[out_len] = info[idx]; - } - out_len++; - } - - idx++; -} - void hb_buffer_t::set_masks (hb_mask_t value, commit 784f29d061a2939562eca0c4943feb01174aee00 Author: Behdad Esfahbod <beh...@behdad.org> Date: Fri Aug 31 14:06:26 2012 -0400 Minor diff --git a/src/hb-ot-shape.cc b/src/hb-ot-shape.cc index 473bc17..929406e 100644 --- a/src/hb-ot-shape.cc +++ b/src/hb-ot-shape.cc @@ -487,14 +487,18 @@ hb_ot_position (hb_ot_shape_context_t *c) static void hb_ot_hide_zerowidth (hb_ot_shape_context_t *c) { - hb_codepoint_t space; - if (!c->font->get_glyph (' ', 0, &space)) - return; /* No point! 
*/ + hb_codepoint_t space = 0; unsigned int count = c->buffer->len; for (unsigned int i = 0; i < count; i++) if (unlikely (!is_a_ligature (c->buffer->info[i]) && - _hb_glyph_info_is_zero_width (&c->buffer->info[i]))) { + _hb_glyph_info_is_zero_width (&c->buffer->info[i]))) + { + if (!space) { + /* We assume that the space glyph is not gid0. */ + if (unlikely (!c->font->get_glyph (' ', 0, &space)) || !space) + return; /* No point! */ + } c->buffer->info[i].codepoint = space; c->buffer->pos[i].x_advance = 0; c->buffer->pos[i].y_advance = 0; _______________________________________________ HarfBuzz mailing list HarfBuzz@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/harfbuzz