src/hb-ot-layout-gpos-table.hh | 4 src/hb-ot-shape-complex-indic-machine.rl | 7 src/hb-ot-shape-complex-indic-private.hh | 5 src/hb-ot-shape-complex-indic.cc | 126 +++++----- src/hb-private.hh | 1 src/hb-unicode.cc | 10 src/hb-uniscribe.cc | 34 +- test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt | 1 test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/MANIFEST | 1 test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/bindu.txt | 2 10 files changed, 108 insertions(+), 83 deletions(-)
New commits: commit 3d4c111b7a13700b2f7a0b087eb3992283295f21 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 19:34:39 2012 -0400 Add a test case diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt index 843ee4f..35ce952 100644 --- a/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt +++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-bengali/misc/misc.txt @@ -49,3 +49,4 @@ ঠৠনà§à¦¤à§à¦° তà§à¦¯à§ +à¦à§à¦¯à§à¦° commit 92a1ad7bef9efb456ab87bd63818cfbed7da3f6f Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 18:38:27 2012 -0400 [Indic] Stop searching for base if a post form is found before below form Improves Bengali and Gurmukhi. Malayalam regressed a bit. We will deal with that later. diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index b5ad4ae..ad55f77 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -505,6 +505,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff { /* -> starting from the end of the syllable, move backwards */ unsigned int i = end; + bool seen_below = false; do { i--; /* -> until a consonant is found */ @@ -513,11 +514,13 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff /* -> that does not have a below-base or post-base form * (post-base forms have to follow below-base forms), */ if (info[i].indic_position() != POS_BELOW_C && - info[i].indic_position() != POS_POST_C) + (info[i].indic_position() != POS_POST_C || seen_below)) { base = i; break; } + if (info[i].indic_position() == POS_BELOW_C) + seen_below = true; /* -> or that is not a pre-base reordering Ra, * commit 4c450c703f8e4618c587bcd7ef46dcc1f2c7947b Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 18:13:04 2012 -0400 [Indic] Recompose Bengali Ya,Nukta This is a bunch of hacks 
for now. Improves Bengali a bit. diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 0316691..b5ad4ae 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -407,7 +407,8 @@ hb_ot_shape_normalization_mode_t _hb_ot_shape_complex_normalization_preference_indic (void) { /* We want split matras decomposed by the common shaping logic. */ - return HB_OT_SHAPE_NORMALIZATION_MODE_DECOMPOSED; + /* XXX sort this out after adding per-shaper normalizers. */ + return HB_OT_SHAPE_NORMALIZATION_MODE_COMPOSED_DIACRITICS; } @@ -549,7 +550,6 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff if (base < start) base = start; /* Just in case... */ - /* -> If the syllable starts with Ra + Halant (in a script that has Reph) * and has more than one consonant, Ra is excluded from candidates for * base consonants. */ diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc index c527340..140f382 100644 --- a/src/hb-unicode.cc +++ b/src/hb-unicode.cc @@ -258,6 +258,14 @@ hb_unicode_compose (hb_unicode_funcs_t *ufuncs, hb_codepoint_t *ab) { *ab = 0; + /* XXX, this belongs to indic normalizer. */ + if ((FLAG (hb_unicode_general_category (ufuncs, a)) & + (FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | + FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | + FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))) + return false; + /* XXX, add composition-exclusion exceptions to Indic shaper. */ + if (a == 0x09AF && b == 0x09BC) { *ab = 0x09DF; return true; } return ufuncs->func.compose (ufuncs, a, b, ab, ufuncs->user_data.compose); } commit e9c0f152a38cb2e76650a3e43f7fdcda266af696 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 17:05:46 2012 -0400 [Uniscribe] Fix script fallback Gurmukhi failures half now. Others changed slightly. 
diff --git a/src/hb-uniscribe.cc b/src/hb-uniscribe.cc index e5b98a8..d41542b 100644 --- a/src/hb-uniscribe.cc +++ b/src/hb-uniscribe.cc @@ -331,10 +331,6 @@ retry: } OPENTYPE_TAG language_tag = hb_uint32_swap (hb_ot_tag_from_language (buffer->props.language)); - hb_tag_t buffer_script_tags[2]; - hb_ot_tags_from_script (buffer->props.script, - &buffer_script_tags[0], - &buffer_script_tags[1]); unsigned int glyphs_offset = 0; unsigned int glyphs_len; @@ -345,20 +341,11 @@ retry: unsigned int chars_offset = items[i].iCharPos; unsigned int item_chars_len = items[i + 1].iCharPos - chars_offset; - OPENTYPE_TAG script_tag; - /* We ignore what script tag Uniscribe chose, except to differentiate - * between old/new tags. Not sure if this picks DFLT up correctly... - * This also screws things up as the item.analysis also has an opaque - * script member. */ - if (script_tags[i] == hb_uint32_swap (buffer_script_tags[1])) - script_tag = hb_uint32_swap (buffer_script_tags[1]); - else - script_tag = hb_uint32_swap (buffer_script_tags[0]); - + retry_shape: hr = ScriptShapeOpenType (font_data->hdc, &font_data->script_cache, &items[i].a, - script_tag, + script_tags[i], language_tag, range_char_counts, range_properties, @@ -373,9 +360,6 @@ retry: glyph_props + glyphs_offset, (int *) &glyphs_len); - for (unsigned int j = chars_offset; j < chars_offset + item_chars_len; j++) - log_clusters[j] += glyphs_offset; - if (unlikely (items[i].a.fNoGlyphIndex)) FAIL ("ScriptShapeOpenType() set fNoGlyphIndex"); if (unlikely (hr == E_OUTOFMEMORY)) @@ -386,14 +370,24 @@ retry: goto retry; } if (unlikely (hr == USP_E_SCRIPT_NOT_IN_FONT)) - FAIL ("ScriptShapeOpenType() failed: Font doesn't support script"); + { + if (items[i].a.eScript == SCRIPT_UNDEFINED) + FAIL ("ScriptShapeOpenType() failed: Font doesn't support script"); + items[i].a.eScript = SCRIPT_UNDEFINED; + goto retry_shape; + } if (unlikely (FAILED (hr))) + { FAIL ("ScriptShapeOpenType() failed: 0x%08xL", hr); + } + + for (unsigned int j 
= chars_offset; j < chars_offset + item_chars_len; j++) + log_clusters[j] += glyphs_offset; hr = ScriptPlaceOpenType (font_data->hdc, &font_data->script_cache, &items[i].a, - script_tag, + script_tags[i], language_tag, range_char_counts, range_properties, commit 5791f329159c9863317e2b507514c29321be31a7 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 16:26:55 2012 -0400 [Indic] Allow a ZWNJ after SM's Malayalam failures go way down. Other scripts benefitted slightly too. Sinhala had one or two test regressions, but... diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl index 4be7698..62091e2 100644 --- a/src/hb-ot-shape-complex-indic-machine.rl +++ b/src/hb-ot-shape-complex-indic-machine.rl @@ -65,7 +65,7 @@ reph = (Ra H | Repha); # possible reph cn = c.n?; forced_rakar = ZWJ H ZWJ Ra; matra_group = z*.M.N?.(H | forced_rakar)?; -syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?; +syllable_tail = (SM.ZWNJ?)? (Coeng (cn|V))? (VD VD?)?; place_holder = NBSP | DOTTEDCIRCLE; halant_group = (z?.h.ZWJ?); final_halant_group = halant_group | h.ZWNJ; commit 34ae336f3fae93ef9372881d545c817bce383041 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 16:17:28 2012 -0400 [Indic] Improve Reph AfterMain positioning Fixes 20 out of 48 failing Oriya tests. Failure rate down to 0.066% now. diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 228fc63..0316691 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -975,8 +975,7 @@ final_reordering_syllable (hb_buffer_t *buffer, { new_reph_pos = base; /* XXX Skip potential pre-base reordering Ra. 
*/ - while (new_reph_pos < end && - !( FLAG (info[new_reph_pos + 1].indic_position()) & (FLAG (POS_BELOW_C) | FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD)))) + while (new_reph_pos + 1 < end && info[new_reph_pos + 1].indic_position() <= POS_AFTER_MAIN) new_reph_pos++; if (new_reph_pos < end) goto reph_move; commit bdd080431a40bc941ece3230f338b94a46bd12a2 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 16:03:09 2012 -0400 [Indic] Reposition Oriya Candrabindu Oriya failures down from 0.65% to 0.20%. diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index f75a500..228fc63 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -291,6 +291,7 @@ set_indic_properties (hb_glyph_info_t &info, hb_ot_map_t *map, hb_font_t *font) pos = POS_SMVD; } + if (unlikely (u == 0x0B01)) pos = POS_BEFORE_SUB; /* Oriya Bindu is BeforeSub in the spec. */ info.indic_category() = cat; diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/MANIFEST b/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/MANIFEST index 29cfb2f..66a2468 100644 --- a/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/MANIFEST +++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/MANIFEST @@ -1 +1,2 @@ +bindu.txt misc.txt diff --git a/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/bindu.txt b/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/bindu.txt new file mode 100644 index 0000000..13de6ee --- /dev/null +++ b/test/shaping/texts/in-tree/shaper-indic/indic/script-oriya/misc/bindu.txt @@ -0,0 +1,2 @@ +ମàଠ+ମàଠcommit 5f0eaaad129ff04d56b8756bebf19fbc242718c9 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 15:47:24 2012 -0400 [Indic] Fix base search in final_reordering Fixes most Malayalam failures. Down from 1.6% to 0.38% now. Fixes a few more in other scripts too. 
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 1a75e78..f75a500 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -712,7 +712,7 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff } /* XXX This will not match for old-Indic spec since the Halant-Ra order is reversed already. */ - if (basic_mask_array[PREF] && base + 3 <= end) + if (basic_mask_array[PREF] && base + 2 < end) { /* Find a Halant,Ra sequence and mark it fore pre-base reordering processing. */ for (unsigned int i = base + 1; i + 1 < end; i++) @@ -829,9 +829,13 @@ final_reordering_syllable (hb_buffer_t *buffer, */ /* Find base again */ - unsigned int base = end; - while (start < base && info[base - 1].indic_position() >= POS_BASE_C) - base--; + unsigned int base; + for (base = start; base < end; base++) + if (info[base].indic_position() >= POS_BASE_C) { + if (start < base && info[base].indic_position() > POS_BASE_C) + base--; + break; + } unsigned int start_of_last_cluster = base; commit 81202bd860e4034c18d9f80c5a4f33d9f48463a3 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 15:10:02 2012 -0400 [Indic] Don't attach SM/VD to other characters diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh index 5f4856e..4a4c8c0 100644 --- a/src/hb-ot-shape-complex-indic-private.hh +++ b/src/hb-ot-shape-complex-indic-private.hh @@ -83,6 +83,8 @@ enum indic_category_t { /* Visual positions in a syllable from left to right. 
*/ enum indic_position_t { + POS_START, + POS_RA_TO_BECOME_REPH, POS_PRE_M, POS_PRE_C, @@ -102,6 +104,7 @@ enum indic_position_t { POS_FINAL_C, POS_SMVD, + POS_END }; diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 36bf240..1a75e78 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -634,29 +634,32 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff } } - /* Attach ZWJ, ZWNJ, nukta, and halant to previous char to move with them. */ - if (!indic_options ().uniscribe_bug_compatible) + /* Attach misc marks to previous char to move with them. */ { - /* Please update the Uniscribe branch when touching this! */ - for (unsigned int i = start + 1; i < end; i++) - if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H)))) - info[i].indic_position() = info[i - 1].indic_position(); - } else { - /* - * Uniscribe doesn't move the Halant with Left Matra. - * TEST: U+092B,U+093F,U+094DE - */ - /* Please update the non-Uniscribe branch when touching this! */ - for (unsigned int i = start + 1; i < end; i++) - if ((FLAG (info[i].indic_category()) & (FLAG (OT_ZWNJ) | FLAG (OT_ZWJ) | FLAG (OT_N) | FLAG (OT_RS) | FLAG (OT_H)))) { - info[i].indic_position() = info[i - 1].indic_position(); - if (info[i].indic_category() == OT_H && info[i].indic_position() == POS_PRE_M) + indic_position_t last_pos = POS_START; + for (unsigned int i = start; i < end; i++) + { + if ((FLAG (info[i].indic_category()) & (JOINER_FLAGS | FLAG (OT_N) | FLAG (OT_RS) | HALANT_OR_COENG_FLAGS))) + { + info[i].indic_position() = last_pos; + if (unlikely (indic_options ().uniscribe_bug_compatible && + info[i].indic_category() == OT_H && + info[i].indic_position() == POS_PRE_M)) + { + /* + * Uniscribe doesn't move the Halant with Left Matra. 
+ * TEST: U+092B,U+093F,U+094DE + */ for (unsigned int j = i; j > start; j--) if (info[j - 1].indic_position() != POS_PRE_M) { info[i].indic_position() = info[j - 1].indic_position(); break; } + } + } else if (info[i].indic_position() != POS_SMVD) { + last_pos = (indic_position_t) info[i].indic_position(); } + } } /* Re-attach ZWJ, ZWNJ, and halant to next char, for after-base consonants. */ { @@ -666,7 +669,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff last_halant = i; else if (is_consonant (info[i])) { for (unsigned int j = last_halant; j < i; j++) - info[j].indic_position() = info[i].indic_position(); + if (info[j].indic_position() != POS_SMVD) + info[j].indic_position() = info[i].indic_position(); } } commit efb4ad735691837a52447bedc1a66a87d0d9af51 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 14:27:38 2012 -0400 Fix compiler warnings If x is not constant, we cannot ASSERT_STATIC on it. diff --git a/src/hb-private.hh b/src/hb-private.hh index bdfd3f5..3f710ed 100644 --- a/src/hb-private.hh +++ b/src/hb-private.hh @@ -734,7 +734,7 @@ hb_in_range (T u, T lo, T hi) * For example, for testing "x ∈ {x1, x2, x3}" use: * (FLAG(x) & (FLAG(x1) | FLAG(x2) | FLAG(x3))) */ -#define FLAG(x) (ASSERT_STATIC_EXPR_ZERO((x) < 8 * sizeof(int)) + (1<<(x))) +#define FLAG(x) (1<<(x)) #define FLAG_RANGE(x,y) (ASSERT_STATIC_EXPR_ZERO ((x) < (y)) + FLAG(y+1) - FLAG(x)) commit f31d97e44eeb6fb141f3de928e27e033fc7b1f47 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 14:13:35 2012 -0400 [Indic] Form Telugu Reph out of Ra,Virama,ZWJ Apparently this was approved in Feb 2012. No font yet. 
diff --git a/src/hb-ot-shape-complex-indic-private.hh b/src/hb-ot-shape-complex-indic-private.hh index bbf5484..5f4856e 100644 --- a/src/hb-ot-shape-complex-indic-private.hh +++ b/src/hb-ot-shape-complex-indic-private.hh @@ -177,7 +177,7 @@ static const hb_codepoint_t ra_chars[] = { 0x0AB0, /* Gujarati */ 0x0B30, /* Oriya */ 0x0BB0, /* Tamil */ /* No Reph */ - 0x0C30, /* Telugu */ /* No Reph */ + 0x0C30, /* Telugu */ /* Reph formed only with ZWJ */ 0x0CB0, /* Kannada */ 0x0D30, /* Malayalam */ /* No Reph, Logical Repha */ diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index ffba986..36bf240 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -471,8 +471,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff start + 3 <= end && info[start].indic_category() == OT_Ra && info[start + 1].indic_category() == OT_H && - (unlikely (buffer->props.script == HB_SCRIPT_SINHALA) ? - info[start + 2].indic_category() == OT_ZWJ /* In Sinhala, form Reph only if ZWJ is present */: + (unlikely (buffer->props.script == HB_SCRIPT_SINHALA || buffer->props.script == HB_SCRIPT_TELUGU) ? + info[start + 2].indic_category() == OT_ZWJ /* In Sinhala & Telugu, form Reph only if ZWJ is present */: !is_joiner (info[start + 2] /* In other scripts, any joiner blocks Reph formation */ ) )) { commit 2e193b240ec85cab0d4e2f8a375c5a7f0ef99985 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 14:02:35 2012 -0400 [Indic] Don't split U+0AC9 Although IndicMatraCategory.txt classifies it as Top_And_Right matra, it does not have Unicode decomposition, and Uniscribe does not do anything special about it either. Gujarati failures down from 0.672% to 0.0130966%. 
diff --git a/src/hb-unicode.cc b/src/hb-unicode.cc index 3e8f807..c527340 100644 --- a/src/hb-unicode.cc +++ b/src/hb-unicode.cc @@ -269,7 +269,7 @@ hb_unicode_decompose (hb_unicode_funcs_t *ufuncs, { /* XXX FIXME, move these to complex shapers and propagage to normalizer.*/ switch (ab) { - case 0x0AC9 : *a = 0x0AC5; *b= 0x0ABE; return true; + case 0x0AC9 : return false; case 0x0931 : return false; case 0x0B94 : return false; commit 30c3d5e9fc61b49c2c6ad4e744300edd6f3e0261 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 13:56:32 2012 -0400 [Indic] Simplify Uniscribe cluster emulation Now that we break syllables on Halant,ZWNJ, this code can be simplified. diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 7979e24..ffba986 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -1110,18 +1110,11 @@ final_reordering_syllable (hb_buffer_t *buffer, if (indic_options ().uniscribe_bug_compatible) { - /* This is what Uniscribe does. Ie. add cluster boundaries after Halant,ZWNJ. + /* Uniscribe merges the entire cluster. * This means, half forms are submerged into the main consonants cluster. * This is unnecessary, and makes cursor positioning harder, but that's what * Uniscribe does. */ - unsigned int cluster_start = start; - for (unsigned int i = start + 1; i < start_of_last_cluster; i++) - if (is_halant_or_coeng (info[i - 1]) && info[i].indic_category() == OT_ZWNJ) { - i++; - buffer->merge_clusters (cluster_start, i); - cluster_start = i; - } - start_of_last_cluster = cluster_start; + start_of_last_cluster = start; } buffer->merge_clusters (start_of_last_cluster, end); commit decf6ffca475fe01ff3151b7641f629f031137d2 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 13:51:31 2012 -0400 [Indic] Minor! 
diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index e771e57..7979e24 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -1108,7 +1108,7 @@ final_reordering_syllable (hb_buffer_t *buffer, * Finish off the clusters and go home! */ - if (!indic_options ().uniscribe_bug_compatible) + if (indic_options ().uniscribe_bug_compatible) { /* This is what Uniscribe does. Ie. add cluster boundaries after Halant,ZWNJ. * This means, half forms are submerged into the main consonants cluster. commit 9e4f94a72cea6d65a6a7ba5a47db92e00dbfbb91 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 13:48:03 2012 -0400 [Indic] Break syllables at Halant,ZWNJ That's really what Uniscribe does, and explains a lot of peculiarities of Halant,ZWNJ before the base. Sent Telugu from 1% failures to 0.03%. Improved Kannada and Malayalam slightly. Fixed half of Bengali, and did NOT break anything! diff --git a/src/hb-ot-shape-complex-indic-machine.rl b/src/hb-ot-shape-complex-indic-machine.rl index 4501773..4be7698 100644 --- a/src/hb-ot-shape-complex-indic-machine.rl +++ b/src/hb-ot-shape-complex-indic-machine.rl @@ -67,8 +67,9 @@ forced_rakar = ZWJ H ZWJ Ra; matra_group = z*.M.N?.(H | forced_rakar)?; syllable_tail = SM? (Coeng (cn|V))? (VD VD?)?; place_holder = NBSP | DOTTEDCIRCLE; -halant_group = (z?.h.z?); -halant_or_matra_group = (halant_group | matra_group*); +halant_group = (z?.h.ZWJ?); +final_halant_group = halant_group | h.ZWNJ; +halant_or_matra_group = (final_halant_group | matra_group*); consonant_syllable = Repha? (cn.halant_group)* cn A? halant_or_matra_group? 
syllable_tail; diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 42a7e8d..e771e57 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -531,9 +531,8 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff } else { - /* A ZWJ at the end of syllable, or any ZWJ/ZWNJ in other places, stop the base - * search (to request explicit half or halant forms. */ - if (is_joiner (info[i]) && (i + 1 < end || info[i].indic_category() == OT_ZWJ)) + /* A ZWJ stops the base search, and requests an explicit half form. */ + if (info[i].indic_category() == OT_ZWJ) break; } } while (i > limit); commit 2c372b80f6befad69e216e3f218b38640b8cc044 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 13:37:48 2012 -0400 [Indic] Better check for applying 'init' Specifically, don't apply 'init' if previous char is a joiner. Fixes some more of Bengali. diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index bbb881e..42a7e8d 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -1101,14 +1101,7 @@ final_reordering_syllable (hb_buffer_t *buffer, if (info[start].indic_position () == POS_PRE_M && (!start || !(FLAG (_hb_glyph_info_get_general_category (&info[start - 1])) & - (FLAG (HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER) | - FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) | - FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) | - FLAG (HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER) | - FLAG (HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER) | - FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | - FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | - FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK))))) + FLAG_RANGE (HB_UNICODE_GENERAL_CATEGORY_FORMAT, HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)))) info[start].mask |= init_mask; diff --git a/src/hb-private.hh b/src/hb-private.hh index 7b778b7..bdfd3f5 100644 --- 
a/src/hb-private.hh +++ b/src/hb-private.hh @@ -734,7 +734,8 @@ hb_in_range (T u, T lo, T hi) * For example, for testing "x ∈ {x1, x2, x3}" use: * (FLAG(x) & (FLAG(x1) | FLAG(x2) | FLAG(x3))) */ -#define FLAG(x) (1<<(x)) +#define FLAG(x) (ASSERT_STATIC_EXPR_ZERO((x) < 8 * sizeof(int)) + (1<<(x))) +#define FLAG_RANGE(x,y) (ASSERT_STATIC_EXPR_ZERO ((x) < (y)) + FLAG(y+1) - FLAG(x)) template <typename T, typename T2> inline void commit 34a7440b7c6c6e53394ddbdbedaad57b23f85105 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 12:32:59 2012 -0400 [GPOS] Don't zero mark advances Fixes more of Telugu, Kannada, and Oriya. May break things (outside Indic...), but we cannot think of any font relying on this immediately. diff --git a/src/hb-ot-layout-gpos-table.hh b/src/hb-ot-layout-gpos-table.hh index 94055b3..9eadbd6 100644 --- a/src/hb-ot-layout-gpos-table.hh +++ b/src/hb-ot-layout-gpos-table.hh @@ -1486,8 +1486,8 @@ fix_mark_attachment (hb_glyph_position_t *pos, unsigned int i, hb_direction_t di unsigned int j = i - pos[i].attach_lookback(); - pos[i].x_advance = 0; - pos[i].y_advance = 0; +// pos[i].x_advance = 0; +// pos[i].y_advance = 0; pos[i].x_offset += pos[j].x_offset; pos[i].y_offset += pos[j].y_offset; commit 8ed248de77e5d2ed978e55c0ce1a11727bc9e34c Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 11:42:24 2012 -0400 [Indic] Minor diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index ae3af2c..bbb881e 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -1112,8 +1112,9 @@ final_reordering_syllable (hb_buffer_t *buffer, info[start].mask |= init_mask; - - /* Finish off the clusters and go home! */ + /* + * Finish off the clusters and go home! 
+ */ if (!indic_options ().uniscribe_bug_compatible) { commit d0e68dbd0b9fc9a42c4280d01c8ffd9c5015d550 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 11:25:41 2012 -0400 [Indic] Implement reph positioning step 5 Not tuned, just copied from step 2. Fixes another 0.5% of Kannada failures. 1% to go. diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index ffae430..ae3af2c 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -999,7 +999,17 @@ final_reordering_syllable (hb_buffer_t *buffer, */ reph_step_5: { - /* XXX */ + /* Copied from step 2. */ + new_reph_pos = start + 1; + while (new_reph_pos < base && !is_halant_or_coeng (info[new_reph_pos])) + new_reph_pos++; + + if (new_reph_pos < base && is_halant_or_coeng (info[new_reph_pos])) { + /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */ + if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1])) + new_reph_pos++; + goto reph_move; + } } /* 6. Otherwise, reorder reph to the end of the syllable. commit a9e45c32e4a0d6da33c52f8427aa694e57f52eb9 Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 11:04:15 2012 -0400 [Indic] Don't let ZWNJ at the end of syllable affect base search Fixes a few Devanagari, half of remaining Kannada failures, quarter for Telugu, and others slightly improved or unchanged. diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index a3f20b1..ffae430 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -530,8 +530,12 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff base = i; } else - if (is_joiner (info[i])) + { + /* A ZWJ at the end of syllable, or any ZWJ/ZWNJ in other places, stop the base + * search (to request explicit half or halant forms. 
*/ + if (is_joiner (info[i]) && (i + 1 < end || info[i].indic_category() == OT_ZWJ)) break; + } } while (i > limit); } else commit 20b68e699f73e6ce046c0ec143d40b3d6d48e06b Author: Behdad Esfahbod <[email protected]> Date: Fri Jul 20 10:47:46 2012 -0400 [Indic] Apply 'cjct' globally Fixes 5 Devanagari failures, and no regressions. diff --git a/src/hb-ot-shape-complex-indic.cc b/src/hb-ot-shape-complex-indic.cc index 42e0f70..a3f20b1 100644 --- a/src/hb-ot-shape-complex-indic.cc +++ b/src/hb-ot-shape-complex-indic.cc @@ -322,7 +322,7 @@ indic_basic_features[] = {HB_TAG('a','b','v','f'), false}, {HB_TAG('p','s','t','f'), false}, {HB_TAG('c','f','a','r'), false}, - {HB_TAG('c','j','c','t'), false}, + {HB_TAG('c','j','c','t'), true}, {HB_TAG('v','a','t','u'), true}, }; @@ -338,7 +338,7 @@ enum { ABVF, PSTF, CFAR, - CJCT, + _CJCT, VATU }; @@ -691,15 +691,15 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff info[i].mask |= basic_mask_array[RPHF]; /* Pre-base */ - mask = basic_mask_array[HALF] | basic_mask_array[CJCT]; + mask = basic_mask_array[HALF]; for (unsigned int i = start; i < base; i++) info[i].mask |= mask; /* Base */ - mask = basic_mask_array[CJCT]; + mask = 0; if (base < end) info[base].mask |= mask; /* Post-base */ - mask = basic_mask_array[BLWF] | basic_mask_array[ABVF] | basic_mask_array[PSTF] | basic_mask_array[CJCT]; + mask = basic_mask_array[BLWF] | basic_mask_array[ABVF] | basic_mask_array[PSTF]; for (unsigned int i = base + 1; i < end; i++) info[i].mask |= mask; } @@ -737,7 +737,10 @@ initial_reordering_consonant_syllable (const hb_ot_map_t *map, hb_buffer_t *buff do { j--; - info[j].mask &= ~basic_mask_array[CJCT]; + /* A ZWJ disables CJCT, however, it's mere presence is enough + * to disable ligation. No explicit action needed. */ + + /* A ZWNJ disables HALF. */ if (non_joiner) info[j].mask &= ~basic_mask_array[HALF];
_______________________________________________ HarfBuzz mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/harfbuzz
