Maybe we should try something along the lines of the attached patch. The idea here is to ONLY apply the *jmo features to jamo that are specifically identified by the shaper, rather than applying them globally.

The extra work being done here should be offset by the fact that we won't generally be trying as many non-matching lookups on every glyph; we'll only apply the lookups for (at most) one of the *jmo features, instead of all of them.

WDYT?

On 6/1/14 14:44, Jonathan Kew wrote:
The Hangul shaper should NOT apply the *jmo features to glyphs that are
not part of a properly-structured Korean syllable.

Some examples of current behavior:

[GOOD: complete LVT sequence, proper features applied]
hb-unicode-encode 1101,1161,11f0 | hb-shape UnBatang_0613.ttf
[uni1101.ljmo01=0+1024|uni1161.vjmo01=1+0|uni11F0.tjmo01=2+0]

[GOOD: lone L does not have features applied]
hb-unicode-encode 1101 | hb-shape UnBatang_0613.ttf
[uni1101=0+1024]

[GOOD: LT without V is not valid, don't apply features]
hb-unicode-encode 1101,11f0 | hb-shape UnBatang_0613.ttf
[uni1101=0+1024|uni11F0=1+0]

[GOOD: lone T, don't apply features]
hb-unicode-encode 11f0 | hb-shape UnBatang_0613.ttf
[uni11F0=0+0]

[BAD: lone V, shouldn't apply vjmo]
hb-unicode-encode 1161 | hb-shape UnBatang_0613.ttf
[uni1161.vjmo02=0+0]

[BAD: VT pair without L is not valid, shouldn't apply *jmo]
hb-unicode-encode 1161,11f0 | hb-shape UnBatang_0613.ttf
[uni1161.vjmo01=0+0|uni11F0.tjmo01=1+0]

Note the last two examples; these are incorrect, IMO. In both these
cases, Uniscribe does not apply the *jmo features.

JK

diff --git a/src/hb-ot-shape-complex-hangul.cc b/src/hb-ot-shape-complex-hangul.cc
index 7c137c6..ed18a5a 100644
--- a/src/hb-ot-shape-complex-hangul.cc
+++ b/src/hb-ot-shape-complex-hangul.cc
@@ -30,19 +30,33 @@
 /* Hangul shaper */
 
 
-static const hb_tag_t hangul_features[] =
+/* Same order as the feature array below */
+enum {
+  NONE,
+
+  LJMO,
+  VJMO,
+  TJMO,
+
+  FIRST_HANGUL_FEATURE = LJMO,
+  HANGUL_FEATURE_COUNT = TJMO + 1
+};
+
+static const hb_tag_t hangul_features[HANGUL_FEATURE_COUNT] =
 {
+  HB_TAG_NONE,
   HB_TAG('l','j','m','o'),
   HB_TAG('v','j','m','o'),
-  HB_TAG('t','j','m','o'),
-  HB_TAG_NONE
+  HB_TAG('t','j','m','o')
 };
 
 static void
 collect_features_hangul (hb_ot_shape_planner_t *plan)
 {
-  for (const hb_tag_t *script_features = hangul_features; script_features && *script_features; script_features++)
-    plan->map.add_global_bool_feature (*script_features);
+  hb_ot_map_builder_t *map = &plan->map;
+
+  for (unsigned int i = FIRST_HANGUL_FEATURE; i < HANGUL_FEATURE_COUNT; i++)
+    map->add_feature (hangul_features[i], 1, F_NONE);
 }
 
 #define LBase 0x1100
@@ -62,11 +76,16 @@ collect_features_hangul (hb_ot_shape_planner_t *plan)
 
 #define isT(u) (hb_in_ranges<hb_codepoint_t> ((u),  0x11A8, 0x11FF, 0xD7CB, 0xD7FB))
 
+/* buffer var allocations */
+#define hangul_shaping_feature() complex_var_u8_0() /* hangul jamo shaping feature */
+
 static void
 preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
 			hb_buffer_t              *buffer,
 			hb_font_t                *font)
 {
+  HB_BUFFER_ALLOCATE_VAR (buffer, hangul_shaping_feature);
+
   /* Hangul syllables come in two shapes: LV, and LVT.  Of those:
    *
    *   - LV can be precomposed, or decomposed.  Lets call those
@@ -134,11 +153,13 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
 	  {
 	    len = 3;
 	    tindex = t - TBase;
+            buffer->cur(+2).hangul_shaping_feature() = TJMO;
 	  }
 	  else if (isT (t))
 	  {
 	    /* Old T jamo.  Doesn't combine.  Don't combine *anything*. */
-	   len = 0;
+	    len = 0;
+            buffer->cur(+2).hangul_shaping_feature() = TJMO;
 	  }
 	}
 
@@ -154,6 +175,8 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
 	  }
 	}
       }
+      buffer->cur().hangul_shaping_feature() = LJMO;
+      buffer->cur(+1).hangul_shaping_feature() = VJMO;
     }
 
     else if (isCombinedS(u))
@@ -200,6 +223,14 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
 	  buffer->replace_glyphs (1, tindex ? 3 : 2, decomposed);
 	  if (unlikely (buffer->in_error))
 	    return;
+          if (tindex) {
+            buffer->cur(-3).hangul_shaping_feature() = LJMO;
+            buffer->cur(-2).hangul_shaping_feature() = VJMO;
+            buffer->cur(-1).hangul_shaping_feature() = TJMO;
+          } else {
+            buffer->cur(-2).hangul_shaping_feature() = LJMO;
+            buffer->cur(-1).hangul_shaping_feature() = VJMO;
+          }
 	  continue;
 	}
       }
@@ -210,6 +241,26 @@ preprocess_text_hangul (const hb_ot_shape_plan_t *plan,
   buffer->swap_buffers ();
 }
 
+static void
+setup_masks_hangul (const hb_ot_shape_plan_t *plan,
+		    hb_buffer_t              *buffer,
+		    hb_font_t                *font HB_UNUSED)
+{
+  // TODO: cache the mask_array in plan data instead of looking them up here
+  static uint32_t mask_array[HANGUL_FEATURE_COUNT] = { 0 };
+  for (unsigned int i = 0; i < HANGUL_FEATURE_COUNT; i++) {
+    mask_array[i] = plan->map.get_1_mask (hangul_features[i]);
+  }
+
+  unsigned int count = buffer->len;
+  for (unsigned int i = 0; i < count; i++) {
+    buffer->info[i].mask |= mask_array[buffer->info[i].hangul_shaping_feature()];
+  }
+
+  HB_BUFFER_DEALLOCATE_VAR (buffer, hangul_shaping_feature);
+}
+
+
 const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul =
 {
   "hangul",
@@ -221,7 +272,7 @@ const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hangul =
   HB_OT_SHAPE_NORMALIZATION_MODE_NONE,
   NULL, /* decompose */
   NULL, /* compose */
-  NULL, /* setup_masks */
+  setup_masks_hangul, /* setup_masks */
   HB_OT_SHAPE_ZERO_WIDTH_MARKS_DEFAULT,
   false, /* fallback_position */
 };
_______________________________________________
HarfBuzz mailing list
[email protected]
http://lists.freedesktop.org/mailman/listinfo/harfbuzz

Reply via email to