Makes two changes to the set of characters considered nonspacing:

- Makes `Prepended_Concatenation_Mark`s no longer nonspacing.
  This matches the Unicode spec (which specifies these as taking up space
  in front of the characters they modify), and also aligns with
  glibc `wcwidth()`.
- Makes `Default_Ignorable_Code_Point`s other than U+115F HANGUL CHOSEONG FILLER
  nonspacing. Unicode specifies (https://www.unicode.org/faq/unsup_char.html#3)
  that these "should be rendered as completely invisible (and non advancing,
  i.e. “zero width”), if not explicitly supported in rendering." U+115F is
  exempted because it is expected to be combined with other jamo to form a
  width-2 Hangul syllable block.

Signed-off-by: Jules Bertholet <[email protected]>
---
 lib/gen-uni-tables.c  | 18 +++++++++++++++---
 lib/uniwidth/width0.h | 18 +++++++++---------
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c
index b948489fbf..7c0de35be6 100644
--- a/lib/gen-uni-tables.c
+++ b/lib/gen-uni-tables.c
@@ -3105,6 +3105,13 @@ is_property_other_default_ignorable_code_point (unsigned int ch)
   return ((unicode_properties[ch] & (1ULL << PROP_OTHER_DEFAULT_IGNORABLE_CODE_POINT)) != 0);
 }
 
+/* See PropList.txt, UCD.html.  */
+static bool
+is_property_prepended_concatenation_mark (unsigned int ch)
+{
+  return ((unicode_properties[ch] & (1ULL << PROP_PREPENDED_CONCATENATION_MARK)) != 0);
+}
+
 /* See PropList.txt, UCD.html.  */
 static bool
 is_property_deprecated (unsigned int ch)
@@ -6661,10 +6668,13 @@ fill_width (const char *width_filename)
 /* The non-spacing attribute table consists of:
    * Non-spacing characters; generated from PropList.txt or
      "grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt"
-   * Format control characters; generated from
-     "grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt"
+   * Format control characters except for `Prepended_Concatenation_Mark`s;
+     generated from "grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt" and from
+     PropList.txt
    * Zero width characters; generated from
      "grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt"
+   * `Default_Ignorable_Code_Point`s other than U+115F HANGUL CHOSEONG FILLER;
+     generated from DerivedCoreProperties.txt
    * Hangul Jamo characters that have conjoining behaviour:
        - jungseong = syllable-middle vowels
        - jongseong = syllable-final consonants
@@ -6687,8 +6697,10 @@ is_nonspacing (unsigned int ch)
 {
   return (unicode_attributes[ch].name != NULL
           && (get_bidi_category (ch) == UC_BIDI_NSM
-              || is_category_Cc (ch) || is_category_Cf (ch)
+              || is_category_Cc (ch)
+              || (is_category_Cf (ch) && !is_property_prepended_concatenation_mark (ch))
               || strncmp (unicode_attributes[ch].name, "ZERO WIDTH ", 11) == 0
+              || (is_property_default_ignorable_code_point (ch) && ch != 0x115F)
               || (ch >= 0x1160 && ch <= 0x11A7) || (ch >= 0xD7B0 && ch <= 0xD7C6) /* jungseong */
               || (ch >= 0x11A8 && ch <= 0x11FF) || (ch >= 0xD7CB && ch <= 0xD7FB) /* jongseong */
          )   );
diff --git a/lib/uniwidth/width0.h b/lib/uniwidth/width0.h
index 77954eb4d8..041c3c12b7 100644
--- a/lib/uniwidth/width0.h
+++ b/lib/uniwidth/width0.h
@@ -46,19 +46,19 @@ static const unsigned char nonspacing_table_data[48*64] = {
   0x00, 0x00, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xbf, /* 0x0580-0x05bf */
   0xb6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x05c0-0x05ff */
   /* 0x0600-0x07ff */
-  0x3f, 0x00, 0xff, 0x17, 0x00, 0x00, 0x00, 0x00, /* 0x0600-0x063f */
+  0x00, 0x00, 0xff, 0x17, 0x00, 0x00, 0x00, 0x00, /* 0x0600-0x063f */
   0x00, 0xf8, 0xff, 0xff, 0x00, 0x00, 0x01, 0x00, /* 0x0640-0x067f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0680-0x06bf */
-  0x00, 0x00, 0xc0, 0xbf, 0x9f, 0x3d, 0x00, 0x00, /* 0x06c0-0x06ff */
-  0x00, 0x80, 0x02, 0x00, 0x00, 0x00, 0xff, 0xff, /* 0x0700-0x073f */
+  0x00, 0x00, 0xc0, 0x9f, 0x9f, 0x3d, 0x00, 0x00, /* 0x06c0-0x06ff */
+  0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xff, 0xff, /* 0x0700-0x073f */
   0xff, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0740-0x077f */
   0x00, 0x00, 0x00, 0x00, 0xc0, 0xff, 0x01, 0x00, /* 0x0780-0x07bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x0f, 0x20, /* 0x07c0-0x07ff */
   /* 0x0800-0x09ff */
   0x00, 0x00, 0xc0, 0xfb, 0xef, 0x3e, 0x00, 0x00, /* 0x0800-0x083f */
   0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, /* 0x0840-0x087f */
-  0x00, 0x00, 0x03, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0880-0x08bf */
-  0x00, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */
+  0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0880-0x08bf */
+  0x00, 0xfc, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */
   0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, /* 0x0900-0x093f */
   0xfe, 0x21, 0xfe, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x0940-0x097f */
   0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, /* 0x0980-0x09bf */
@@ -168,7 +168,7 @@ static const unsigned char nonspacing_table_data[48*64] = {
   0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, /* 0x3080-0x30bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30c0-0x30ff */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3100-0x313f */
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3140-0x317f */
+  0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, /* 0x3140-0x317f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3180-0x31bf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x31c0-0x31ff */
   /* 0xa600-0xa7ff */
@@ -223,7 +223,7 @@ static const unsigned char nonspacing_table_data[48*64] = {
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0xfec0-0xfeff */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xff00-0xff3f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xff40-0xff7f */
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xff80-0xffbf */
+  0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, /* 0xff80-0xffbf */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, /* 0xffc0-0xffff */
   /* 0x10000-0x101ff */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10000-0x1003f */
@@ -273,8 +273,8 @@ static const unsigned char nonspacing_table_data[48*64] = {
   /* 0x11000-0x111ff */
   0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, /* 0x11000-0x1103f */
   0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x80, /* 0x11040-0x1107f */
-  0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x26, /* 0x11080-0x110bf */
-  0x04, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x110c0-0x110ff */
+  0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x06, /* 0x11080-0x110bf */
+  0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x110c0-0x110ff */
   0x07, 0x00, 0x00, 0x00, 0x80, 0xef, 0x1f, 0x00, /* 0x11100-0x1113f */
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, /* 0x11140-0x1117f */
   0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x7f, /* 0x11180-0x111bf */
-- 
2.43.0


Reply via email to