Make two changes to the set of characters considered nonspacing:

- `Prepended_Concatenation_Mark`s are no longer nonspacing. This matches the Unicode spec (which specifies these as taking up space in front of the characters they modify), and also aligns with glibc `wcwidth()`.
- `Default_Ignorable_Code_Point`s other than U+115F HANGUL CHOSEONG FILLER are now nonspacing. Unicode specifies (https://www.unicode.org/faq/unsup_char.html#3) that these "should be rendered as completely invisible (and non advancing, i.e. “zero width”), if not explicitly supported in rendering." U+115F is exempted because it is expected to be combined with other jamo to form a width-2 Hangul syllable block.
Signed-off-by: Jules Bertholet <[email protected]> --- lib/gen-uni-tables.c | 18 +++++++++++++++--- lib/uniwidth/width0.h | 18 +++++++++--------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/lib/gen-uni-tables.c b/lib/gen-uni-tables.c index b948489fbf..7c0de35be6 100644 --- a/lib/gen-uni-tables.c +++ b/lib/gen-uni-tables.c @@ -3105,6 +3105,13 @@ is_property_other_default_ignorable_code_point (unsigned int ch) return ((unicode_properties[ch] & (1ULL << PROP_OTHER_DEFAULT_IGNORABLE_CODE_POINT)) != 0); } +/* See PropList.txt, UCD.html. */ +static bool +is_property_prepended_concatenation_mark (unsigned int ch) +{ + return ((unicode_properties[ch] & (1ULL << PROP_PREPENDED_CONCATENATION_MARK)) != 0); +} + /* See PropList.txt, UCD.html. */ static bool is_property_deprecated (unsigned int ch) @@ -6661,10 +6668,13 @@ fill_width (const char *width_filename) /* The non-spacing attribute table consists of: * Non-spacing characters; generated from PropList.txt or "grep '^[^;]*;[^;]*;[^;]*;[^;]*;NSM;' UnicodeData.txt" - * Format control characters; generated from - "grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt" + * Format control characters except for `Prepended_Concatenation_Mark`s; + generated from "grep '^[^;]*;[^;]*;Cf;' UnicodeData.txt" and from + PropList.txt * Zero width characters; generated from "grep '^[^;]*;ZERO WIDTH ' UnicodeData.txt" + * `Default_Ignorable_Code_Point`s other than U+115F HANGUL CHOSEONG FILLER; + generated from DerivedCoreProperties.txt * Hangul Jamo characters that have conjoining behaviour: - jungseong = syllable-middle vowels - jongseong = syllable-final consonants @@ -6687,8 +6697,10 @@ is_nonspacing (unsigned int ch) { return (unicode_attributes[ch].name != NULL && (get_bidi_category (ch) == UC_BIDI_NSM - || is_category_Cc (ch) || is_category_Cf (ch) + || is_category_Cc (ch) + || (is_category_Cf (ch) && !is_property_prepended_concatenation_mark (ch)) || strncmp (unicode_attributes[ch].name, "ZERO WIDTH ", 11) == 0 + || 
(is_property_default_ignorable_code_point (ch) && ch != 0x115F) || (ch >= 0x1160 && ch <= 0x11A7) || (ch >= 0xD7B0 && ch <= 0xD7C6) /* jungseong */ || (ch >= 0x11A8 && ch <= 0x11FF) || (ch >= 0xD7CB && ch <= 0xD7FB) /* jongseong */ ) ); diff --git a/lib/uniwidth/width0.h b/lib/uniwidth/width0.h index 77954eb4d8..041c3c12b7 100644 --- a/lib/uniwidth/width0.h +++ b/lib/uniwidth/width0.h @@ -46,19 +46,19 @@ static const unsigned char nonspacing_table_data[48*64] = { 0x00, 0x00, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xbf, /* 0x0580-0x05bf */ 0xb6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x05c0-0x05ff */ /* 0x0600-0x07ff */ - 0x3f, 0x00, 0xff, 0x17, 0x00, 0x00, 0x00, 0x00, /* 0x0600-0x063f */ + 0x00, 0x00, 0xff, 0x17, 0x00, 0x00, 0x00, 0x00, /* 0x0600-0x063f */ 0x00, 0xf8, 0xff, 0xff, 0x00, 0x00, 0x01, 0x00, /* 0x0640-0x067f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0680-0x06bf */ - 0x00, 0x00, 0xc0, 0xbf, 0x9f, 0x3d, 0x00, 0x00, /* 0x06c0-0x06ff */ - 0x00, 0x80, 0x02, 0x00, 0x00, 0x00, 0xff, 0xff, /* 0x0700-0x073f */ + 0x00, 0x00, 0xc0, 0x9f, 0x9f, 0x3d, 0x00, 0x00, /* 0x06c0-0x06ff */ + 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xff, 0xff, /* 0x0700-0x073f */ 0xff, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x0740-0x077f */ 0x00, 0x00, 0x00, 0x00, 0xc0, 0xff, 0x01, 0x00, /* 0x0780-0x07bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x0f, 0x20, /* 0x07c0-0x07ff */ /* 0x0800-0x09ff */ 0x00, 0x00, 0xc0, 0xfb, 0xef, 0x3e, 0x00, 0x00, /* 0x0800-0x083f */ 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x00, /* 0x0840-0x087f */ - 0x00, 0x00, 0x03, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0880-0x08bf */ - 0x00, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */ + 0x00, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, /* 0x0880-0x08bf */ + 0x00, 0xfc, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, /* 0x08c0-0x08ff */ 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, /* 0x0900-0x093f */ 0xfe, 0x21, 0xfe, 0x00, 0x0c, 0x00, 0x00, 0x00, /* 0x0940-0x097f */ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x10, /* 0x0980-0x09bf */ @@ -168,7 +168,7 @@ static const unsigned char nonspacing_table_data[48*64] = { 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, /* 0x3080-0x30bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30c0-0x30ff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3100-0x313f */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3140-0x317f */ + 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, /* 0x3140-0x317f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x3180-0x31bf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x31c0-0x31ff */ /* 0xa600-0xa7ff */ @@ -223,7 +223,7 @@ static const unsigned char nonspacing_table_data[48*64] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, /* 0xfec0-0xfeff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xff00-0xff3f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xff40-0xff7f */ - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xff80-0xffbf */ + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, /* 0xff80-0xffbf */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, /* 0xffc0-0xffff */ /* 0x10000-0x101ff */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10000-0x1003f */ @@ -273,8 +273,8 @@ static const unsigned char nonspacing_table_data[48*64] = { /* 0x11000-0x111ff */ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, /* 0x11000-0x1103f */ 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x80, /* 0x11040-0x1107f */ - 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x26, /* 0x11080-0x110bf */ - 0x04, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x110c0-0x110ff */ + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x06, /* 0x11080-0x110bf */ + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x110c0-0x110ff */ 0x07, 0x00, 0x00, 0x00, 0x80, 0xef, 0x1f, 0x00, /* 0x11100-0x1113f */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, /* 0x11140-0x1117f */ 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x7f, /* 0x11180-0x111bf */ -- 2.43.0
