On Tue, Jun 27, 2023 at 12:49:53AM +0200, Steinar H. Gunderson wrote:
> The attached patch seems to get us halfway there; screen now combines all of
> them correctly into one cluster. However, it's still split for whatever
> reason; only if I redraw (C-a l) the flag shows up, and the text breaking is
> wrong, so I assume there's at least something wrong with the width
> calculation, and possibly lots of other things I've not thought of. :-)

Version 2 of the patch; this seems to actually work. Only lightly tested,
though, and there are enough subtleties in the code that it probably needs
review from someone who knows screen from before.

/* Steinar */
-- 
Homepage: https://www.sesse.net/
diff -ur /home/sesse/nmu/orig/screen-4.9.0/ansi.c ../ansi.c
--- /home/sesse/nmu/orig/screen-4.9.0/ansi.c	2023-06-27 22:53:44.000000000 +0200
+++ ../ansi.c	2023-06-27 23:00:26.667216364 +0200
@@ -694,9 +694,9 @@
 		    }
 		  curr->w_rend.font = 0;
 		}
-	      if (curr->w_encoding == UTF8 && c >= 0x0300 && utf8_iscomb(c))
+	      if (curr->w_encoding == UTF8 && c >= 0x0300)
 		{
-		  int ox, oy;
+		  int ox, oy, c_prev;
 		  struct mchar omc;
 
 		  ox = curr->w_x - 1;
@@ -718,15 +718,35 @@
 			  omc.mbcs = 0xff;
 			}
 		    }
-		  if (ox >= 0)
-		    {
-		      utf8_handle_comb(c, &omc);
-		      MFixLine(curr, oy, &omc);
-		      copy_mchar2mline(&omc, &curr->w_mlines[oy], ox);
-		      LPutChar(&curr->w_layer, &omc, ox, oy);
-		      LGotoPos(&curr->w_layer, curr->w_x, curr->w_y);
-		    }
-		  break;
+                  c_prev = omc.image | (omc.font << 8) | omc.fontx << 16;
+                  if (!grapheme_cluster_break(c_prev, c))
+                    {
+		      if (ox >= 0)
+		        {
+		          utf8_handle_comb(c, &omc);
+		          MFixLine(curr, oy, &omc);
+		          copy_mchar2mline(&omc, &curr->w_mlines[oy], ox);
+                          if (!utf8_isdouble(c_prev) &&
+                              utf8_isdouble(omc.image | (omc.font << 8) | omc.fontx << 16))
+                            {
+                              /* A combining character switched us from single-width to double-width.
+                                 Do what the w_mbcs = 0xff path would have done below if the character
+                                 was double-width to begin with. */
+                              omc.mbcs = 0xff;
+                              if (ox < cols - 1)
+                                {
+		                  curr->w_mlines[oy].image[ox + 1] = 0xff;
+		                  curr->w_mlines[oy].font[ox + 1] = 0xff;
+		                  curr->w_mlines[oy].fontx[ox + 1] = 0;
+		                  curr->w_x++;
+                                }
+                           }
+		          LPutChar(&curr->w_layer, &omc, ox, oy);
+		          LGotoPos(&curr->w_layer, curr->w_x, curr->w_y);
+		        }
+		      break;
+                    }
+                  /* fall through */
 		}
 #  ifdef DW_CHARS
 		if (curr->w_encoding == UTF8 && utf8_isdouble(c))
diff -ur /home/sesse/nmu/orig/screen-4.9.0/encoding.c ../encoding.c
--- /home/sesse/nmu/orig/screen-4.9.0/encoding.c	2023-06-27 22:53:44.000000000 +0200
+++ ../encoding.c	2023-06-27 22:57:29.386767268 +0200
@@ -1215,6 +1233,398 @@
   return bisearch(c, combining, sizeof(combining) / sizeof(struct interval) - 1);
 }
 
+static bool
+is_grapheme_extend(c)
+int c;
+{
+  /* Looks c up (via bisearch()) in the ascending Grapheme_Extend=Yes code point ranges below; data from https://unicode.org/Public/15.0.0/ucd/DerivedCoreProperties.txt */
+  static const struct interval grapheme_extend[] = {
+    { 0x0300, 0x036F }, { 0x0483, 0x0487 }, { 0x0488, 0x0489 }, 
+    { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, 
+    { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0610, 0x061A }, 
+    { 0x064B, 0x065F }, { 0x0670, 0x0670 }, { 0x06D6, 0x06DC }, 
+    { 0x06DF, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, 
+    { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 }, 
+    { 0x07EB, 0x07F3 }, { 0x07FD, 0x07FD }, { 0x0816, 0x0819 }, 
+    { 0x081B, 0x0823 }, { 0x0825, 0x0827 }, { 0x0829, 0x082D }, 
+    { 0x0859, 0x085B }, { 0x0898, 0x089F }, { 0x08CA, 0x08E1 }, 
+    { 0x08E3, 0x0902 }, { 0x093A, 0x093A }, { 0x093C, 0x093C }, 
+    { 0x0941, 0x0948 }, { 0x094D, 0x094D }, { 0x0951, 0x0957 }, 
+    { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, 
+    { 0x09BE, 0x09BE }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, 
+    { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 }, { 0x09FE, 0x09FE }, 
+    { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, 
+    { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A51, 0x0A51 }, 
+    { 0x0A70, 0x0A71 }, { 0x0A75, 0x0A75 }, { 0x0A81, 0x0A82 }, 
+    { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, 
+    { 0x0ACD, 0x0ACD }, { 0x0AE2, 0x0AE3 }, { 0x0AFA, 0x0AFF }, 
+    { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, { 0x0B3E, 0x0B3E }, 
+    { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B44 }, { 0x0B4D, 0x0B4D }, 
+    { 0x0B55, 0x0B56 }, { 0x0B57, 0x0B57 }, { 0x0B62, 0x0B63 }, 
+    { 0x0B82, 0x0B82 }, { 0x0BBE, 0x0BBE }, { 0x0BC0, 0x0BC0 }, 
+    { 0x0BCD, 0x0BCD }, { 0x0BD7, 0x0BD7 }, { 0x0C00, 0x0C00 }, 
+    { 0x0C04, 0x0C04 }, { 0x0C3C, 0x0C3C }, { 0x0C3E, 0x0C40 }, 
+    { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, 
+    { 0x0C62, 0x0C63 }, { 0x0C81, 0x0C81 }, { 0x0CBC, 0x0CBC }, 
+    { 0x0CBF, 0x0CBF }, { 0x0CC2, 0x0CC2 }, { 0x0CC6, 0x0CC6 }, 
+    { 0x0CCC, 0x0CCD }, { 0x0CD5, 0x0CD6 }, { 0x0CE2, 0x0CE3 }, 
+    { 0x0D00, 0x0D01 }, { 0x0D3B, 0x0D3C }, { 0x0D3E, 0x0D3E }, 
+    { 0x0D41, 0x0D44 }, { 0x0D4D, 0x0D4D }, { 0x0D57, 0x0D57 }, 
+    { 0x0D62, 0x0D63 }, { 0x0D81, 0x0D81 }, { 0x0DCA, 0x0DCA }, 
+    { 0x0DCF, 0x0DCF }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, 
+    { 0x0DDF, 0x0DDF }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, 
+    { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EBC }, 
+    { 0x0EC8, 0x0ECE }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, 
+    { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, 
+    { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F8D, 0x0F97 }, 
+    { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, 
+    { 0x1032, 0x1037 }, { 0x1039, 0x103A }, { 0x103D, 0x103E }, 
+    { 0x1058, 0x1059 }, { 0x105E, 0x1060 }, { 0x1071, 0x1074 }, 
+    { 0x1082, 0x1082 }, { 0x1085, 0x1086 }, { 0x108D, 0x108D }, 
+    { 0x109D, 0x109D }, { 0x135D, 0x135F }, { 0x1712, 0x1714 }, 
+    { 0x1732, 0x1733 }, { 0x1752, 0x1753 }, { 0x1772, 0x1773 }, 
+    { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, { 0x17C6, 0x17C6 }, 
+    { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, { 0x180B, 0x180D }, 
+    { 0x180F, 0x180F }, { 0x1885, 0x1886 }, { 0x18A9, 0x18A9 }, 
+    { 0x1920, 0x1922 }, { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, 
+    { 0x1939, 0x193B }, { 0x1A17, 0x1A18 }, { 0x1A1B, 0x1A1B }, 
+    { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E }, { 0x1A60, 0x1A60 }, 
+    { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C }, { 0x1A73, 0x1A7C }, 
+    { 0x1A7F, 0x1A7F }, { 0x1AB0, 0x1ABD }, { 0x1ABE, 0x1ABE }, 
+    { 0x1ABF, 0x1ACE }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 }, 
+    { 0x1B35, 0x1B35 }, { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, 
+    { 0x1B42, 0x1B42 }, { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B81 }, 
+    { 0x1BA2, 0x1BA5 }, { 0x1BA8, 0x1BA9 }, { 0x1BAB, 0x1BAD }, 
+    { 0x1BE6, 0x1BE6 }, { 0x1BE8, 0x1BE9 }, { 0x1BED, 0x1BED }, 
+    { 0x1BEF, 0x1BF1 }, { 0x1C2C, 0x1C33 }, { 0x1C36, 0x1C37 }, 
+    { 0x1CD0, 0x1CD2 }, { 0x1CD4, 0x1CE0 }, { 0x1CE2, 0x1CE8 }, 
+    { 0x1CED, 0x1CED }, { 0x1CF4, 0x1CF4 }, { 0x1CF8, 0x1CF9 }, 
+    { 0x1DC0, 0x1DFF }, { 0x200C, 0x200C }, { 0x20D0, 0x20DC }, 
+    { 0x20DD, 0x20E0 }, { 0x20E1, 0x20E1 }, { 0x20E2, 0x20E4 }, 
+    { 0x20E5, 0x20F0 }, { 0x2CEF, 0x2CF1 }, { 0x2D7F, 0x2D7F }, 
+    { 0x2DE0, 0x2DFF }, { 0x302A, 0x302D }, { 0x302E, 0x302F }, 
+    { 0x3099, 0x309A }, { 0xA66F, 0xA66F }, { 0xA670, 0xA672 }, 
+    { 0xA674, 0xA67D }, { 0xA69E, 0xA69F }, { 0xA6F0, 0xA6F1 }, 
+    { 0xA802, 0xA802 }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, 
+    { 0xA825, 0xA826 }, { 0xA82C, 0xA82C }, { 0xA8C4, 0xA8C5 }, 
+    { 0xA8E0, 0xA8F1 }, { 0xA8FF, 0xA8FF }, { 0xA926, 0xA92D }, 
+    { 0xA947, 0xA951 }, { 0xA980, 0xA982 }, { 0xA9B3, 0xA9B3 }, 
+    { 0xA9B6, 0xA9B9 }, { 0xA9BC, 0xA9BD }, { 0xA9E5, 0xA9E5 }, 
+    { 0xAA29, 0xAA2E }, { 0xAA31, 0xAA32 }, { 0xAA35, 0xAA36 }, 
+    { 0xAA43, 0xAA43 }, { 0xAA4C, 0xAA4C }, { 0xAA7C, 0xAA7C }, 
+    { 0xAAB0, 0xAAB0 }, { 0xAAB2, 0xAAB4 }, { 0xAAB7, 0xAAB8 }, 
+    { 0xAABE, 0xAABF }, { 0xAAC1, 0xAAC1 }, { 0xAAEC, 0xAAED }, 
+    { 0xAAF6, 0xAAF6 }, { 0xABE5, 0xABE5 }, { 0xABE8, 0xABE8 }, 
+    { 0xABED, 0xABED }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F }, 
+    { 0xFE20, 0xFE2F }, { 0xFF9E, 0xFF9F }, { 0x101FD, 0x101FD }, 
+    { 0x102E0, 0x102E0 }, { 0x10376, 0x1037A }, { 0x10A01, 0x10A03 }, 
+    { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, { 0x10A38, 0x10A3A }, 
+    { 0x10A3F, 0x10A3F }, { 0x10AE5, 0x10AE6 }, { 0x10D24, 0x10D27 }, 
+    { 0x10EAB, 0x10EAC }, { 0x10EFD, 0x10EFF }, { 0x10F46, 0x10F50 }, 
+    { 0x10F82, 0x10F85 }, { 0x11001, 0x11001 }, { 0x11038, 0x11046 }, 
+    { 0x11070, 0x11070 }, { 0x11073, 0x11074 }, { 0x1107F, 0x11081 }, 
+    { 0x110B3, 0x110B6 }, { 0x110B9, 0x110BA }, { 0x110C2, 0x110C2 }, 
+    { 0x11100, 0x11102 }, { 0x11127, 0x1112B }, { 0x1112D, 0x11134 }, 
+    { 0x11173, 0x11173 }, { 0x11180, 0x11181 }, { 0x111B6, 0x111BE }, 
+    { 0x111C9, 0x111CC }, { 0x111CF, 0x111CF }, { 0x1122F, 0x11231 }, 
+    { 0x11234, 0x11234 }, { 0x11236, 0x11237 }, { 0x1123E, 0x1123E }, 
+    { 0x11241, 0x11241 }, { 0x112DF, 0x112DF }, { 0x112E3, 0x112EA }, 
+    { 0x11300, 0x11301 }, { 0x1133B, 0x1133C }, { 0x1133E, 0x1133E }, 
+    { 0x11340, 0x11340 }, { 0x11357, 0x11357 }, { 0x11366, 0x1136C }, 
+    { 0x11370, 0x11374 }, { 0x11438, 0x1143F }, { 0x11442, 0x11444 }, 
+    { 0x11446, 0x11446 }, { 0x1145E, 0x1145E }, { 0x114B0, 0x114B0 }, 
+    { 0x114B3, 0x114B8 }, { 0x114BA, 0x114BA }, { 0x114BD, 0x114BD }, 
+    { 0x114BF, 0x114C0 }, { 0x114C2, 0x114C3 }, { 0x115AF, 0x115AF }, 
+    { 0x115B2, 0x115B5 }, { 0x115BC, 0x115BD }, { 0x115BF, 0x115C0 }, 
+    { 0x115DC, 0x115DD }, { 0x11633, 0x1163A }, { 0x1163D, 0x1163D }, 
+    { 0x1163F, 0x11640 }, { 0x116AB, 0x116AB }, { 0x116AD, 0x116AD }, 
+    { 0x116B0, 0x116B5 }, { 0x116B7, 0x116B7 }, { 0x1171D, 0x1171F }, 
+    { 0x11722, 0x11725 }, { 0x11727, 0x1172B }, { 0x1182F, 0x11837 }, 
+    { 0x11839, 0x1183A }, { 0x11930, 0x11930 }, { 0x1193B, 0x1193C }, 
+    { 0x1193E, 0x1193E }, { 0x11943, 0x11943 }, { 0x119D4, 0x119D7 }, 
+    { 0x119DA, 0x119DB }, { 0x119E0, 0x119E0 }, { 0x11A01, 0x11A0A }, 
+    { 0x11A33, 0x11A38 }, { 0x11A3B, 0x11A3E }, { 0x11A47, 0x11A47 }, 
+    { 0x11A51, 0x11A56 }, { 0x11A59, 0x11A5B }, { 0x11A8A, 0x11A96 }, 
+    { 0x11A98, 0x11A99 }, { 0x11C30, 0x11C36 }, { 0x11C38, 0x11C3D }, 
+    { 0x11C3F, 0x11C3F }, { 0x11C92, 0x11CA7 }, { 0x11CAA, 0x11CB0 }, 
+    { 0x11CB2, 0x11CB3 }, { 0x11CB5, 0x11CB6 }, { 0x11D31, 0x11D36 }, 
+    { 0x11D3A, 0x11D3A }, { 0x11D3C, 0x11D3D }, { 0x11D3F, 0x11D45 }, 
+    { 0x11D47, 0x11D47 }, { 0x11D90, 0x11D91 }, { 0x11D95, 0x11D95 }, 
+    { 0x11D97, 0x11D97 }, { 0x11EF3, 0x11EF4 }, { 0x11F00, 0x11F01 }, 
+    { 0x11F36, 0x11F3A }, { 0x11F40, 0x11F40 }, { 0x11F42, 0x11F42 }, 
+    { 0x13440, 0x13440 }, { 0x13447, 0x13455 }, { 0x16AF0, 0x16AF4 }, 
+    { 0x16B30, 0x16B36 }, { 0x16F4F, 0x16F4F }, { 0x16F8F, 0x16F92 }, 
+    { 0x16FE4, 0x16FE4 }, { 0x1BC9D, 0x1BC9E }, { 0x1CF00, 0x1CF2D }, 
+    { 0x1CF30, 0x1CF46 }, { 0x1D165, 0x1D165 }, { 0x1D167, 0x1D169 }, 
+    { 0x1D16E, 0x1D172 }, { 0x1D17B, 0x1D182 }, { 0x1D185, 0x1D18B }, 
+    { 0x1D1AA, 0x1D1AD }, { 0x1D242, 0x1D244 }, { 0x1DA00, 0x1DA36 }, 
+    { 0x1DA3B, 0x1DA6C }, { 0x1DA75, 0x1DA75 }, { 0x1DA84, 0x1DA84 }, 
+    { 0x1DA9B, 0x1DA9F }, { 0x1DAA1, 0x1DAAF }, { 0x1E000, 0x1E006 }, 
+    { 0x1E008, 0x1E018 }, { 0x1E01B, 0x1E021 }, { 0x1E023, 0x1E024 }, 
+    { 0x1E026, 0x1E02A }, { 0x1E08F, 0x1E08F }, { 0x1E130, 0x1E136 }, 
+    { 0x1E2AE, 0x1E2AE }, { 0x1E2EC, 0x1E2EF }, { 0x1E4EC, 0x1E4EF }, 
+    { 0x1E8D0, 0x1E8D6 }, { 0x1E944, 0x1E94A }, { 0xE0020, 0xE007F }, 
+    { 0xE0100, 0xE01EF }
+  };
+
+  return bisearch(c, grapheme_extend, sizeof(grapheme_extend) / sizeof(struct interval) - 1);
+}
+
+static bool
+is_emoji_modifier(c)
+int c;
+{
+  /* True for U+1F3FB..U+1F3FF, the Emoji_Modifier range; see https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt */
+  return c >= 0x1F3FB && c <= 0x1F3FF;
+}
+
+/* "Extend" test used by grapheme_cluster_break() below: true if c is a Grapheme_Extend code point or an emoji modifier */
+static bool
+is_extend(c)
+int c;
+{
+  return is_grapheme_extend(c) || is_emoji_modifier(c);
+}
+
+static bool
+is_extended_pictographic(c)
+int c;
+{
+  /* Looks c up (via bisearch()) in the ascending Extended_Pictographic code point ranges below; data from https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt */
+  static const struct interval extended_pictographic[] = {
+    { 0x00A9, 0x00A9 }, { 0x00AE, 0x00AE }, { 0x203C, 0x203C }, 
+    { 0x2049, 0x2049 }, { 0x2122, 0x2122 }, { 0x2139, 0x2139 }, 
+    { 0x2194, 0x2199 }, { 0x21A9, 0x21AA }, { 0x231A, 0x231B }, 
+    { 0x2328, 0x2328 }, { 0x2388, 0x2388 }, { 0x23CF, 0x23CF }, 
+    { 0x23E9, 0x23EC }, { 0x23ED, 0x23EE }, { 0x23EF, 0x23EF }, 
+    { 0x23F0, 0x23F0 }, { 0x23F1, 0x23F2 }, { 0x23F3, 0x23F3 }, 
+    { 0x23F8, 0x23FA }, { 0x24C2, 0x24C2 }, { 0x25AA, 0x25AB }, 
+    { 0x25B6, 0x25B6 }, { 0x25C0, 0x25C0 }, { 0x25FB, 0x25FE }, 
+    { 0x2600, 0x2601 }, { 0x2602, 0x2603 }, { 0x2604, 0x2604 }, 
+    { 0x2605, 0x2605 }, { 0x2607, 0x260D }, { 0x260E, 0x260E }, 
+    { 0x260F, 0x2610 }, { 0x2611, 0x2611 }, { 0x2612, 0x2612 }, 
+    { 0x2614, 0x2615 }, { 0x2616, 0x2617 }, { 0x2618, 0x2618 }, 
+    { 0x2619, 0x261C }, { 0x261D, 0x261D }, { 0x261E, 0x261F }, 
+    { 0x2620, 0x2620 }, { 0x2621, 0x2621 }, { 0x2622, 0x2623 }, 
+    { 0x2624, 0x2625 }, { 0x2626, 0x2626 }, { 0x2627, 0x2629 }, 
+    { 0x262A, 0x262A }, { 0x262B, 0x262D }, { 0x262E, 0x262E }, 
+    { 0x262F, 0x262F }, { 0x2630, 0x2637 }, { 0x2638, 0x2639 }, 
+    { 0x263A, 0x263A }, { 0x263B, 0x263F }, { 0x2640, 0x2640 }, 
+    { 0x2641, 0x2641 }, { 0x2642, 0x2642 }, { 0x2643, 0x2647 }, 
+    { 0x2648, 0x2653 }, { 0x2654, 0x265E }, { 0x265F, 0x265F }, 
+    { 0x2660, 0x2660 }, { 0x2661, 0x2662 }, { 0x2663, 0x2663 }, 
+    { 0x2664, 0x2664 }, { 0x2665, 0x2666 }, { 0x2667, 0x2667 }, 
+    { 0x2668, 0x2668 }, { 0x2669, 0x267A }, { 0x267B, 0x267B }, 
+    { 0x267C, 0x267D }, { 0x267E, 0x267E }, { 0x267F, 0x267F }, 
+    { 0x2680, 0x2685 }, { 0x2690, 0x2691 }, { 0x2692, 0x2692 }, 
+    { 0x2693, 0x2693 }, { 0x2694, 0x2694 }, { 0x2695, 0x2695 }, 
+    { 0x2696, 0x2697 }, { 0x2698, 0x2698 }, { 0x2699, 0x2699 }, 
+    { 0x269A, 0x269A }, { 0x269B, 0x269C }, { 0x269D, 0x269F }, 
+    { 0x26A0, 0x26A1 }, { 0x26A2, 0x26A6 }, { 0x26A7, 0x26A7 }, 
+    { 0x26A8, 0x26A9 }, { 0x26AA, 0x26AB }, { 0x26AC, 0x26AF }, 
+    { 0x26B0, 0x26B1 }, { 0x26B2, 0x26BC }, { 0x26BD, 0x26BE }, 
+    { 0x26BF, 0x26C3 }, { 0x26C4, 0x26C5 }, { 0x26C6, 0x26C7 }, 
+    { 0x26C8, 0x26C8 }, { 0x26C9, 0x26CD }, { 0x26CE, 0x26CE }, 
+    { 0x26CF, 0x26CF }, { 0x26D0, 0x26D0 }, { 0x26D1, 0x26D1 }, 
+    { 0x26D2, 0x26D2 }, { 0x26D3, 0x26D3 }, { 0x26D4, 0x26D4 }, 
+    { 0x26D5, 0x26E8 }, { 0x26E9, 0x26E9 }, { 0x26EA, 0x26EA }, 
+    { 0x26EB, 0x26EF }, { 0x26F0, 0x26F1 }, { 0x26F2, 0x26F3 }, 
+    { 0x26F4, 0x26F4 }, { 0x26F5, 0x26F5 }, { 0x26F6, 0x26F6 }, 
+    { 0x26F7, 0x26F9 }, { 0x26FA, 0x26FA }, { 0x26FB, 0x26FC }, 
+    { 0x26FD, 0x26FD }, { 0x26FE, 0x2701 }, { 0x2702, 0x2702 }, 
+    { 0x2703, 0x2704 }, { 0x2705, 0x2705 }, { 0x2708, 0x270C }, 
+    { 0x270D, 0x270D }, { 0x270E, 0x270E }, { 0x270F, 0x270F }, 
+    { 0x2710, 0x2711 }, { 0x2712, 0x2712 }, { 0x2714, 0x2714 }, 
+    { 0x2716, 0x2716 }, { 0x271D, 0x271D }, { 0x2721, 0x2721 }, 
+    { 0x2728, 0x2728 }, { 0x2733, 0x2734 }, { 0x2744, 0x2744 }, 
+    { 0x2747, 0x2747 }, { 0x274C, 0x274C }, { 0x274E, 0x274E }, 
+    { 0x2753, 0x2755 }, { 0x2757, 0x2757 }, { 0x2763, 0x2763 }, 
+    { 0x2764, 0x2764 }, { 0x2765, 0x2767 }, { 0x2795, 0x2797 }, 
+    { 0x27A1, 0x27A1 }, { 0x27B0, 0x27B0 }, { 0x27BF, 0x27BF }, 
+    { 0x2934, 0x2935 }, { 0x2B05, 0x2B07 }, { 0x2B1B, 0x2B1C }, 
+    { 0x2B50, 0x2B50 }, { 0x2B55, 0x2B55 }, { 0x3030, 0x3030 }, 
+    { 0x303D, 0x303D }, { 0x3297, 0x3297 }, { 0x3299, 0x3299 }, 
+    { 0x1F000, 0x1F003 }, { 0x1F004, 0x1F004 }, { 0x1F005, 0x1F0CE }, 
+    { 0x1F0CF, 0x1F0CF }, { 0x1F0D0, 0x1F0FF }, { 0x1F10D, 0x1F10F }, 
+    { 0x1F12F, 0x1F12F }, { 0x1F16C, 0x1F16F }, { 0x1F170, 0x1F171 }, 
+    { 0x1F17E, 0x1F17F }, { 0x1F18E, 0x1F18E }, { 0x1F191, 0x1F19A }, 
+    { 0x1F1AD, 0x1F1E5 }, { 0x1F201, 0x1F202 }, { 0x1F203, 0x1F20F }, 
+    { 0x1F21A, 0x1F21A }, { 0x1F22F, 0x1F22F }, { 0x1F232, 0x1F23A }, 
+    { 0x1F23C, 0x1F23F }, { 0x1F249, 0x1F24F }, { 0x1F250, 0x1F251 }, 
+    { 0x1F252, 0x1F2FF }, { 0x1F300, 0x1F30C }, { 0x1F30D, 0x1F30E }, 
+    { 0x1F30F, 0x1F30F }, { 0x1F310, 0x1F310 }, { 0x1F311, 0x1F311 }, 
+    { 0x1F312, 0x1F312 }, { 0x1F313, 0x1F315 }, { 0x1F316, 0x1F318 }, 
+    { 0x1F319, 0x1F319 }, { 0x1F31A, 0x1F31A }, { 0x1F31B, 0x1F31B }, 
+    { 0x1F31C, 0x1F31C }, { 0x1F31D, 0x1F31E }, { 0x1F31F, 0x1F320 }, 
+    { 0x1F321, 0x1F321 }, { 0x1F322, 0x1F323 }, { 0x1F324, 0x1F32C }, 
+    { 0x1F32D, 0x1F32F }, { 0x1F330, 0x1F331 }, { 0x1F332, 0x1F333 }, 
+    { 0x1F334, 0x1F335 }, { 0x1F336, 0x1F336 }, { 0x1F337, 0x1F34A }, 
+    { 0x1F34B, 0x1F34B }, { 0x1F34C, 0x1F34F }, { 0x1F350, 0x1F350 }, 
+    { 0x1F351, 0x1F37B }, { 0x1F37C, 0x1F37C }, { 0x1F37D, 0x1F37D }, 
+    { 0x1F37E, 0x1F37F }, { 0x1F380, 0x1F393 }, { 0x1F394, 0x1F395 }, 
+    { 0x1F396, 0x1F397 }, { 0x1F398, 0x1F398 }, { 0x1F399, 0x1F39B }, 
+    { 0x1F39C, 0x1F39D }, { 0x1F39E, 0x1F39F }, { 0x1F3A0, 0x1F3C4 }, 
+    { 0x1F3C5, 0x1F3C5 }, { 0x1F3C6, 0x1F3C6 }, { 0x1F3C7, 0x1F3C7 }, 
+    { 0x1F3C8, 0x1F3C8 }, { 0x1F3C9, 0x1F3C9 }, { 0x1F3CA, 0x1F3CA }, 
+    { 0x1F3CB, 0x1F3CE }, { 0x1F3CF, 0x1F3D3 }, { 0x1F3D4, 0x1F3DF }, 
+    { 0x1F3E0, 0x1F3E3 }, { 0x1F3E4, 0x1F3E4 }, { 0x1F3E5, 0x1F3F0 }, 
+    { 0x1F3F1, 0x1F3F2 }, { 0x1F3F3, 0x1F3F3 }, { 0x1F3F4, 0x1F3F4 }, 
+    { 0x1F3F5, 0x1F3F5 }, { 0x1F3F6, 0x1F3F6 }, { 0x1F3F7, 0x1F3F7 }, 
+    { 0x1F3F8, 0x1F3FA }, { 0x1F400, 0x1F407 }, { 0x1F408, 0x1F408 }, 
+    { 0x1F409, 0x1F40B }, { 0x1F40C, 0x1F40E }, { 0x1F40F, 0x1F410 }, 
+    { 0x1F411, 0x1F412 }, { 0x1F413, 0x1F413 }, { 0x1F414, 0x1F414 }, 
+    { 0x1F415, 0x1F415 }, { 0x1F416, 0x1F416 }, { 0x1F417, 0x1F429 }, 
+    { 0x1F42A, 0x1F42A }, { 0x1F42B, 0x1F43E }, { 0x1F43F, 0x1F43F }, 
+    { 0x1F440, 0x1F440 }, { 0x1F441, 0x1F441 }, { 0x1F442, 0x1F464 }, 
+    { 0x1F465, 0x1F465 }, { 0x1F466, 0x1F46B }, { 0x1F46C, 0x1F46D }, 
+    { 0x1F46E, 0x1F4AC }, { 0x1F4AD, 0x1F4AD }, { 0x1F4AE, 0x1F4B5 }, 
+    { 0x1F4B6, 0x1F4B7 }, { 0x1F4B8, 0x1F4EB }, { 0x1F4EC, 0x1F4ED }, 
+    { 0x1F4EE, 0x1F4EE }, { 0x1F4EF, 0x1F4EF }, { 0x1F4F0, 0x1F4F4 }, 
+    { 0x1F4F5, 0x1F4F5 }, { 0x1F4F6, 0x1F4F7 }, { 0x1F4F8, 0x1F4F8 }, 
+    { 0x1F4F9, 0x1F4FC }, { 0x1F4FD, 0x1F4FD }, { 0x1F4FE, 0x1F4FE }, 
+    { 0x1F4FF, 0x1F502 }, { 0x1F503, 0x1F503 }, { 0x1F504, 0x1F507 }, 
+    { 0x1F508, 0x1F508 }, { 0x1F509, 0x1F509 }, { 0x1F50A, 0x1F514 }, 
+    { 0x1F515, 0x1F515 }, { 0x1F516, 0x1F52B }, { 0x1F52C, 0x1F52D }, 
+    { 0x1F52E, 0x1F53D }, { 0x1F546, 0x1F548 }, { 0x1F549, 0x1F54A }, 
+    { 0x1F54B, 0x1F54E }, { 0x1F54F, 0x1F54F }, { 0x1F550, 0x1F55B }, 
+    { 0x1F55C, 0x1F567 }, { 0x1F568, 0x1F56E }, { 0x1F56F, 0x1F570 }, 
+    { 0x1F571, 0x1F572 }, { 0x1F573, 0x1F579 }, { 0x1F57A, 0x1F57A }, 
+    { 0x1F57B, 0x1F586 }, { 0x1F587, 0x1F587 }, { 0x1F588, 0x1F589 }, 
+    { 0x1F58A, 0x1F58D }, { 0x1F58E, 0x1F58F }, { 0x1F590, 0x1F590 }, 
+    { 0x1F591, 0x1F594 }, { 0x1F595, 0x1F596 }, { 0x1F597, 0x1F5A3 }, 
+    { 0x1F5A4, 0x1F5A4 }, { 0x1F5A5, 0x1F5A5 }, { 0x1F5A6, 0x1F5A7 }, 
+    { 0x1F5A8, 0x1F5A8 }, { 0x1F5A9, 0x1F5B0 }, { 0x1F5B1, 0x1F5B2 }, 
+    { 0x1F5B3, 0x1F5BB }, { 0x1F5BC, 0x1F5BC }, { 0x1F5BD, 0x1F5C1 }, 
+    { 0x1F5C2, 0x1F5C4 }, { 0x1F5C5, 0x1F5D0 }, { 0x1F5D1, 0x1F5D3 }, 
+    { 0x1F5D4, 0x1F5DB }, { 0x1F5DC, 0x1F5DE }, { 0x1F5DF, 0x1F5E0 }, 
+    { 0x1F5E1, 0x1F5E1 }, { 0x1F5E2, 0x1F5E2 }, { 0x1F5E3, 0x1F5E3 }, 
+    { 0x1F5E4, 0x1F5E7 }, { 0x1F5E8, 0x1F5E8 }, { 0x1F5E9, 0x1F5EE }, 
+    { 0x1F5EF, 0x1F5EF }, { 0x1F5F0, 0x1F5F2 }, { 0x1F5F3, 0x1F5F3 }, 
+    { 0x1F5F4, 0x1F5F9 }, { 0x1F5FA, 0x1F5FA }, { 0x1F5FB, 0x1F5FF }, 
+    { 0x1F600, 0x1F600 }, { 0x1F601, 0x1F606 }, { 0x1F607, 0x1F608 }, 
+    { 0x1F609, 0x1F60D }, { 0x1F60E, 0x1F60E }, { 0x1F60F, 0x1F60F }, 
+    { 0x1F610, 0x1F610 }, { 0x1F611, 0x1F611 }, { 0x1F612, 0x1F614 }, 
+    { 0x1F615, 0x1F615 }, { 0x1F616, 0x1F616 }, { 0x1F617, 0x1F617 }, 
+    { 0x1F618, 0x1F618 }, { 0x1F619, 0x1F619 }, { 0x1F61A, 0x1F61A }, 
+    { 0x1F61B, 0x1F61B }, { 0x1F61C, 0x1F61E }, { 0x1F61F, 0x1F61F }, 
+    { 0x1F620, 0x1F625 }, { 0x1F626, 0x1F627 }, { 0x1F628, 0x1F62B }, 
+    { 0x1F62C, 0x1F62C }, { 0x1F62D, 0x1F62D }, { 0x1F62E, 0x1F62F }, 
+    { 0x1F630, 0x1F633 }, { 0x1F634, 0x1F634 }, { 0x1F635, 0x1F635 }, 
+    { 0x1F636, 0x1F636 }, { 0x1F637, 0x1F640 }, { 0x1F641, 0x1F644 }, 
+    { 0x1F645, 0x1F64F }, { 0x1F680, 0x1F680 }, { 0x1F681, 0x1F682 }, 
+    { 0x1F683, 0x1F685 }, { 0x1F686, 0x1F686 }, { 0x1F687, 0x1F687 }, 
+    { 0x1F688, 0x1F688 }, { 0x1F689, 0x1F689 }, { 0x1F68A, 0x1F68B }, 
+    { 0x1F68C, 0x1F68C }, { 0x1F68D, 0x1F68D }, { 0x1F68E, 0x1F68E }, 
+    { 0x1F68F, 0x1F68F }, { 0x1F690, 0x1F690 }, { 0x1F691, 0x1F693 }, 
+    { 0x1F694, 0x1F694 }, { 0x1F695, 0x1F695 }, { 0x1F696, 0x1F696 }, 
+    { 0x1F697, 0x1F697 }, { 0x1F698, 0x1F698 }, { 0x1F699, 0x1F69A }, 
+    { 0x1F69B, 0x1F6A1 }, { 0x1F6A2, 0x1F6A2 }, { 0x1F6A3, 0x1F6A3 }, 
+    { 0x1F6A4, 0x1F6A5 }, { 0x1F6A6, 0x1F6A6 }, { 0x1F6A7, 0x1F6AD }, 
+    { 0x1F6AE, 0x1F6B1 }, { 0x1F6B2, 0x1F6B2 }, { 0x1F6B3, 0x1F6B5 }, 
+    { 0x1F6B6, 0x1F6B6 }, { 0x1F6B7, 0x1F6B8 }, { 0x1F6B9, 0x1F6BE }, 
+    { 0x1F6BF, 0x1F6BF }, { 0x1F6C0, 0x1F6C0 }, { 0x1F6C1, 0x1F6C5 }, 
+    { 0x1F6C6, 0x1F6CA }, { 0x1F6CB, 0x1F6CB }, { 0x1F6CC, 0x1F6CC }, 
+    { 0x1F6CD, 0x1F6CF }, { 0x1F6D0, 0x1F6D0 }, { 0x1F6D1, 0x1F6D2 }, 
+    { 0x1F6D3, 0x1F6D4 }, { 0x1F6D5, 0x1F6D5 }, { 0x1F6D6, 0x1F6D7 }, 
+    { 0x1F6D8, 0x1F6DB }, { 0x1F6DC, 0x1F6DC }, { 0x1F6DD, 0x1F6DF }, 
+    { 0x1F6E0, 0x1F6E5 }, { 0x1F6E6, 0x1F6E8 }, { 0x1F6E9, 0x1F6E9 }, 
+    { 0x1F6EA, 0x1F6EA }, { 0x1F6EB, 0x1F6EC }, { 0x1F6ED, 0x1F6EF }, 
+    { 0x1F6F0, 0x1F6F0 }, { 0x1F6F1, 0x1F6F2 }, { 0x1F6F3, 0x1F6F3 }, 
+    { 0x1F6F4, 0x1F6F6 }, { 0x1F6F7, 0x1F6F8 }, { 0x1F6F9, 0x1F6F9 }, 
+    { 0x1F6FA, 0x1F6FA }, { 0x1F6FB, 0x1F6FC }, { 0x1F6FD, 0x1F6FF }, 
+    { 0x1F774, 0x1F77F }, { 0x1F7D5, 0x1F7DF }, { 0x1F7E0, 0x1F7EB }, 
+    { 0x1F7EC, 0x1F7EF }, { 0x1F7F0, 0x1F7F0 }, { 0x1F7F1, 0x1F7FF }, 
+    { 0x1F80C, 0x1F80F }, { 0x1F848, 0x1F84F }, { 0x1F85A, 0x1F85F }, 
+    { 0x1F888, 0x1F88F }, { 0x1F8AE, 0x1F8FF }, { 0x1F90C, 0x1F90C }, 
+    { 0x1F90D, 0x1F90F }, { 0x1F910, 0x1F918 }, { 0x1F919, 0x1F91E }, 
+    { 0x1F91F, 0x1F91F }, { 0x1F920, 0x1F927 }, { 0x1F928, 0x1F92F }, 
+    { 0x1F930, 0x1F930 }, { 0x1F931, 0x1F932 }, { 0x1F933, 0x1F93A }, 
+    { 0x1F93C, 0x1F93E }, { 0x1F93F, 0x1F93F }, { 0x1F940, 0x1F945 }, 
+    { 0x1F947, 0x1F94B }, { 0x1F94C, 0x1F94C }, { 0x1F94D, 0x1F94F }, 
+    { 0x1F950, 0x1F95E }, { 0x1F95F, 0x1F96B }, { 0x1F96C, 0x1F970 }, 
+    { 0x1F971, 0x1F971 }, { 0x1F972, 0x1F972 }, { 0x1F973, 0x1F976 }, 
+    { 0x1F977, 0x1F978 }, { 0x1F979, 0x1F979 }, { 0x1F97A, 0x1F97A }, 
+    { 0x1F97B, 0x1F97B }, { 0x1F97C, 0x1F97F }, { 0x1F980, 0x1F984 }, 
+    { 0x1F985, 0x1F991 }, { 0x1F992, 0x1F997 }, { 0x1F998, 0x1F9A2 }, 
+    { 0x1F9A3, 0x1F9A4 }, { 0x1F9A5, 0x1F9AA }, { 0x1F9AB, 0x1F9AD }, 
+    { 0x1F9AE, 0x1F9AF }, { 0x1F9B0, 0x1F9B9 }, { 0x1F9BA, 0x1F9BF }, 
+    { 0x1F9C0, 0x1F9C0 }, { 0x1F9C1, 0x1F9C2 }, { 0x1F9C3, 0x1F9CA }, 
+    { 0x1F9CB, 0x1F9CB }, { 0x1F9CC, 0x1F9CC }, { 0x1F9CD, 0x1F9CF }, 
+    { 0x1F9D0, 0x1F9E6 }, { 0x1F9E7, 0x1F9FF }, { 0x1FA00, 0x1FA6F }, 
+    { 0x1FA70, 0x1FA73 }, { 0x1FA74, 0x1FA74 }, { 0x1FA75, 0x1FA77 }, 
+    { 0x1FA78, 0x1FA7A }, { 0x1FA7B, 0x1FA7C }, { 0x1FA7D, 0x1FA7F }, 
+    { 0x1FA80, 0x1FA82 }, { 0x1FA83, 0x1FA86 }, { 0x1FA87, 0x1FA88 }, 
+    { 0x1FA89, 0x1FA8F }, { 0x1FA90, 0x1FA95 }, { 0x1FA96, 0x1FAA8 }, 
+    { 0x1FAA9, 0x1FAAC }, { 0x1FAAD, 0x1FAAF }, { 0x1FAB0, 0x1FAB6 }, 
+    { 0x1FAB7, 0x1FABA }, { 0x1FABB, 0x1FABD }, { 0x1FABE, 0x1FABE }, 
+    { 0x1FABF, 0x1FABF }, { 0x1FAC0, 0x1FAC2 }, { 0x1FAC3, 0x1FAC5 }, 
+    { 0x1FAC6, 0x1FACD }, { 0x1FACE, 0x1FACF }, { 0x1FAD0, 0x1FAD6 }, 
+    { 0x1FAD7, 0x1FAD9 }, { 0x1FADA, 0x1FADB }, { 0x1FADC, 0x1FADF }, 
+    { 0x1FAE0, 0x1FAE7 }, { 0x1FAE8, 0x1FAE8 }, { 0x1FAE9, 0x1FAEF }, 
+    { 0x1FAF0, 0x1FAF6 }, { 0x1FAF7, 0x1FAF8 }, { 0x1FAF9, 0x1FAFF }, 
+    { 0x1FC00, 0x1FFFD },
+  };
+
+  return bisearch(c, extended_pictographic, sizeof(extended_pictographic) / sizeof(struct interval) - 1);
+}
+
+static bool /* true iff c is in 0xd800..0xdfff, combchars is non-NULL and combchars[c - 0xd800] is non-NULL */
+is_valid_combchar(c)
+int c;
+{
+  return (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800]);
+}
+
+/*
+  Returns 1 if there should be a grapheme cluster break between c1 and
+  c2, and 0 if c2 belongs to the cluster that c1 ends.  c1 is a sequence
+  of zero or more code points (if there are multiple ones, they are
+  represented by a fake code point in the surrogate range
+  U+D800..U+DFFF, pointing into combchars[]) and c2 is a single one.
+
+  This is an incomplete implementation (GB1, GB2, GB9, GB11, GB999) of
+  https://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
+ */
+int
+grapheme_cluster_break(c1, c2)
+int c1;
+int c2;
+{
+  /* GB1, GB2: Break at the start and end of text, unless the text is empty. */
+  if (c1 == 0 || c2 == 0)
+    return 1;
+
+  /* GB9: Do not break before extending characters or ZWJ. */
+  if (is_extend(c2) || c2 == 0x200d) 
+    return 0;
+
+  /* GB11: Do not break within emoji modifier sequences or emoji zwj sequences. */
+  if (is_extended_pictographic(c2) &&
+      is_valid_combchar(c1) && combchars[c1 - 0xd800]->c2 == 0x200d)
+    {
+      /* c1 is a combined sequence whose last code point is ZWJ; walk back through its prefix */
+      int c;
+
+      c = combchars[c1 - 0xd800]->c1;
+
+      /* skip zero or more trailing Extend code points (each entry's c1 is the part before it) */
+      while (is_valid_combchar(c) && is_extend(combchars[c - 0xd800]->c2))
+        c = combchars[c - 0xd800]->c1;
+
+      /* an Extended_Pictographic before those (possibly with something yet earlier) */
+      if (is_extended_pictographic(c) ||
+          (is_valid_combchar(c) && is_extended_pictographic(combchars[c - 0xd800]->c2)))
+        return 0;
+    } 
+
+  /* we don't seem to need GB12/GB13 (emoji flag sequences), since they get the right width
+     by just treating each regional indicator symbol as 1 */
+
+  /* GB999: Otherwise, break everywhere.  NOTE(review): that includes any c2
+     that utf8_iscomb() accepts but the rules above do not -- confirm intended. */
+  return 1;
+}
+
 static void
 comb_tofront(i)
 int i;
@@ -1245,7 +1655,16 @@
   int isdouble;
 
   c1 = mc->image | (mc->font << 8) | mc->fontx << 16;
-  isdouble = c1 >= 0x1100 && utf8_isdouble(c1);
+  isdouble = utf8_isdouble(c) || (c1 >= 0x1100 && utf8_isdouble(c1));
+  /* U+FE0F is a variation selector (VARIATION SELECTOR-16) that requests
+     emoji presentation for the previous character. If it is being combined,
+     the result is almost certainly an emoji, which is double-width.
+     (The alternative would be that someone did e.g. U+0041 U+FE0F to get
+     the emoji variant of capital A, which is meaningless.)
+   */
+  if (c == 0xfe0f) {
+    isdouble = 1;
+  }
   if (!combchars)
     {
       combchars = (struct combchar **)calloc(0x802, sizeof(struct combchar *));
diff -ur /home/sesse/nmu/orig/screen-4.9.0/extern.h ../extern.h
--- /home/sesse/nmu/orig/screen-4.9.0/extern.h	2023-06-27 22:53:44.000000000 +0200
+++ ../extern.h	2023-06-27 21:37:08.000000000 +0200
@@ -496,6 +496,7 @@
 extern int   ToUtf8_comb __P((char *, int));
 extern int   utf8_isdouble __P((int));
 extern int   utf8_iscomb __P((int));
+extern int   grapheme_cluster_break __P((int, int));
 extern void  utf8_handle_comb __P((int, struct mchar *));
 extern int   ContainsSpecialDeffont __P((struct mline *, int, int, int));
 extern int   LoadFontTranslation __P((int, char *));

Reply via email to