From 09ac998fdf6d169a7f7a4d053038e2f48205052f Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Fri, 20 Aug 2021 11:10:17 -0400
Subject: [PATCH v2 2/4] Allow callers of mbbisearch to get an explicit character
 width

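Add a width field to struct mbinterval and have mbbisearch() return a
pointer to the matching range, or NULL if there is none, rather than
just a boolean for success. The table of combining characters is
renamed to wcwidth, with every existing range given an explicit width
of zero. A future commit will add ranges with widths other than zero,
and this change allows it to reuse the same search.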
---
 .../unicode/generate-unicode_width_table.pl   |   6 +-
 src/common/wchar.c                            |  20 +-
 src/include/common/unicode_width_table.h      | 388 +++++++++---------
 3 files changed, 209 insertions(+), 205 deletions(-)

diff --git a/src/common/unicode/generate-unicode_width_table.pl b/src/common/unicode/generate-unicode_width_table.pl
index 86aed78907..0cf44b029c 100644
--- a/src/common/unicode/generate-unicode_width_table.pl
+++ b/src/common/unicode/generate-unicode_width_table.pl
@@ -15,9 +15,9 @@ my $prev_codepoint;
 my $count = 0;
 
 print
-  "/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */\n\n";
+  "/* generated by src/common/unicode/generate-unicode_width_table.pl, do not edit */\n\n";
 
-print "static const struct mbinterval combining[] = {\n";
+print "static const struct mbinterval wcwidth[] = {\n";
 
 foreach my $line (<ARGV>)
 {
@@ -40,7 +40,7 @@ foreach my $line (<ARGV>)
 		# not a combining character, print out previous range if any
 		if (defined($range_start))
 		{
-			printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
+			printf "\t{0x%04X, 0x%04X, 0},\n", $range_start, $prev_codepoint;
 			$range_start = undef;
 		}
 	}
diff --git a/src/common/wchar.c b/src/common/wchar.c
index bb97b5f54f..c0397ca139 100644
--- a/src/common/wchar.c
+++ b/src/common/wchar.c
@@ -585,17 +585,18 @@ struct mbinterval
 {
 	unsigned short first;
 	unsigned short last;
+	signed short width;
 };
 
 /* auxiliary function for binary search in interval table */
-static int
+static const struct mbinterval *
 mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
 {
 	int			min = 0;
 	int			mid;
 
 	if (ucs < table[0].first || ucs > table[max].last)
-		return 0;
+		return NULL;
 	while (max >= min)
 	{
 		mid = (min + max) / 2;
@@ -604,10 +605,10 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
 		else if (ucs < table[mid].first)
 			max = mid - 1;
 		else
-			return 1;
+			return &table[mid];
 	}
 
-	return 0;
+	return NULL;
 }
 
 
@@ -653,10 +654,13 @@ ucs_wcwidth(pg_wchar ucs)
 	if (ucs < 0x20 || (ucs >= 0x7f && ucs < 0xa0) || ucs > 0x0010ffff)
 		return -1;
 
-	/* binary search in table of non-spacing characters */
-	if (mbbisearch(ucs, combining,
-				   sizeof(combining) / sizeof(struct mbinterval) - 1))
-		return 0;
+	/* binary search in table of character widths */
+	const struct mbinterval *range =
+	mbbisearch(ucs, wcwidth,
+			   sizeof(wcwidth) / sizeof(struct mbinterval) - 1);
+
+	if (range != NULL)
+		return range->width;
 
 	/*
 	 * if we arrive here, ucs is not a combining or C0/C1 control character
diff --git a/src/include/common/unicode_width_table.h b/src/include/common/unicode_width_table.h
index a9f10c31bc..3b161f47a4 100644
--- a/src/include/common/unicode_width_table.h
+++ b/src/include/common/unicode_width_table.h
@@ -1,196 +1,196 @@
-/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */
+/* generated by src/common/unicode/generate-unicode_width_table.pl, do not edit */
 
-static const struct mbinterval combining[] = {
-	{0x0300, 0x036F},
-	{0x0483, 0x0489},
-	{0x0591, 0x05BD},
-	{0x05BF, 0x05BF},
-	{0x05C1, 0x05C2},
-	{0x05C4, 0x05C5},
-	{0x05C7, 0x05C7},
-	{0x0610, 0x061A},
-	{0x064B, 0x065F},
-	{0x0670, 0x0670},
-	{0x06D6, 0x06DC},
-	{0x06DF, 0x06E4},
-	{0x06E7, 0x06E8},
-	{0x06EA, 0x06ED},
-	{0x0711, 0x0711},
-	{0x0730, 0x074A},
-	{0x07A6, 0x07B0},
-	{0x07EB, 0x07F3},
-	{0x07FD, 0x07FD},
-	{0x0816, 0x0819},
-	{0x081B, 0x0823},
-	{0x0825, 0x0827},
-	{0x0829, 0x082D},
-	{0x0859, 0x085B},
-	{0x08D3, 0x08E1},
-	{0x08E3, 0x0902},
-	{0x093A, 0x093A},
-	{0x093C, 0x093C},
-	{0x0941, 0x0948},
-	{0x094D, 0x094D},
-	{0x0951, 0x0957},
-	{0x0962, 0x0963},
-	{0x0981, 0x0981},
-	{0x09BC, 0x09BC},
-	{0x09C1, 0x09C4},
-	{0x09CD, 0x09CD},
-	{0x09E2, 0x09E3},
-	{0x09FE, 0x0A02},
-	{0x0A3C, 0x0A3C},
-	{0x0A41, 0x0A51},
-	{0x0A70, 0x0A71},
-	{0x0A75, 0x0A75},
-	{0x0A81, 0x0A82},
-	{0x0ABC, 0x0ABC},
-	{0x0AC1, 0x0AC8},
-	{0x0ACD, 0x0ACD},
-	{0x0AE2, 0x0AE3},
-	{0x0AFA, 0x0B01},
-	{0x0B3C, 0x0B3C},
-	{0x0B3F, 0x0B3F},
-	{0x0B41, 0x0B44},
-	{0x0B4D, 0x0B56},
-	{0x0B62, 0x0B63},
-	{0x0B82, 0x0B82},
-	{0x0BC0, 0x0BC0},
-	{0x0BCD, 0x0BCD},
-	{0x0C00, 0x0C00},
-	{0x0C04, 0x0C04},
-	{0x0C3E, 0x0C40},
-	{0x0C46, 0x0C56},
-	{0x0C62, 0x0C63},
-	{0x0C81, 0x0C81},
-	{0x0CBC, 0x0CBC},
-	{0x0CBF, 0x0CBF},
-	{0x0CC6, 0x0CC6},
-	{0x0CCC, 0x0CCD},
-	{0x0CE2, 0x0CE3},
-	{0x0D00, 0x0D01},
-	{0x0D3B, 0x0D3C},
-	{0x0D41, 0x0D44},
-	{0x0D4D, 0x0D4D},
-	{0x0D62, 0x0D63},
-	{0x0D81, 0x0D81},
-	{0x0DCA, 0x0DCA},
-	{0x0DD2, 0x0DD6},
-	{0x0E31, 0x0E31},
-	{0x0E34, 0x0E3A},
-	{0x0E47, 0x0E4E},
-	{0x0EB1, 0x0EB1},
-	{0x0EB4, 0x0EBC},
-	{0x0EC8, 0x0ECD},
-	{0x0F18, 0x0F19},
-	{0x0F35, 0x0F35},
-	{0x0F37, 0x0F37},
-	{0x0F39, 0x0F39},
-	{0x0F71, 0x0F7E},
-	{0x0F80, 0x0F84},
-	{0x0F86, 0x0F87},
-	{0x0F8D, 0x0FBC},
-	{0x0FC6, 0x0FC6},
-	{0x102D, 0x1030},
-	{0x1032, 0x1037},
-	{0x1039, 0x103A},
-	{0x103D, 0x103E},
-	{0x1058, 0x1059},
-	{0x105E, 0x1060},
-	{0x1071, 0x1074},
-	{0x1082, 0x1082},
-	{0x1085, 0x1086},
-	{0x108D, 0x108D},
-	{0x109D, 0x109D},
-	{0x135D, 0x135F},
-	{0x1712, 0x1714},
-	{0x1732, 0x1734},
-	{0x1752, 0x1753},
-	{0x1772, 0x1773},
-	{0x17B4, 0x17B5},
-	{0x17B7, 0x17BD},
-	{0x17C6, 0x17C6},
-	{0x17C9, 0x17D3},
-	{0x17DD, 0x17DD},
-	{0x180B, 0x180D},
-	{0x1885, 0x1886},
-	{0x18A9, 0x18A9},
-	{0x1920, 0x1922},
-	{0x1927, 0x1928},
-	{0x1932, 0x1932},
-	{0x1939, 0x193B},
-	{0x1A17, 0x1A18},
-	{0x1A1B, 0x1A1B},
-	{0x1A56, 0x1A56},
-	{0x1A58, 0x1A60},
-	{0x1A62, 0x1A62},
-	{0x1A65, 0x1A6C},
-	{0x1A73, 0x1A7F},
-	{0x1AB0, 0x1B03},
-	{0x1B34, 0x1B34},
-	{0x1B36, 0x1B3A},
-	{0x1B3C, 0x1B3C},
-	{0x1B42, 0x1B42},
-	{0x1B6B, 0x1B73},
-	{0x1B80, 0x1B81},
-	{0x1BA2, 0x1BA5},
-	{0x1BA8, 0x1BA9},
-	{0x1BAB, 0x1BAD},
-	{0x1BE6, 0x1BE6},
-	{0x1BE8, 0x1BE9},
-	{0x1BED, 0x1BED},
-	{0x1BEF, 0x1BF1},
-	{0x1C2C, 0x1C33},
-	{0x1C36, 0x1C37},
-	{0x1CD0, 0x1CD2},
-	{0x1CD4, 0x1CE0},
-	{0x1CE2, 0x1CE8},
-	{0x1CED, 0x1CED},
-	{0x1CF4, 0x1CF4},
-	{0x1CF8, 0x1CF9},
-	{0x1DC0, 0x1DFF},
-	{0x20D0, 0x20F0},
-	{0x2CEF, 0x2CF1},
-	{0x2D7F, 0x2D7F},
-	{0x2DE0, 0x2DFF},
-	{0x302A, 0x302D},
-	{0x3099, 0x309A},
-	{0xA66F, 0xA672},
-	{0xA674, 0xA67D},
-	{0xA69E, 0xA69F},
-	{0xA6F0, 0xA6F1},
-	{0xA802, 0xA802},
-	{0xA806, 0xA806},
-	{0xA80B, 0xA80B},
-	{0xA825, 0xA826},
-	{0xA82C, 0xA82C},
-	{0xA8C4, 0xA8C5},
-	{0xA8E0, 0xA8F1},
-	{0xA8FF, 0xA8FF},
-	{0xA926, 0xA92D},
-	{0xA947, 0xA951},
-	{0xA980, 0xA982},
-	{0xA9B3, 0xA9B3},
-	{0xA9B6, 0xA9B9},
-	{0xA9BC, 0xA9BD},
-	{0xA9E5, 0xA9E5},
-	{0xAA29, 0xAA2E},
-	{0xAA31, 0xAA32},
-	{0xAA35, 0xAA36},
-	{0xAA43, 0xAA43},
-	{0xAA4C, 0xAA4C},
-	{0xAA7C, 0xAA7C},
-	{0xAAB0, 0xAAB0},
-	{0xAAB2, 0xAAB4},
-	{0xAAB7, 0xAAB8},
-	{0xAABE, 0xAABF},
-	{0xAAC1, 0xAAC1},
-	{0xAAEC, 0xAAED},
-	{0xAAF6, 0xAAF6},
-	{0xABE5, 0xABE5},
-	{0xABE8, 0xABE8},
-	{0xABED, 0xABED},
-	{0xFB1E, 0xFB1E},
-	{0xFE00, 0xFE0F},
-	{0xFE20, 0xFE2F},
+static const struct mbinterval wcwidth[] = {
+	{0x0300, 0x036F, 0},
+	{0x0483, 0x0489, 0},
+	{0x0591, 0x05BD, 0},
+	{0x05BF, 0x05BF, 0},
+	{0x05C1, 0x05C2, 0},
+	{0x05C4, 0x05C5, 0},
+	{0x05C7, 0x05C7, 0},
+	{0x0610, 0x061A, 0},
+	{0x064B, 0x065F, 0},
+	{0x0670, 0x0670, 0},
+	{0x06D6, 0x06DC, 0},
+	{0x06DF, 0x06E4, 0},
+	{0x06E7, 0x06E8, 0},
+	{0x06EA, 0x06ED, 0},
+	{0x0711, 0x0711, 0},
+	{0x0730, 0x074A, 0},
+	{0x07A6, 0x07B0, 0},
+	{0x07EB, 0x07F3, 0},
+	{0x07FD, 0x07FD, 0},
+	{0x0816, 0x0819, 0},
+	{0x081B, 0x0823, 0},
+	{0x0825, 0x0827, 0},
+	{0x0829, 0x082D, 0},
+	{0x0859, 0x085B, 0},
+	{0x08D3, 0x08E1, 0},
+	{0x08E3, 0x0902, 0},
+	{0x093A, 0x093A, 0},
+	{0x093C, 0x093C, 0},
+	{0x0941, 0x0948, 0},
+	{0x094D, 0x094D, 0},
+	{0x0951, 0x0957, 0},
+	{0x0962, 0x0963, 0},
+	{0x0981, 0x0981, 0},
+	{0x09BC, 0x09BC, 0},
+	{0x09C1, 0x09C4, 0},
+	{0x09CD, 0x09CD, 0},
+	{0x09E2, 0x09E3, 0},
+	{0x09FE, 0x0A02, 0},
+	{0x0A3C, 0x0A3C, 0},
+	{0x0A41, 0x0A51, 0},
+	{0x0A70, 0x0A71, 0},
+	{0x0A75, 0x0A75, 0},
+	{0x0A81, 0x0A82, 0},
+	{0x0ABC, 0x0ABC, 0},
+	{0x0AC1, 0x0AC8, 0},
+	{0x0ACD, 0x0ACD, 0},
+	{0x0AE2, 0x0AE3, 0},
+	{0x0AFA, 0x0B01, 0},
+	{0x0B3C, 0x0B3C, 0},
+	{0x0B3F, 0x0B3F, 0},
+	{0x0B41, 0x0B44, 0},
+	{0x0B4D, 0x0B56, 0},
+	{0x0B62, 0x0B63, 0},
+	{0x0B82, 0x0B82, 0},
+	{0x0BC0, 0x0BC0, 0},
+	{0x0BCD, 0x0BCD, 0},
+	{0x0C00, 0x0C00, 0},
+	{0x0C04, 0x0C04, 0},
+	{0x0C3E, 0x0C40, 0},
+	{0x0C46, 0x0C56, 0},
+	{0x0C62, 0x0C63, 0},
+	{0x0C81, 0x0C81, 0},
+	{0x0CBC, 0x0CBC, 0},
+	{0x0CBF, 0x0CBF, 0},
+	{0x0CC6, 0x0CC6, 0},
+	{0x0CCC, 0x0CCD, 0},
+	{0x0CE2, 0x0CE3, 0},
+	{0x0D00, 0x0D01, 0},
+	{0x0D3B, 0x0D3C, 0},
+	{0x0D41, 0x0D44, 0},
+	{0x0D4D, 0x0D4D, 0},
+	{0x0D62, 0x0D63, 0},
+	{0x0D81, 0x0D81, 0},
+	{0x0DCA, 0x0DCA, 0},
+	{0x0DD2, 0x0DD6, 0},
+	{0x0E31, 0x0E31, 0},
+	{0x0E34, 0x0E3A, 0},
+	{0x0E47, 0x0E4E, 0},
+	{0x0EB1, 0x0EB1, 0},
+	{0x0EB4, 0x0EBC, 0},
+	{0x0EC8, 0x0ECD, 0},
+	{0x0F18, 0x0F19, 0},
+	{0x0F35, 0x0F35, 0},
+	{0x0F37, 0x0F37, 0},
+	{0x0F39, 0x0F39, 0},
+	{0x0F71, 0x0F7E, 0},
+	{0x0F80, 0x0F84, 0},
+	{0x0F86, 0x0F87, 0},
+	{0x0F8D, 0x0FBC, 0},
+	{0x0FC6, 0x0FC6, 0},
+	{0x102D, 0x1030, 0},
+	{0x1032, 0x1037, 0},
+	{0x1039, 0x103A, 0},
+	{0x103D, 0x103E, 0},
+	{0x1058, 0x1059, 0},
+	{0x105E, 0x1060, 0},
+	{0x1071, 0x1074, 0},
+	{0x1082, 0x1082, 0},
+	{0x1085, 0x1086, 0},
+	{0x108D, 0x108D, 0},
+	{0x109D, 0x109D, 0},
+	{0x135D, 0x135F, 0},
+	{0x1712, 0x1714, 0},
+	{0x1732, 0x1734, 0},
+	{0x1752, 0x1753, 0},
+	{0x1772, 0x1773, 0},
+	{0x17B4, 0x17B5, 0},
+	{0x17B7, 0x17BD, 0},
+	{0x17C6, 0x17C6, 0},
+	{0x17C9, 0x17D3, 0},
+	{0x17DD, 0x17DD, 0},
+	{0x180B, 0x180D, 0},
+	{0x1885, 0x1886, 0},
+	{0x18A9, 0x18A9, 0},
+	{0x1920, 0x1922, 0},
+	{0x1927, 0x1928, 0},
+	{0x1932, 0x1932, 0},
+	{0x1939, 0x193B, 0},
+	{0x1A17, 0x1A18, 0},
+	{0x1A1B, 0x1A1B, 0},
+	{0x1A56, 0x1A56, 0},
+	{0x1A58, 0x1A60, 0},
+	{0x1A62, 0x1A62, 0},
+	{0x1A65, 0x1A6C, 0},
+	{0x1A73, 0x1A7F, 0},
+	{0x1AB0, 0x1B03, 0},
+	{0x1B34, 0x1B34, 0},
+	{0x1B36, 0x1B3A, 0},
+	{0x1B3C, 0x1B3C, 0},
+	{0x1B42, 0x1B42, 0},
+	{0x1B6B, 0x1B73, 0},
+	{0x1B80, 0x1B81, 0},
+	{0x1BA2, 0x1BA5, 0},
+	{0x1BA8, 0x1BA9, 0},
+	{0x1BAB, 0x1BAD, 0},
+	{0x1BE6, 0x1BE6, 0},
+	{0x1BE8, 0x1BE9, 0},
+	{0x1BED, 0x1BED, 0},
+	{0x1BEF, 0x1BF1, 0},
+	{0x1C2C, 0x1C33, 0},
+	{0x1C36, 0x1C37, 0},
+	{0x1CD0, 0x1CD2, 0},
+	{0x1CD4, 0x1CE0, 0},
+	{0x1CE2, 0x1CE8, 0},
+	{0x1CED, 0x1CED, 0},
+	{0x1CF4, 0x1CF4, 0},
+	{0x1CF8, 0x1CF9, 0},
+	{0x1DC0, 0x1DFF, 0},
+	{0x20D0, 0x20F0, 0},
+	{0x2CEF, 0x2CF1, 0},
+	{0x2D7F, 0x2D7F, 0},
+	{0x2DE0, 0x2DFF, 0},
+	{0x302A, 0x302D, 0},
+	{0x3099, 0x309A, 0},
+	{0xA66F, 0xA672, 0},
+	{0xA674, 0xA67D, 0},
+	{0xA69E, 0xA69F, 0},
+	{0xA6F0, 0xA6F1, 0},
+	{0xA802, 0xA802, 0},
+	{0xA806, 0xA806, 0},
+	{0xA80B, 0xA80B, 0},
+	{0xA825, 0xA826, 0},
+	{0xA82C, 0xA82C, 0},
+	{0xA8C4, 0xA8C5, 0},
+	{0xA8E0, 0xA8F1, 0},
+	{0xA8FF, 0xA8FF, 0},
+	{0xA926, 0xA92D, 0},
+	{0xA947, 0xA951, 0},
+	{0xA980, 0xA982, 0},
+	{0xA9B3, 0xA9B3, 0},
+	{0xA9B6, 0xA9B9, 0},
+	{0xA9BC, 0xA9BD, 0},
+	{0xA9E5, 0xA9E5, 0},
+	{0xAA29, 0xAA2E, 0},
+	{0xAA31, 0xAA32, 0},
+	{0xAA35, 0xAA36, 0},
+	{0xAA43, 0xAA43, 0},
+	{0xAA4C, 0xAA4C, 0},
+	{0xAA7C, 0xAA7C, 0},
+	{0xAAB0, 0xAAB0, 0},
+	{0xAAB2, 0xAAB4, 0},
+	{0xAAB7, 0xAAB8, 0},
+	{0xAABE, 0xAABF, 0},
+	{0xAAC1, 0xAAC1, 0},
+	{0xAAEC, 0xAAED, 0},
+	{0xAAF6, 0xAAF6, 0},
+	{0xABE5, 0xABE5, 0},
+	{0xABE8, 0xABE8, 0},
+	{0xABED, 0xABED, 0},
+	{0xFB1E, 0xFB1E, 0},
+	{0xFE00, 0xFE0F, 0},
+	{0xFE20, 0xFE2F, 0},
 };
-- 
2.31.1

