On 2019-06-13 15:52, Alvaro Herrera wrote:
> I think there's an off-by-one bug in your script.

Indeed.  Here is an updated script and patch.

-- 
Peter Eisentraut              http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
use strict;
use warnings;

# Generate the C initializer for the "combining" interval table from
# semicolon-separated character data (UnicodeData.txt layout: field 0 is
# the code point in hex, field 2 is the general category).
#
# Usage: perl <this script> UnicodeData.txt > table.txt
#
# Input is expected to be sorted by code point.  A range runs from the
# first combining character (category Me or Mn) up to the last combining
# character seen before the next listed non-combining character, so code
# points that are absent from the input do not split a range.

# Highest code point that may appear in the generated table.
use constant MAX_CODEPOINT => 0xFFFF;

# Number of {first, last} entries emitted per output line.
use constant RANGES_PER_LINE => 3;

# combining_ranges($fh)
#
# Read character data lines from the filehandle $fh and return a list of
# array refs [first, last], one per run of consecutive input lines whose
# category is Me or Mn and whose code point is <= MAX_CODEPOINT.
sub combining_ranges
{
    my ($fh) = @_;

    my @ranges;
    my $range_start;
    my $range_end;

    # Read line by line rather than slurping the whole input into a list.
    while (my $line = <$fh>)
    {
        chomp $line;
        my @fields = split ';', $line;

        # Ignore blank or malformed lines instead of treating them as
        # code point 0 with an undefined category.
        next if @fields < 3 || $fields[0] !~ /\A[0-9A-Fa-f]+\z/;

        my $codepoint = hex $fields[0];

        # Code points above the limit are never part of the table, but
        # they still terminate a range that is currently open.
        my $is_combining = $codepoint <= MAX_CODEPOINT
          && ($fields[2] eq 'Me' || $fields[2] eq 'Mn');

        if ($is_combining)
        {
            $range_start = $codepoint unless defined $range_start;
            $range_end = $codepoint;
        }
        elsif (defined $range_start)
        {
            push @ranges, [ $range_start, $range_end ];
            $range_start = undef;
        }
    }

    # Flush a range that was still open when the input ended; otherwise
    # a trailing run of combining characters would be silently lost.
    push @ranges, [ $range_start, $range_end ] if defined $range_start;

    return @ranges;
}

# format_table(@ranges)
#
# Render the [first, last] pairs as the text of a C array initializer,
# RANGES_PER_LINE entries per line, and return it as a single string.
sub format_table
{
    my (@ranges) = @_;

    my $out = "\tstatic const struct mbinterval combining[] = {";
    my $count = 0;

    foreach my $range (@ranges)
    {
        # Start a new indented line every RANGES_PER_LINE entries.
        $out .= ($count++ % RANGES_PER_LINE == 0) ? "\n\t\t" : " ";
        $out .= sprintf "{0x%04X, 0x%04X},", $range->[0], $range->[1];
    }

    $out .= "\n\t};\n";
    return $out;
}

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. by a test) without consuming any input.
print format_table(combining_ranges(\*ARGV)) unless caller;
From da90031113908ee9869ae87a5edbf52992d16a96 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Fri, 14 Jun 2019 11:30:44 +0200
Subject: [PATCH v2] Update list of combining characters

The list of combining characters to ignore for calculating the display
width of a string (used for example by psql) was wildly outdated and
incorrect.

https://www.postgresql.org/message-id/flat/bbb19114-af1e-513b-08a9-61272794bd5c%402ndquadrant.com
---
 src/backend/utils/mb/wchar.c | 94 ++++++++++++++++++++++++------------
 1 file changed, 62 insertions(+), 32 deletions(-)

diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
index 8e5116dfc1..1b5ce1740c 100644
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
@@ -645,40 +645,70 @@ ucs_wcwidth(pg_wchar ucs)
 {
        /* sorted list of non-overlapping intervals of non-spacing characters */
        static const struct mbinterval combining[] = {
-               {0x0300, 0x034E}, {0x0360, 0x0362}, {0x0483, 0x0486},
-               {0x0488, 0x0489}, {0x0591, 0x05A1}, {0x05A3, 0x05B9},
-               {0x05BB, 0x05BD}, {0x05BF, 0x05BF}, {0x05C1, 0x05C2},
-               {0x05C4, 0x05C4}, {0x064B, 0x0655}, {0x0670, 0x0670},
-               {0x06D6, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
-               {0x070F, 0x070F}, {0x0711, 0x0711}, {0x0730, 0x074A},
-               {0x07A6, 0x07B0}, {0x0901, 0x0902}, {0x093C, 0x093C},
-               {0x0941, 0x0948}, {0x094D, 0x094D}, {0x0951, 0x0954},
-               {0x0962, 0x0963}, {0x0981, 0x0981}, {0x09BC, 0x09BC},
-               {0x09C1, 0x09C4}, {0x09CD, 0x09CD}, {0x09E2, 0x09E3},
-               {0x0A02, 0x0A02}, {0x0A3C, 0x0A3C}, {0x0A41, 0x0A42},
-               {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
-               {0x0A81, 0x0A82}, {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC5},
-               {0x0AC7, 0x0AC8}, {0x0ACD, 0x0ACD}, {0x0B01, 0x0B01},
-               {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B43},
-               {0x0B4D, 0x0B4D}, {0x0B56, 0x0B56}, {0x0B82, 0x0B82},
-               {0x0BC0, 0x0BC0}, {0x0BCD, 0x0BCD}, {0x0C3E, 0x0C40},
-               {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
+               {0x0300, 0x036F}, {0x0483, 0x0489}, {0x0591, 0x05BD},
+               {0x05BF, 0x05BF}, {0x05C1, 0x05C2}, {0x05C4, 0x05C5},
+               {0x05C7, 0x05C7}, {0x0610, 0x061A}, {0x064B, 0x065F},
+               {0x0670, 0x0670}, {0x06D6, 0x06DC}, {0x06DF, 0x06E4},
+               {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x0711, 0x0711},
+               {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x07EB, 0x07F3},
+               {0x07FD, 0x07FD}, {0x0816, 0x0819}, {0x081B, 0x0823},
+               {0x0825, 0x0827}, {0x0829, 0x082D}, {0x0859, 0x085B},
+               {0x08D3, 0x08E1}, {0x08E3, 0x0902}, {0x093A, 0x093A},
+               {0x093C, 0x093C}, {0x0941, 0x0948}, {0x094D, 0x094D},
+               {0x0951, 0x0957}, {0x0962, 0x0963}, {0x0981, 0x0981},
+               {0x09BC, 0x09BC}, {0x09C1, 0x09C4}, {0x09CD, 0x09CD},
+               {0x09E2, 0x09E3}, {0x09FE, 0x0A02}, {0x0A3C, 0x0A3C},
+               {0x0A41, 0x0A51}, {0x0A70, 0x0A71}, {0x0A75, 0x0A75},
+               {0x0A81, 0x0A82}, {0x0ABC, 0x0ABC}, {0x0AC1, 0x0AC8},
+               {0x0ACD, 0x0ACD}, {0x0AE2, 0x0AE3}, {0x0AFA, 0x0B01},
+               {0x0B3C, 0x0B3C}, {0x0B3F, 0x0B3F}, {0x0B41, 0x0B44},
+               {0x0B4D, 0x0B56}, {0x0B62, 0x0B63}, {0x0B82, 0x0B82},
+               {0x0BC0, 0x0BC0}, {0x0BCD, 0x0BCD}, {0x0C00, 0x0C00},
+               {0x0C04, 0x0C04}, {0x0C3E, 0x0C40}, {0x0C46, 0x0C56},
+               {0x0C62, 0x0C63}, {0x0C81, 0x0C81}, {0x0CBC, 0x0CBC},
                {0x0CBF, 0x0CBF}, {0x0CC6, 0x0CC6}, {0x0CCC, 0x0CCD},
-               {0x0D41, 0x0D43}, {0x0D4D, 0x0D4D}, {0x0DCA, 0x0DCA},
-               {0x0DD2, 0x0DD4}, {0x0DD6, 0x0DD6}, {0x0E31, 0x0E31},
+               {0x0CE2, 0x0CE3}, {0x0D00, 0x0D01}, {0x0D3B, 0x0D3C},
+               {0x0D41, 0x0D44}, {0x0D4D, 0x0D4D}, {0x0D62, 0x0D63},
+               {0x0DCA, 0x0DCA}, {0x0DD2, 0x0DD6}, {0x0E31, 0x0E31},
                {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0EB1},
-               {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD},
-               {0x0F18, 0x0F19}, {0x0F35, 0x0F35}, {0x0F37, 0x0F37},
-               {0x0F39, 0x0F39}, {0x0F71, 0x0F7E}, {0x0F80, 0x0F84},
-               {0x0F86, 0x0F87}, {0x0F90, 0x0F97}, {0x0F99, 0x0FBC},
-               {0x0FC6, 0x0FC6}, {0x102D, 0x1030}, {0x1032, 0x1032},
-               {0x1036, 0x1037}, {0x1039, 0x1039}, {0x1058, 0x1059},
-               {0x1160, 0x11FF}, {0x17B7, 0x17BD}, {0x17C6, 0x17C6},
-               {0x17C9, 0x17D3}, {0x180B, 0x180E}, {0x18A9, 0x18A9},
-               {0x200B, 0x200F}, {0x202A, 0x202E}, {0x206A, 0x206F},
-               {0x20D0, 0x20E3}, {0x302A, 0x302F}, {0x3099, 0x309A},
-               {0xFB1E, 0xFB1E}, {0xFE20, 0xFE23}, {0xFEFF, 0xFEFF},
-               {0xFFF9, 0xFFFB}
+               {0x0EB4, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
+               {0x0F35, 0x0F35}, {0x0F37, 0x0F37}, {0x0F39, 0x0F39},
+               {0x0F71, 0x0F7E}, {0x0F80, 0x0F84}, {0x0F86, 0x0F87},
+               {0x0F8D, 0x0FBC}, {0x0FC6, 0x0FC6}, {0x102D, 0x1030},
+               {0x1032, 0x1037}, {0x1039, 0x103A}, {0x103D, 0x103E},
+               {0x1058, 0x1059}, {0x105E, 0x1060}, {0x1071, 0x1074},
+               {0x1082, 0x1082}, {0x1085, 0x1086}, {0x108D, 0x108D},
+               {0x109D, 0x109D}, {0x135D, 0x135F}, {0x1712, 0x1714},
+               {0x1732, 0x1734}, {0x1752, 0x1753}, {0x1772, 0x1773},
+               {0x17B4, 0x17B5}, {0x17B7, 0x17BD}, {0x17C6, 0x17C6},
+               {0x17C9, 0x17D3}, {0x17DD, 0x17DD}, {0x180B, 0x180D},
+               {0x1885, 0x1886}, {0x18A9, 0x18A9}, {0x1920, 0x1922},
+               {0x1927, 0x1928}, {0x1932, 0x1932}, {0x1939, 0x193B},
+               {0x1A17, 0x1A18}, {0x1A1B, 0x1A1B}, {0x1A56, 0x1A56},
+               {0x1A58, 0x1A60}, {0x1A62, 0x1A62}, {0x1A65, 0x1A6C},
+               {0x1A73, 0x1A7F}, {0x1AB0, 0x1B03}, {0x1B34, 0x1B34},
+               {0x1B36, 0x1B3A}, {0x1B3C, 0x1B3C}, {0x1B42, 0x1B42},
+               {0x1B6B, 0x1B73}, {0x1B80, 0x1B81}, {0x1BA2, 0x1BA5},
+               {0x1BA8, 0x1BA9}, {0x1BAB, 0x1BAD}, {0x1BE6, 0x1BE6},
+               {0x1BE8, 0x1BE9}, {0x1BED, 0x1BED}, {0x1BEF, 0x1BF1},
+               {0x1C2C, 0x1C33}, {0x1C36, 0x1C37}, {0x1CD0, 0x1CD2},
+               {0x1CD4, 0x1CE0}, {0x1CE2, 0x1CE8}, {0x1CED, 0x1CED},
+               {0x1CF4, 0x1CF4}, {0x1CF8, 0x1CF9}, {0x1DC0, 0x1DFF},
+               {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F},
+               {0x2DE0, 0x2DFF}, {0x302A, 0x302D}, {0x3099, 0x309A},
+               {0xA66F, 0xA672}, {0xA674, 0xA67D}, {0xA69E, 0xA69F},
+               {0xA6F0, 0xA6F1}, {0xA802, 0xA802}, {0xA806, 0xA806},
+               {0xA80B, 0xA80B}, {0xA825, 0xA826}, {0xA8C4, 0xA8C5},
+               {0xA8E0, 0xA8F1}, {0xA8FF, 0xA8FF}, {0xA926, 0xA92D},
+               {0xA947, 0xA951}, {0xA980, 0xA982}, {0xA9B3, 0xA9B3},
+               {0xA9B6, 0xA9B9}, {0xA9BC, 0xA9BD}, {0xA9E5, 0xA9E5},
+               {0xAA29, 0xAA2E}, {0xAA31, 0xAA32}, {0xAA35, 0xAA36},
+               {0xAA43, 0xAA43}, {0xAA4C, 0xAA4C}, {0xAA7C, 0xAA7C},
+               {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8},
+               {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEC, 0xAAED},
+               {0xAAF6, 0xAAF6}, {0xABE5, 0xABE5}, {0xABE8, 0xABE8},
+               {0xABED, 0xABED}, {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F},
+               {0xFE20, 0xFE2F},
        };
 
        /* test for 8-bit control characters */
-- 
2.22.0

Reply via email to