In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/4cc560b23649f322bae0315aed9b9665e537c44e?hp=3b318f16bd82320be5768efa37201351e3a0ff2e>

- Log -----------------------------------------------------------------
commit 4cc560b23649f322bae0315aed9b9665e537c44e
Author: Karl Williamson <[email protected]>
Date:   Fri Mar 16 11:01:21 2012 -0600

    mktables: white-space only
    
    This outdents code to line up vertically with surrounding text

M       lib/unicore/mktables

commit 21a1aff7e1eddd8acb5f096dd264e2f967ad5401
Author: Karl Williamson <[email protected]>
Date:   Fri Mar 16 10:54:24 2012 -0600

    mktables: Backport name_alias changes to earlier Unicode versions
    
    This allows mktables to be compiled for earlier Unicode versions and
    work for them.

M       lib/unicore/mktables

commit 6901521e3ffe762ff8e3f268762b0d2f667771a7
Author: Karl Williamson <[email protected]>
Date:   Fri Mar 16 13:12:49 2012 -0600

    mktables: Don't duplicate entries
    
    The Name table can have multiple names for each code point.  The highest
    priority ones are first in the file.  Prior to this patch, when adding a
    high priority name to a code point which already had the same name, the
    old name could be retained, leaving two identical names for the code
    point.  This patch causes the lower-priority one to be deleted.

M       lib/unicore/mktables

commit 6342d44542ab62d41255e7655319b5f5ce0975a3
Author: Karl Williamson <[email protected]>
Date:   Sun Mar 11 16:46:18 2012 -0600

    mktables: A leading zero is different than an only 0
    
    Commit b91749bce0ff3d86c6e210a0b35289b4ad36c144 assumed that a single
    0 was a leading 0.  This inappropriately makes a table appear to be hex
    when it isn't.  This currently affects the kPrimaryNumeric property
    which is part of the Unihan database, and is not normally generated by
    mktables; it is generated only if an installation decides they want
    to compile Perl to use the Unihan properties.

M       lib/unicore/mktables

commit b577d4a6d9513ee5434c94e3104462fdd700d6a7
Author: Karl Williamson <[email protected]>
Date:   Tue Mar 6 10:39:15 2012 -0700

    Unicode::UCD::prop_invmap: Fix returned format
    
    The type of an 'a' table should not be changed to 's'.  This bug
    currently occurs only if someone has changed mktables to output one of
    the optional files.

M       lib/Unicode/UCD.pm

commit 294705a8c512970205c6ab76033cb59703476097
Author: Karl Williamson <[email protected]>
Date:   Tue Mar 6 10:37:37 2012 -0700

    Unicode::UCD: typos in error messages
    
    These concatenated the package name with the beginning of the text with
    no intervening punctuation.  Also add the name of the function within
    the package.

M       lib/Unicode/UCD.pm

commit 22bc5f56c4cdabb8df1b2acae33971781fb1814e
Author: Karl Williamson <[email protected]>
Date:   Wed Feb 29 11:15:17 2012 -0700

    mktables: Add required parens and reword warning msg
    
    Should this message ever get triggered, it would fail because the
    precedence is wrong: the argument to hex needs parens.  Also reword
    somewhat for clarity.

M       lib/unicore/mktables

commit c256547e3b6143ea565019347505bc1996fc59e7
Author: Karl Williamson <[email protected]>
Date:   Wed Feb 29 11:04:59 2012 -0700

    pat.t: Fix typo in comment

M       t/re/pat.t
-----------------------------------------------------------------------

Summary of changes:
 lib/Unicode/UCD.pm   |   10 +-
 lib/unicore/mktables |  320 ++++++++++++++++++++++++++++++++++++++++++++------
 t/re/pat.t           |    2 +-
 3 files changed, 289 insertions(+), 43 deletions(-)

diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index 20e060a..b0dc4ca 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -2864,7 +2864,7 @@ RETRY:
                     else {
 
                         # These should all single-element ranges.
-                        croak __PACKAGE__, "Not expecting a mapping with 
multiple code points in a multi-element range, $ranges[$i]" if $hex_end ne "";
+                        croak __PACKAGE__, "::prop_invmap: Not expecting a 
mapping with multiple code points in a multi-element range, $ranges[$i]" if 
$hex_end ne "";
 
                         # Convert them to decimal, as that's what's expected.
                         $list .= "$hex_begin\t\t"
@@ -3058,7 +3058,7 @@ RETRY:
             # This is all we need do for this iteration.
 
             if ($end != $begin) {
-                croak __PACKAGE__, "Multiple maps per code point in '$prop' 
require single-element ranges: begin=$begin, end=$end, map=$map";
+                croak __PACKAGE__, ":prop_invmap: Multiple maps per code point 
in '$prop' require single-element ranges: begin=$begin, end=$end, map=$map";
             }
             if (! ref $invmap[-2]) {
                 $invmap[-2] = [ $invmap[-2], $map ];
@@ -3208,7 +3208,7 @@ RETRY:
                 # Find the range that the override applies to.
                 my $i = _search_invlist(\@invlist, $cp);
                 if ($cp < $invlist[$i] || $cp >= $invlist[$i + 1]) {
-                    croak __PACKAGE__, "wrong_range, cp=$cp; i=$i, 
current=$invlist[$i]; next=$invlist[$i + 1]"
+                    croak __PACKAGE__, "::prop_invmap: wrong_range, cp=$cp; 
i=$i, current=$invlist[$i]; next=$invlist[$i + 1]"
                 }
 
                 # And what that range currently maps to
@@ -3303,13 +3303,13 @@ RETRY:
         # to indicate that need to add code point to it.
         $format = 'ar';
     }
-    elsif ($format ne 'n') {
+    elsif ($format ne 'n' && $format ne 'a') {
 
         # All others are simple scalars
         $format = 's';
     }
     if ($has_multiples &&  $format !~ /l/) {
-       croak __PACKAGE__, "Wrong format '$format' for prop_invmap('$prop'); 
should indicate has lists";
+       croak __PACKAGE__, "::prop_invmap: Wrong format '$format' for 
prop_invmap('$prop'); should indicate has lists";
     }
 
     return (\@invlist, \@invmap, $format, $missing);
diff --git a/lib/unicore/mktables b/lib/unicore/mktables
index 3c8e4d9..ea3d577 100644
--- a/lib/unicore/mktables
+++ b/lib/unicore/mktables
@@ -3394,9 +3394,17 @@ sub trace { return main::trace(@_); }
         #                         multiple times.  They are stored LIFO, so
         #                         that the final one inserted is the first one
         #                         returned in an ordered search of the table.
+        #                         If this is an exact duplicate, including the
+        #                         value, the original will be moved to be
+        #                         first, before any other duplicate ranges
+        #                         with different values.
         #       => $MULTIPLE_AFTER is like $MULTIPLE_BEFORE, but is stored
         #                         FIFO, so that this one is inserted after all
-        #                         others that currently exist.
+        #                         others that currently exist.  If this is an
+        #                         exact duplicate, including value, of an
+        #                         existing range, this one is discarded
+        #                         (leaving the existing one in its original,
+        #                         higher priority position).
         #       => anything else  is the same as => $IF_NOT_EQUIVALENT
         #
         # "same value" means identical for non-type-0 ranges, and it means
@@ -3678,7 +3686,10 @@ sub trace { return main::trace(@_); }
 
             # If to place this new record after, move to beyond all existing
             # ones; but don't add this one if identical to any of them, as it
-            # isn't really a multiple
+            # isn't really a multiple.  This leaves the original order, so
+            # that the current request is ignored.  The reasoning is that the
+            # previous request that wanted this record to have high priority
+            # should have precedence.
             if ($replace == $MULTIPLE_AFTER) {
                 while ($i < @$r && $r->[$i]->start == $start) {
                     return if $value eq $r->[$i]->value
@@ -3686,6 +3697,22 @@ sub trace { return main::trace(@_); }
                     $i++;
                 }
             }
+            else {
+                # If instead we are to place this new record before any
+                # existing ones, remove any identical ones that come after it.
+                # This changes the existing order so that the new one is
+                # first, as is being requested.
+                for (my $j = $i + 1;
+                     $j < @$r && $r->[$j]->start == $start;
+                     $j++)
+                {
+                    if ($value eq $r->[$j]->value && $type eq $r->[$j]->type) {
+                        splice @$r, $j, 1;
+                        last;   # There should only be one instance, so no
+                                # need to keep looking
+                    }
+                }
+            }
 
             trace "Adding multiple record at $i with $start..$end, $value" if 
main::DEBUG && $to_trace;
             my @return = splice @$r,
@@ -6538,7 +6565,7 @@ END
                                         # Assume a leading zero means hex,
                                         # even if all digits are 0-9
                                     || ($format eq $INTEGER_FORMAT
-                                        && $map =~ /^0/);
+                                        && $map =~ /^0[0-9A-F]/);
                             $format = $STRING_FORMAT if $format eq $HEX_FORMAT
                                                        && $map =~ /[^0-9A-F]/;
                         }
@@ -11008,8 +11035,8 @@ END
                                   . $object->name
                                   . "(0x$fields[0]) is $value"
                                   . " and SpecialCasing.txt thinks it is "
-                                  . hex $fields[$i]
-                                  . ".  Good luck.  Proceeding anyway.");
+                                  . hex($fields[$i])
+                                  . ".  Good luck.  Retaining UnicodeData 
value, and proceeding anyway.");
                 }
             }
             else {
@@ -11628,15 +11655,228 @@ sub  filter_script_extensions_line {
 }
 
 sub setup_early_name_alias {
+    my $file= shift;
+    Carp::carp_extra_args(\@_) if main::DEBUG && @_;
+
     my $aliases = property_ref('Name_Alias');
     $aliases = Property->new('Name_Alias') if ! defined $aliases;
+    $file->insert_lines(get_old_name_aliases());
 
-    # Before 6.0, this wasn't a problem, and after it, this alias is part of
-    # the Unicode-delivered file.
-    $aliases->add_map(7, 7, "ALERT: control") if $v_version eq v6.0.0;
     return;
 }
 
+sub get_old_name_aliases () {
+    my @return = split /\n/, <<'END';
+0000;NULL;control
+0000;NUL;abbreviation
+0001;START OF HEADING;control
+0001;SOH;abbreviation
+0002;START OF TEXT;control
+0002;STX;abbreviation
+0003;END OF TEXT;control
+0003;ETX;abbreviation
+0004;END OF TRANSMISSION;control
+0004;EOT;abbreviation
+0005;ENQUIRY;control
+0005;ENQ;abbreviation
+0006;ACKNOWLEDGE;control
+0006;ACK;abbreviation
+0007;ALERT;control
+0007;BELL;control
+0007;BEL;abbreviation
+0008;BACKSPACE;control
+0008;BS;abbreviation
+0009;CHARACTER TABULATION;control
+0009;HORIZONTAL TABULATION;control
+0009;HT;abbreviation
+0009;TAB;abbreviation
+000A;LINE FEED;control
+000A;LINE FEED (LF);control
+000A;NEW LINE;control
+000A;END OF LINE;control
+000A;LF;abbreviation
+000A;NL;abbreviation
+000A;EOL;abbreviation
+000B;LINE TABULATION;control
+000B;VERTICAL TABULATION;control
+000B;VT;abbreviation
+000C;FORM FEED;control
+000C;FORM FEED (FF);control
+000C;FF;abbreviation
+000D;CARRIAGE RETURN;control
+000D;CARRIAGE RETURN (CR);control
+000D;CR;abbreviation
+000E;SHIFT OUT;control
+000E;LOCKING-SHIFT ONE;control
+000E;SO;abbreviation
+000F;SHIFT IN;control
+000F;LOCKING-SHIFT ZERO;control
+000F;SI;abbreviation
+0010;DATA LINK ESCAPE;control
+0010;DLE;abbreviation
+0011;DEVICE CONTROL ONE;control
+0011;DC1;abbreviation
+0012;DEVICE CONTROL TWO;control
+0012;DC2;abbreviation
+0013;DEVICE CONTROL THREE;control
+0013;DC3;abbreviation
+0014;DEVICE CONTROL FOUR;control
+0014;DC4;abbreviation
+0015;NEGATIVE ACKNOWLEDGE;control
+0015;NAK;abbreviation
+0016;SYNCHRONOUS IDLE;control
+0016;SYN;abbreviation
+0017;END OF TRANSMISSION BLOCK;control
+0017;ETB;abbreviation
+0018;CANCEL;control
+0018;CAN;abbreviation
+0019;END OF MEDIUM;control
+0019;EOM;abbreviation
+001A;SUBSTITUTE;control
+001A;SUB;abbreviation
+001B;ESCAPE;control
+001B;ESC;abbreviation
+001C;INFORMATION SEPARATOR FOUR;control
+001C;FILE SEPARATOR;control
+001C;FS;abbreviation
+001D;INFORMATION SEPARATOR THREE;control
+001D;GROUP SEPARATOR;control
+001D;GS;abbreviation
+001E;INFORMATION SEPARATOR TWO;control
+001E;RECORD SEPARATOR;control
+001E;RS;abbreviation
+001F;INFORMATION SEPARATOR ONE;control
+001F;UNIT SEPARATOR;control
+001F;US;abbreviation
+0020;SP;abbreviation
+007F;DELETE;control
+007F;DEL;abbreviation
+0080;PADDING CHARACTER;figment
+0080;PAD;abbreviation
+0081;HIGH OCTET PRESET;figment
+0081;HOP;abbreviation
+0082;BREAK PERMITTED HERE;control
+0082;BPH;abbreviation
+0083;NO BREAK HERE;control
+0083;NBH;abbreviation
+0084;INDEX;control
+0084;IND;abbreviation
+0085;NEXT LINE;control
+0085;NEXT LINE (NEL);control
+0085;NEL;abbreviation
+0086;START OF SELECTED AREA;control
+0086;SSA;abbreviation
+0087;END OF SELECTED AREA;control
+0087;ESA;abbreviation
+0088;CHARACTER TABULATION SET;control
+0088;HORIZONTAL TABULATION SET;control
+0088;HTS;abbreviation
+0089;CHARACTER TABULATION WITH JUSTIFICATION;control
+0089;HORIZONTAL TABULATION WITH JUSTIFICATION;control
+0089;HTJ;abbreviation
+008A;LINE TABULATION SET;control
+008A;VERTICAL TABULATION SET;control
+008A;VTS;abbreviation
+008B;PARTIAL LINE FORWARD;control
+008B;PARTIAL LINE DOWN;control
+008B;PLD;abbreviation
+008C;PARTIAL LINE BACKWARD;control
+008C;PARTIAL LINE UP;control
+008C;PLU;abbreviation
+008D;REVERSE LINE FEED;control
+008D;REVERSE INDEX;control
+008D;RI;abbreviation
+008E;SINGLE SHIFT TWO;control
+008E;SINGLE-SHIFT-2;control
+008E;SS2;abbreviation
+008F;SINGLE SHIFT THREE;control
+008F;SINGLE-SHIFT-3;control
+008F;SS3;abbreviation
+0090;DEVICE CONTROL STRING;control
+0090;DCS;abbreviation
+0091;PRIVATE USE ONE;control
+0091;PRIVATE USE-1;control
+0091;PU1;abbreviation
+0092;PRIVATE USE TWO;control
+0092;PRIVATE USE-2;control
+0092;PU2;abbreviation
+0093;SET TRANSMIT STATE;control
+0093;STS;abbreviation
+0094;CANCEL CHARACTER;control
+0094;CCH;abbreviation
+0095;MESSAGE WAITING;control
+0095;MW;abbreviation
+0096;START OF GUARDED AREA;control
+0096;START OF PROTECTED AREA;control
+0096;SPA;abbreviation
+0097;END OF GUARDED AREA;control
+0097;END OF PROTECTED AREA;control
+0097;EPA;abbreviation
+0098;START OF STRING;control
+0098;SOS;abbreviation
+0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment
+0099;SGC;abbreviation
+009A;SINGLE CHARACTER INTRODUCER;control
+009A;SCI;abbreviation
+009B;CONTROL SEQUENCE INTRODUCER;control
+009B;CSI;abbreviation
+009C;STRING TERMINATOR;control
+009C;ST;abbreviation
+009D;OPERATING SYSTEM COMMAND;control
+009D;OSC;abbreviation
+009E;PRIVACY MESSAGE;control
+009E;PM;abbreviation
+009F;APPLICATION PROGRAM COMMAND;control
+009F;APC;abbreviation
+00A0;NBSP;abbreviation
+00AD;SHY;abbreviation
+200B;ZWSP;abbreviation
+200C;ZWNJ;abbreviation
+200D;ZWJ;abbreviation
+200E;LRM;abbreviation
+200F;RLM;abbreviation
+202A;LRE;abbreviation
+202B;RLE;abbreviation
+202C;PDF;abbreviation
+202D;LRO;abbreviation
+202E;RLO;abbreviation
+FEFF;BYTE ORDER MARK;alternate
+FEFF;BOM;abbreviation
+FEFF;ZWNBSP;abbreviation
+END
+
+    if ($v_version ge v3.0.0) {
+        push @return, split /\n/, <<'END';
+180B; FVS1; abbreviation
+180C; FVS2; abbreviation
+180D; FVS3; abbreviation
+180E; MVS; abbreviation
+202F; NNBSP; abbreviation
+END
+    }
+
+    if ($v_version ge v3.2.0) {
+        push @return, split /\n/, <<'END';
+034F; CGJ; abbreviation
+205F; MMSP; abbreviation
+2060; WJ; abbreviation
+END
+        # Add in VS1..VS16
+        my $cp = 0xFE00 - 1;
+        for my $i (1..16) {
+            push @return, sprintf("%04X; VS%d; abbreviation", $cp + $i, $i);
+        }
+    }
+    if ($v_version ge v4.0.0) { # Add in VS17..VS256
+        my $cp = 0xE0100 - 17;
+        for my $i (17..256) {
+            push @return, sprintf("%04X; VS%d; abbreviation", $cp + $i, $i);
+        }
+    }
+
+    return @return;
+}
+
 sub filter_later_version_name_alias_line {
 
     # This file has an extra entry per line for the alias type.  This is
@@ -11661,8 +11901,10 @@ sub filter_later_version_name_alias_line {
 sub filter_early_version_name_alias_line {
 
     # Early versions did not have the trailing alias type field; implicitly it
-    # was 'correction'
-    $_ .= "; correction";
+    # was 'correction'.   But our synthetic lines we add in this program do
+    # have it, so test for the type field.
+    $_ .= "; correction" if $_ !~ /;.*;/;
+
     filter_later_version_name_alias_line;
     return;
 }
@@ -12510,11 +12752,11 @@ sub compile_perl() {
     }
 
     my $alias_sentence = "";
+    my %abbreviations;
     my $alias = property_ref('Name_Alias');
     if (defined $alias) {
         push @composition, 'Name_Alias';
         $perl_charname->set_proxy_for('Name_Alias');
-        my %abbreviations;
 
         # Add each entry in Name_Alias to Perl_Charnames.  Where these go with
         # respect to any existing entry depends on the entry type.
@@ -12558,40 +12800,44 @@ sub compile_perl() {
 
             # Actually add; before or after current entry(ies) as determined
             # above.
+
             $perl_charname->add_duplicate($code_point, $value, Replace => 
$replace_type);
         }
+    }
 
-        # Now add the Unicode_1 names for the controls.  These come after the
-        # official names, as they are only recommended (by TR18; unclear as of
-        # this writing if that recommendation will be withdrawn, but if it is,
-        # we want to add them anyway for backwards compatibility).  Only a few
-        # differ from the official names.
-        foreach my $range (property_ref('Unicode_1_Name')->ranges) {
-            my $code_point = $range->start;
-            my $unicode_1_value = $range->value;
-            next if $unicode_1_value eq "";     # Skip if name doesn't exist.
+    # Now add the Unicode_1 names for the controls.  The Unicode_1 names had
+    # precedence before 6.1, so should be first in the file; the other names
+    # have precedence starting in 6.1.
+    my $before_or_after = ($v_version lt v6.1.0)
+                          ? $MULTIPLE_BEFORE
+                          : $MULTIPLE_AFTER;
 
-            if ($code_point != $range->end) {
-                Carp::my_carp_bug("Bad News.  Expecting only one code point in 
the range $range.  Just to keep going, using only the first code point;");
-            }
+    foreach my $range (property_ref('Unicode_1_Name')->ranges) {
+        my $code_point = $range->start;
+        my $unicode_1_value = $range->value;
+        next if $unicode_1_value eq "";     # Skip if name doesn't exist.
 
-            # To handle EBCDIC, we don't hard code in the code points of the
-            # controls; instead realizing that all of them are below 256.
-            last if $code_point > 255;
+        if ($code_point != $range->end) {
+            Carp::my_carp_bug("Bad News.  Expecting only one code point in the 
range $range.  Just to keep going, using only the first code point;");
+        }
 
-            # We only add in the controls.
-            next if $gc->value_of($code_point) ne 'Cc';
+        # To handle EBCDIC, we don't hard code in the code points of the
+        # controls; instead realizing that all of them are below 256.
+        last if $code_point > 255;
 
-            # This won't add an exact duplicate.
-            $perl_charname->add_duplicate($code_point, $unicode_1_value,
-                                          Replace => $MULTIPLE_AFTER);
-        }
+        # We only add in the controls.
+        next if $gc->value_of($code_point) ne 'Cc';
 
-        # Now that have everything added, add in abbreviations after
-        # everything else.
-        foreach my $value (keys %abbreviations) {
-            $perl_charname->add_duplicate($abbreviations{$value}, $value, 
Replace => $MULTIPLE_AFTER);
-        }
+        # This won't add an exact duplicate.
+        $perl_charname->add_duplicate($code_point, $unicode_1_value,
+                                        Replace => $before_or_after);
+    }
+
+    # Now that have everything added, add in abbreviations after
+    # everything else.
+    foreach my $value (keys %abbreviations) {
+        $perl_charname->add_duplicate($abbreviations{$value}, $value,
+                                        Replace => $MULTIPLE_AFTER);
         $alias_sentence = <<END;
 The Name_Alias property adds duplicate code point entries that are
 alternatives to the original name.  If an addition is a corrected
diff --git a/t/re/pat.t b/t/re/pat.t
index 624b0d9..faddbc5 100644
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -1180,7 +1180,7 @@ sub run_tests {
     }
 
     {
-        # Test that IDstart works, but doing because the author (khw) knows
+        # Test that IDstart works, but because the author (khw) knows
         # regexes much better than the rest of the core, it is being done here
         # in the context of a regex which relies on buffer names beginng with
         # IDStarts.

--
Perl5 Master Repository

Reply via email to