In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/a3bc64163aa92603c939d88653090ec5e9aa2677?hp=b533676119578f1978fd923c4e66fcbce4589a34>
- Log ----------------------------------------------------------------- commit a3bc64163aa92603c939d88653090ec5e9aa2677 Author: Karl Williamson <[email protected]> Date: Mon Mar 11 11:18:18 2019 -0600 regen/mk_invlists.pl: Remove stray debugging stmts These debugging lines were left in by 21c34e9717d commit 62e88327b15915f71012e8501cf47ee921583f7d Author: Karl Williamson <[email protected]> Date: Sat Mar 9 11:09:01 2019 -0700 regen/mk_invlists.pl: Comment/white-space only commit 98a1b8f75b3617ff62804a29d59d65bad554e835 Author: Karl Williamson <[email protected]> Date: Sat Mar 9 09:43:45 2019 -0700 regen/mk_invlists.pl, lib/utf8_heavy.pl: Rename variable This renames a variable to more accurately reflect its content, and adds a new one which has the old name but with an accurate content. commit 6f954d4263d065602e00e32f5adfe997abeedfea Author: Karl Williamson <[email protected]> Date: Thu Mar 7 15:14:56 2019 -0700 charclass_invlists.h: Add comment ----------------------------------------------------------------------- Summary of changes: charclass_invlists.h | 4 ++-- lib/unicore/uni_keywords.pl | 4 ++-- lib/utf8_heavy.pl | 7 +++++++ regen/mk_invlists.pl | 34 +++++++++++++++++++++------------- uni_keywords.h | 2 +- 5 files changed, 33 insertions(+), 18 deletions(-) diff --git a/charclass_invlists.h b/charclass_invlists.h index 557f021a5b..e2f6796d6e 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -387609,7 +387609,7 @@ const char * const deprecated_property_msgs[] = { #define UNI_LB__SG (UNI_LB__SG_perl_aux + (MAX_UNI_KEYWORD_INDEX * 1)) typedef enum { - PERL_BIN_PLACEHOLDER = 0, + PERL_BIN_PLACEHOLDER = 0, /* So no real value is zero */ UNI_ADLM, UNI_AEGEANNUMBERS, UNI_AGE__10, @@ -390181,5 +390181,5 @@ static const U8 WB_table[23][23] = { * 93cc868487ef3345596041bcb90c302b1b056733bb95233101bc10dc2dbe36b4 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl - * d62728e4cfcba3e6550ff12ad924f4811ca2077ebec705b9fecabed716764414 regen/mk_invlists.pl + * 19e4fbb2cedcad0712e2844e9a3d23fd5194cf16f09302401a705d4392e1bd0c regen/mk_invlists.pl * ex: set ro: */ diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl index 43dea59e02..fed25aaffb 100644 --- a/lib/unicore/uni_keywords.pl +++ b/lib/unicore/uni_keywords.pl @@ -1260,7 +1260,7 @@ # 8b677a4cf17a4c6949632146fa8a99aaa65689a41b9dd231f81783e81600ff36 lib/unicore/extracted/DNumValues.txt # bdc891efa8fc8425685f8c773bbfba47247c2d3612c9926b096fc7f794a5ffc2 lib/unicore/mktables # 93cc868487ef3345596041bcb90c302b1b056733bb95233101bc10dc2dbe36b4 lib/unicore/version -# 4bb677187a1a64e39d48f2e341b5ecb6c99857e49d7a79cf503bd8a3c709999b regen/charset_translations.pl +# 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl # 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl -# d62728e4cfcba3e6550ff12ad924f4811ca2077ebec705b9fecabed716764414 regen/mk_invlists.pl +# 19e4fbb2cedcad0712e2844e9a3d23fd5194cf16f09302401a705d4392e1bd0c regen/mk_invlists.pl # ex: set ro: diff --git a/lib/utf8_heavy.pl b/lib/utf8_heavy.pl index 22cee9e4af..ec6dbb69dd 100644 --- a/lib/utf8_heavy.pl +++ b/lib/utf8_heavy.pl @@ -35,6 +35,13 @@ sub _loose_name ($) { # out blanks, underscores and dashes. The complication stems from the # grandfathered-in 'L_', which retains a single trailing underscore. +# integer or float (no exponent) +my $integer_or_float_re = qr/ ^ -? \d+ (:? \. \d+ )? $ /x; + +# Also includes rationals +my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; + return $_[0] if $_[0] =~ $numeric_re; + (my $loose = $_[0]) =~ s/[-_ \t]//g; return $loose if $loose !~ / ^ (?: is | to )? l $/x; diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl index f6515ef567..b33c3e4888 100644 --- a/regen/mk_invlists.pl +++ b/regen/mk_invlists.pl @@ -31,8 +31,11 @@ my $VERSION_DATA_STRUCTURE_TYPE = 148565664; # charclass_invlists.h now also contains inversion maps and enum definitions # for those maps that have a finite number of possible values -# integer or float -my $numeric_re = qr/ ^ -? \d+ (:? \. \d+ )? $ /x; +# integer or float (no exponent) +my $integer_or_float_re = qr/ ^ -? \d+ (:? \. \d+ )? $ /x; + +# Also includes rationals +my $numeric_re = qr! $integer_or_float_re | ^ -? \d+ / \d+ $ !x; # More than one code point may have the same code point as their fold. This # gives the maximum number in the current Unicode release. (The folded-to @@ -164,7 +167,7 @@ sub a2n($) { # Returns the input Unicode code point translated to native. - return $cp if $cp !~ $numeric_re || $cp > 255; + return $cp if $cp !~ $integer_or_float_re || $cp > 255; return $a2n[$cp]; } @@ -2422,8 +2425,9 @@ foreach my $property (sort # thing if there is no '=' my ($lhs, $rhs) = $property =~ / ( [^=]* ) ( =? .*) /x; - # $lhs then becomes the property name. See if there are any synonyms - # for this property. + # $lhs then becomes the property name. + + # See if there are any synonyms for this property. if (exists $prop_name_aliases{$lhs}) { # If so, do the combinatorics so that a new entry is added for @@ -2449,6 +2453,7 @@ foreach my $property (sort # processing. But we haven't dealt with it yet. If we already have a # property with the identical characteristics, this becomes just a # synonym for it. + if (exists $enums{$tag}) { push @this_entries, $property; } @@ -2693,7 +2698,9 @@ foreach my $prop (@props) { if (ref $invmap[0]) { $bucket = join "\cK", map { a2n($_) } @{$invmap[0]}; } - elsif ($maps_to_code_point && $invmap[0] =~ $numeric_re) { + elsif ( $maps_to_code_point + && $invmap[0] =~ $integer_or_float_re) + { # Do convert to native for maps to single code points. # There are some properties that have a few outlier @@ -2716,7 +2723,7 @@ foreach my $prop (@props) { # Skip any non-numeric maps: these are outliers # that aren't code points. - && $base_map =~ $numeric_re + && $base_map =~ $integer_or_float_re # 'ne' because the default can be a string && $base_map ne $map_default) @@ -2804,9 +2811,12 @@ foreach my $prop (@props) { for my $i (0 .. @new_invlist - 1) { next if $i > 0 && $new_invlist[$i-1] + 1 == $new_invlist[$i] - && $xlated{$new_invlist[$i-1]} =~ $numeric_re - && $xlated{$new_invlist[$i]} =~ $numeric_re - && $xlated{$new_invlist[$i-1]} + 1 == $xlated{$new_invlist[$i]}; + && $xlated{$new_invlist[$i-1]} + =~ $integer_or_float_re + && $xlated{$new_invlist[$i]} + =~ $integer_or_float_re + && $xlated{$new_invlist[$i-1]} + 1 + == $xlated{$new_invlist[$i]}; push @temp, $new_invlist[$i]; } @new_invlist = @temp; @@ -2945,7 +2955,7 @@ if (scalar keys %deprecated_tags) { } } -print $out_fh "\ntypedef enum {\n\tPERL_BIN_PLACEHOLDER = 0,\n\t"; +print $out_fh "\ntypedef enum {\n\tPERL_BIN_PLACEHOLDER = 0, /* So no real value is zero */\n\t"; print $out_fh join ",\n\t", @enums; print $out_fh "\n"; print $out_fh "} binary_invlist_enum;\n"; @@ -3012,7 +3022,6 @@ my @sources = qw(regen/mk_invlists.pl read_only_bottom_close_and_rename($out_fh, \@sources); -use Data::Dumper; my %name_to_index; for my $i (0 .. @enums - 1) { my $loose_name = $enums[$i] =~ s/^$table_name_prefix//r; @@ -3040,7 +3049,6 @@ my $uni_pl = open_new('lib/unicore/uni_keywords.pl', '>', { print $uni_pl "\%utf8::uni_prop_ptrs_indices = (\n"; for my $name (sort keys %name_to_index) { - print STDERR __LINE__, $name, "\n" unless defined $name_to_index{$name}; print $uni_pl " '$name' => $name_to_index{$name},\n"; } print $uni_pl ");\n\n1;\n"; diff --git a/uni_keywords.h b/uni_keywords.h index a242c551b4..6dfc1df204 100644 --- a/uni_keywords.h +++ b/uni_keywords.h @@ -7129,6 +7129,6 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) { * 93cc868487ef3345596041bcb90c302b1b056733bb95233101bc10dc2dbe36b4 lib/unicore/version * 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl * 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl - * d62728e4cfcba3e6550ff12ad924f4811ca2077ebec705b9fecabed716764414 regen/mk_invlists.pl + * 19e4fbb2cedcad0712e2844e9a3d23fd5194cf16f09302401a705d4392e1bd0c regen/mk_invlists.pl * c56b78df81e0f96632246052d71580b212546ca02ba4075158965e11d892f21e regen/mph.pl * ex: set ro: */ -- Perl5 Master Repository
