In perl.git, the branch khw/ebcdic has been updated <http://perl5.git.perl.org/perl.git/commitdiff/e5640d6b6d4d4a988aac70ba94762deac916a917?hp=5b8b8339e2b27c7f75d67b0bfce8cf19c4d50f38>
- Log ----------------------------------------------------------------- commit e5640d6b6d4d4a988aac70ba94762deac916a917 Author: Karl Williamson <[email protected]> Date: Thu Apr 4 22:17:11 2013 -0600 XXX debugging regen/regcharclass.pl ----------------------------------------------------------------------- Summary of changes: regen/regcharclass.pl | 191 +++++++++++++++++++++++++------------------------ 1 files changed, 97 insertions(+), 94 deletions(-) diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl index af38b7a..51955de 100755 --- a/regen/regcharclass.pl +++ b/regen/regcharclass.pl @@ -382,7 +382,10 @@ sub new { die "do '$1' failed: $!$@" if ! do $1 or $@; next; } elsif ($str =~ / ^ & \s* ( .* ) /x) { # user-furnished sub() call + use Data::Dumper; + print STDERR $1, "\n"; my @results = eval "$1"; + print STDERR Dumper \@results; die "eval '$1' failed: $@" if $@; push @{$opt{txt}}, @results; next; @@ -1416,100 +1419,100 @@ if ( !caller ) { 1; # in the unlikely case we are being used as a module __DATA__ -# This is no longer used, but retained in case it is needed some day. -# TRICKYFOLD: Problematic fold case letters. When adding to this list, also should add them to regcomp.c and fold_grind.t -# => generic cp generic-cp generic-both :fast safe -# 0x00DF # LATIN SMALL LETTER SHARP S -# 0x0390 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS -# 0x03B0 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS -# 0x1E9E # LATIN CAPITAL LETTER SHARP S, because maps to same as 00DF -# 0x1FD3 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA; maps same as 0390 -# 0x1FE3 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA; maps same as 03B0 - -LNBREAK: Line Break: \R -=> generic UTF8 LATIN1 :fast safe -"\x0D\x0A" # CRLF - Network (Windows) line ending -\p{VertSpace} - -HORIZWS: Horizontal Whitespace: \h \H -=> generic UTF8 LATIN1 high cp cp_high :fast safe -\p{HorizSpace} - -VERTWS: Vertical Whitespace: \v \V -=> generic UTF8 high LATIN1 cp cp_high :fast safe -\p{VertSpace} - -XDIGIT: Hexadecimal digits -=> UTF8 high cp_high :fast -\p{XDigit} - -XPERLSPACE: \p{XPerlSpace} -=> generic UTF8 high cp_high :fast -\p{XPerlSpace} - -REPLACEMENT: Unicode REPLACEMENT CHARACTER -=> UTF8 :safe -0xFFFD - -NONCHAR: Non character code points -=> UTF8 :fast -\p{Nchar} - -SURROGATE: Surrogate characters -=> UTF8 :fast -\p{Gc=Cs} - -GCB_L: Grapheme_Cluster_Break=L -=> UTF8 :fast -\p{_X_GCB_L} - -GCB_LV_LVT_V: Grapheme_Cluster_Break=(LV or LVT or V) -=> UTF8 :fast -\p{_X_LV_LVT_V} - -GCB_Prepend: Grapheme_Cluster_Break=Prepend -=> UTF8 :fast -\p{_X_GCB_Prepend} - -GCB_RI: Grapheme_Cluster_Break=RI -=> UTF8 :fast -\p{_X_RI} - -GCB_SPECIAL_BEGIN_START: Grapheme_Cluster_Break=special_begin_starts -=> UTF8 :fast -\p{_X_Special_Begin_Start} - -GCB_T: Grapheme_Cluster_Break=T -=> UTF8 :fast -\p{_X_GCB_T} - -GCB_V: Grapheme_Cluster_Break=V -=> UTF8 :fast -\p{_X_GCB_V} - -# This program was run with this enabled, and the results copied to utf8.h; -# then this was commented out because it takes so long to figure out these 2 -# million code points. The results would not change unless utf8.h decides it -# wants a maximum other than 4 bytes, or this program creates better -# optimizations -#UTF8_CHAR: Matches utf8 from 1 to 4 bytes -#=> UTF8 :safe only_ascii_platform -#0x0 - 0x1FFFFF - -# This hasn't been commented out, because we haven't an EBCDIC platform to run -# it on, and the 3 types of EBCDIC allegedly supported by Perl would have -# different results -#UTF8_CHAR: Matches utf8 from 1 to 5 bytes -#=> UTF8 :safe only_ebcdic_platform -#0x0 - 0x3FFFFF: - -QUOTEMETA: Meta-characters that \Q should quote -=> high :fast -\p{_Perl_Quotemeta} - -PATWS: pattern white space -=> generic generic_non_low cp : fast safe -\p{PatWS} +## This is no longer used, but retained in case it is needed some day. +## TRICKYFOLD: Problematic fold case letters. When adding to this list, also should add them to regcomp.c and fold_grind.t +## => generic cp generic-cp generic-both :fast safe +## 0x00DF # LATIN SMALL LETTER SHARP S +## 0x0390 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +## 0x03B0 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS +## 0x1E9E # LATIN CAPITAL LETTER SHARP S, because maps to same as 00DF +## 0x1FD3 # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA; maps same as 0390 +## 0x1FE3 # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA; maps same as 03B0 +# +#LNBREAK: Line Break: \R +#=> generic UTF8 LATIN1 :fast safe +#"\x0D\x0A" # CRLF - Network (Windows) line ending +#\p{VertSpace} +# +#HORIZWS: Horizontal Whitespace: \h \H +#=> generic UTF8 LATIN1 high cp cp_high :fast safe +#\p{HorizSpace} +# +#VERTWS: Vertical Whitespace: \v \V +#=> generic UTF8 high LATIN1 cp cp_high :fast safe +#\p{VertSpace} +# +#XDIGIT: Hexadecimal digits +#=> UTF8 high cp_high :fast +#\p{XDigit} +# +#XPERLSPACE: \p{XPerlSpace} +#=> generic UTF8 high cp_high :fast +#\p{XPerlSpace} +# +#REPLACEMENT: Unicode REPLACEMENT CHARACTER +#=> UTF8 :safe +#0xFFFD +# +#NONCHAR: Non character code points +#=> UTF8 :fast +#\p{Nchar} +# +#SURROGATE: Surrogate characters +#=> UTF8 :fast +#\p{Gc=Cs} +# +#GCB_L: Grapheme_Cluster_Break=L +#=> UTF8 :fast +#\p{_X_GCB_L} +# +#GCB_LV_LVT_V: Grapheme_Cluster_Break=(LV or LVT or V) +#=> UTF8 :fast +#\p{_X_LV_LVT_V} +# +#GCB_Prepend: Grapheme_Cluster_Break=Prepend +#=> UTF8 :fast +#\p{_X_GCB_Prepend} +# +#GCB_RI: Grapheme_Cluster_Break=RI +#=> UTF8 :fast +#\p{_X_RI} +# +#GCB_SPECIAL_BEGIN_START: Grapheme_Cluster_Break=special_begin_starts +#=> UTF8 :fast +#\p{_X_Special_Begin_Start} +# +#GCB_T: Grapheme_Cluster_Break=T +#=> UTF8 :fast +#\p{_X_GCB_T} +# +#GCB_V: Grapheme_Cluster_Break=V +#=> UTF8 :fast +#\p{_X_GCB_V} +# +## This program was run with this enabled, and the results copied to utf8.h; +## then this was commented out because it takes so long to figure out these 2 +## million code points. The results would not change unless utf8.h decides it +## wants a maximum other than 4 bytes, or this program creates better +## optimizations +##UTF8_CHAR: Matches utf8 from 1 to 4 bytes +##=> UTF8 :safe only_ascii_platform +##0x0 - 0x1FFFFF +# +## This hasn't been commented out, because we haven't an EBCDIC platform to run +## it on, and the 3 types of EBCDIC allegedly supported by Perl would have +## different results +##UTF8_CHAR: Matches utf8 from 1 to 5 bytes +##=> UTF8 :safe only_ebcdic_platform +##0x0 - 0x3FFFFF: +# +#QUOTEMETA: Meta-characters that \Q should quote +#=> high :fast +#\p{_Perl_Quotemeta} +# +#PATWS: pattern white space +#=> generic generic_non_low cp : fast safe +#\p{PatWS} MULTI_CHAR_FOLD: multi-char strings that are folded to by a single character => UTF8 :safe -- Perl5 Master Repository
