In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/9d4baee2fcb9d49ba7bbf5618ac57b983caabeae?hp=a6ceea0637411cc48e4e043c7d222d707dd3611a>
- Log ----------------------------------------------------------------- commit 9d4baee2fcb9d49ba7bbf5618ac57b983caabeae Author: Karl Williamson <[email protected]> Date: Tue Jan 22 14:02:27 2013 -0700 Typo in perl5178delta Spotted by Hugo van der Sanden M pod/perl5178delta.pod commit ff56e4f1f932959354fde1f616e4834c066c0181 Author: Karl Williamson <[email protected]> Date: Tue Jan 22 13:34:54 2013 -0700 Add deprecation warning for literal PATWS under /x This is explained in the perldelta changes in this commit. We plan to migrate to Unicode's definition of white-space to ignore under /x. That means we should raise a deprecation warning in the meantime if anyone currently uses these characters in such a way as to have the meaning changed when the migration is completed. M pod/perldelta.pod M pod/perldiag.pod M regcomp.c M t/re/reg_mesg.t commit 0d6106aa7a7bf59a333387bc6170353dcd78d3f3 Author: Karl Williamson <[email protected]> Date: Tue Jan 22 13:35:15 2013 -0700 regcomp.c: Add macro for generating deprecated warnings This will be used in a future commit M regcomp.c commit 0ca752d9f8c18474b5ffa3e9aa6f26202fafd6dc Author: Karl Williamson <[email protected]> Date: Tue Jan 22 13:32:01 2013 -0700 regcomp.c: Add comments; no code changes M regcomp.c commit f3b7b53430f4975a6642e0d7356d1aff5750e23b Author: Karl Williamson <[email protected]> Date: Tue Jan 22 13:30:01 2013 -0700 regcharclass.h: Add macro for non-ASCII PATWS This will be used to deprecate uses of non-ASCII Pattern White Space M regcharclass.h M regen/regcharclass.pl commit 61de6bbc5b5e377a625fd4a2522bfccc0d474f5d Author: Karl Williamson <[email protected]> Date: Tue Jan 22 13:29:10 2013 -0700 /regen/regcharclass.pl: white-space only; no code changes M regen/regcharclass.pl commit 87894a2431497db1c641e2b44005df8f408ee7e2 Author: Karl Williamson <[email protected]> Date: Tue Jan 22 13:27:44 2013 -0700 regen/regcharclass.pl: Add capability This allows one to generate macros that exclude just the ASCII range M regen/regcharclass.pl commit 6d24e9d47fa5b7178bef38c1eab2f30d722a6a55 Author: Karl Williamson <[email protected]> Date: Tue Jan 22 13:23:06 2013 -0700 reg_mesg.t: Remove repetitious boiler plate All the messages have boiler plate that can be removed and inserted by the subroutine that does insertion anyway. M t/re/reg_mesg.t commit 63fbd1cbafa7d7361a515aa4e812d5258fd7114b Author: Karl Williamson <[email protected]> Date: Tue Jan 22 11:40:54 2013 -0700 Move t/lib/warnings/regcomp to t/re/reg_mesg.t reg_mesg.t has better infrastructure to more easily add and maintain these warnings. M t/lib/warnings/regcomp M t/re/reg_mesg.t commit 147508a29c9b40c156a9552802362f4a5d697439 Author: Karl Williamson <[email protected]> Date: Tue Jan 22 11:16:59 2013 -0700 regcomp.c: Change warning category to just deprecated The warnings for \b{ and \B{ were added in the 5.17 series; they are a deprecation warning which should be turned off by that category. One should not have to turn off regular regexp warnings as well to get rid of these. M regcomp.c M t/lib/warnings/regcomp commit af01601c498bb908faba7e2acd3f47d58fd8fc08 Author: Karl Williamson <[email protected]> Date: Tue Jan 22 11:08:27 2013 -0700 reg_mesg.t: Add cpabilities; improve output This adds the capability to have tests that each generate multiple warnings, and it improves the flow so that if a test fails that make moot subsequent tests, those tests are skipped. M t/re/reg_mesg.t commit b42857f309a518eeb4d33fdfedced0ff624ade14 Author: Karl Williamson <[email protected]> Date: Mon Jan 21 19:32:09 2013 -0700 re/reg_mesg.t: White-space only; no code changes M t/re/reg_mesg.t commit 6fe2934bf70cc9cab65e6f38b34271cc39cee31c Author: Karl Williamson <[email protected]> Date: Mon Jan 21 19:25:01 2013 -0700 re/reg_mesg.t: Add tests for suppressing warnings This automatically adds a test for each warning to verify that turning off the warning category works. M t/re/reg_mesg.t ----------------------------------------------------------------------- Summary of changes: pod/perl5178delta.pod | 2 +- pod/perldelta.pod | 15 ++ pod/perldiag.pod | 17 +++ regcharclass.h | 22 +++ regcomp.c | 31 ++++- regen/regcharclass.pl | 23 ++- t/lib/warnings/regcomp | 280 +-------------------------------------- t/re/reg_mesg.t | 352 ++++++++++++++++++++++++++++++++---------------- 8 files changed, 332 insertions(+), 410 deletions(-) diff --git a/pod/perl5178delta.pod b/pod/perl5178delta.pod index 41821fc..a7cfd85 100644 --- a/pod/perl5178delta.pod +++ b/pod/perl5178delta.pod @@ -16,7 +16,7 @@ L<perl5177delta>, which describes differences between 5.17.6 and 5.17.7. =head2 Regular Expression Set Operations -This is an experimental feature to allow matching against the the union, +This is an experimental feature to allow matching against the union, intersection, etc., of sets of code points, similar to L<Unicode::Regex::Set>. It can also be used to extend C</x> processing to [bracketed] character classes, and as a replacement of user-defined diff --git a/pod/perldelta.pod b/pod/perldelta.pod index cf512ad..87e2215 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -53,6 +53,21 @@ an updated module in the L</Modules and Pragmata> section. [ List each deprecation as a =head2 entry ] +=head2 Five additional characters should be escaped in patterns with C</x> + +When a regular expression pattern is compiled with C</x>, Perl treats 6 +characters as white space to ignore, such as SPACE and TAB. However, +Unicode recommends 11 characters be treated thusly. In preparation to +conforming with this in a future Perl version, in the meantime, use of +any of the missing characters will raise a deprecation warning, unless +turned off. The five characters are: +U+0085 NEXT LINE, +U+200E LEFT-TO-RIGHT MARK, +U+200F RIGHT-TO-LEFT MARK, +U+2028 LINE SEPARATOR, +and +U+2029 PARAGRAPH SEPARATOR. + =head1 Performance Enhancements XXX Changes which enhance performance without changing behaviour go here. diff --git a/pod/perldiag.pod b/pod/perldiag.pod index 19aaa55..797bb8e 100644 --- a/pod/perldiag.pod +++ b/pod/perldiag.pod @@ -1797,6 +1797,23 @@ single form when it must operate on them directly. Either you've passed an invalid file specification to Perl, or you've found a case the conversion routines don't handle. Drat. +=item Escape literal pattern white space under /x + +(D deprecated) You compiled a regular expression pattern with C</x> to +ignore white space, and you used, as a literal, one of the characters +that Perl plans to eventually treat as white space. The character must +be escaped somehow, or it will work differently on a future Perl that +does treat it as white space. The easiest way is to insert a backslash +immediately before it, or to enclose it with square brackets. This +change is to bring Perl into conformance with Unicode recommendations. +Here are the five characters that generate this warning: +U+0085 NEXT LINE, +U+200E LEFT-TO-RIGHT MARK, +U+200F RIGHT-TO-LEFT MARK, +U+2028 LINE SEPARATOR, +and +U+2029 PARAGRAPH SEPARATOR. + =item Eval-group in insecure regular expression (F) Perl detected tainted data when trying to compile a regular diff --git a/regcharclass.h b/regcharclass.h index b41fbbb..e51fe64 100644 --- a/regcharclass.h +++ b/regcharclass.h @@ -886,6 +886,28 @@ : 0 ) /*** GENERATED CODE ***/ +#define is_PATWS_non_low(s,is_utf8) \ +( ( is_utf8 ) ? \ + ( ( 0xC2 == ((U8*)s)[0] ) ? \ + ( ( 0x85 == ((U8*)s)[1] ) ? 2 : 0 ) \ + : ( ( ( 0xE2 == ((U8*)s)[0] ) && ( 0x80 == ((U8*)s)[1] ) ) && ( ( ((U8*)s)[2] & 0xFE ) == 0x8E || ( ((U8*)s)[2] & 0xFE ) == 0xA8 ) ) ? 3 : 0 )\ +: ( 0x85 == ((U8*)s)[0] ) ) + +/*** GENERATED CODE ***/ +#define is_PATWS_non_low_safe(s,e,is_utf8) \ +( ((e)-(s) > 2) ? \ + ( ( is_utf8 ) ? \ + ( ( 0xC2 == ((U8*)s)[0] ) ? \ + ( ( 0x85 == ((U8*)s)[1] ) ? 2 : 0 ) \ + : ( ( ( 0xE2 == ((U8*)s)[0] ) && ( 0x80 == ((U8*)s)[1] ) ) && ( ( ((U8*)s)[2] & 0xFE ) == 0x8E || ( ((U8*)s)[2] & 0xFE ) == 0xA8 ) ) ? 3 : 0 )\ + : ( 0x85 == ((U8*)s)[0] ) ) \ +: ((e)-(s) > 1) ? \ + ( ( is_utf8 ) ? \ + ( ( ( 0xC2 == ((U8*)s)[0] ) && ( 0x85 == ((U8*)s)[1] ) ) ? 2 : 0 ) \ + : ( 0x85 == ((U8*)s)[0] ) ) \ +: ( ((e)-(s) > 0) && ( !( is_utf8 ) ) ) ? ( 0x85 == ((U8*)s)[0] ) : 0 ) + +/*** GENERATED CODE ***/ #define is_PATWS_cp(cp) \ ( ( 0x09 <= cp && cp <= 0x0D ) || ( 0x0D < cp && \ ( 0x20 == cp || ( 0x20 < cp && \ diff --git a/regcomp.c b/regcomp.c index a16b8b3..2c7709a 100644 --- a/regcomp.c +++ b/regcomp.c @@ -548,6 +548,19 @@ static const scan_data_t zero_scan_data = (int)offset, RExC_precomp, RExC_precomp + offset); \ } STMT_END +#define vWARN_dep(loc, m) STMT_START { \ + const IV offset = loc - RExC_precomp; \ + Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), m REPORT_LOCATION, \ + (int)offset, RExC_precomp, RExC_precomp + offset); \ +} STMT_END + +#define ckWARNdep(loc,m) STMT_START { \ + const IV offset = loc - RExC_precomp; \ + Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), \ + m REPORT_LOCATION, \ + (int)offset, RExC_precomp, RExC_precomp + offset); \ +} STMT_END + #define ckWARNregdep(loc,m) STMT_START { \ const IV offset = loc - RExC_precomp; \ Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP), \ @@ -10251,7 +10264,7 @@ tryagain: FLAGS(ret) = get_regex_charset(RExC_flags); *flagp |= SIMPLE; if (! SIZE_ONLY && (U8) *(RExC_parse + 1) == '{') { - ckWARNregdep(RExC_parse, "\"\\b{\" is deprecated; use \"\\b\\{\" instead"); + ckWARNdep(RExC_parse, "\"\\b{\" is deprecated; use \"\\b\\{\" instead"); } goto finish_meta_pat; case 'B': @@ -10265,7 +10278,7 @@ tryagain: FLAGS(ret) = get_regex_charset(RExC_flags); *flagp |= SIMPLE; if (! SIZE_ONLY && (U8) *(RExC_parse + 1) == '{') { - ckWARNregdep(RExC_parse, "\"\\B{\" is deprecated; use \"\\B\\{\" instead"); + ckWARNdep(RExC_parse, "\"\\B{\" is deprecated; use \"\\B\\{\" instead"); } goto finish_meta_pat; @@ -10774,9 +10787,19 @@ tryagain: ckWARN3reg(p + len, "Unrecognized escape \\%.*s passed through", len, p); } goto normal_default; - } + } /* End of switch on '\' */ break; - default: + default: /* A literal character */ + + if (! SIZE_ONLY + && RExC_flags & RXf_PMf_EXTENDED + && ckWARN(WARN_DEPRECATED) + && is_PATWS_non_low(p, UTF)) + { + vWARN_dep(p + ((UTF) ? UTF8SKIP(p) : 1), + "Escape literal pattern white space under /x"); + } + normal_default: if (UTF8_IS_START(*p) && UTF) { STRLEN numlen; diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl index 9c453e2..f5cf315 100755 --- a/regen/regcharclass.pl +++ b/regen/regcharclass.pl @@ -575,9 +575,11 @@ sub generic_optree { } elsif ( $latin1 ) { $else= __cond_join( "!( is_utf8 )", $latin1, $else ); } - my $low= $self->make_trie( 'low', $opt{max_depth} ); - if ( $low ) { - $else= $self->_optree( $low, $test_type, $opt{ret_type}, $else, 0 ); + if ($opt{type} eq 'generic') { + my $low= $self->make_trie( 'low', $opt{max_depth} ); + if ( $low ) { + $else= $self->_optree( $low, $test_type, $opt{ret_type}, $else, 0 ); + } } return $else; @@ -598,7 +600,7 @@ sub length_optree { my ( @size, $method ); - if ( $type eq 'generic' ) { + if ( $type =~ /generic/ ) { $method= 'generic_optree'; my %sizes= ( %{ $self->{size}{low} || {} }, @@ -1176,18 +1178,19 @@ sub make_macro { my $method; if ( $opts{safe} ) { $method= 'length_optree'; - } elsif ( $type eq 'generic' ) { + } elsif ( $type =~ /generic/ ) { $method= 'generic_optree'; } else { $method= 'optree'; } my @args= $type =~ /^cp/ ? 'cp' : 's'; push @args, "e" if $opts{safe}; - push @args, "is_utf8" if $type eq 'generic'; + push @args, "is_utf8" if $type =~ /generic/; push @args, "len" if $ret_type eq 'both'; my $pfx= $ret_type eq 'both' ? 'what_len_' : $ret_type eq 'cp' ? 'what_' : 'is_'; - my $ext= $type eq 'generic' ? '' : '_' . lc( $type ); + my $ext= $type =~ /generic/ ? '' : '_' . lc( $type ); + $ext .= '_non_low' if $type eq 'generic_non_low'; $ext .= "_safe" if $opts{safe}; my $argstr= join ",", @args; my $def_fmt="$pfx$self->{op}$ext%s($argstr)"; @@ -1340,6 +1343,10 @@ if ( !caller ) { # generic generate a macro whose name is 'is_BASE". It has a 2nd, # boolean, parameter which indicates if the first one points to # a UTF-8 string or not. Thus it works in all circumstances. +# generic_non_low generate a macro whose name is 'is_BASE_non_low". It has +# a 2nd, boolean, parameter which indicates if the first one +# points to a UTF-8 string or not. It excludes any ASCII-range +# matches, but otherwise it works in all circumstances. # cp generate a macro whose name is 'is_BASE_cp' and defines a # class that returns true if the UV parameter is a member of the # class; false if not. @@ -1490,5 +1497,5 @@ MULTI_CHAR_FOLD: multi-char strings that are folded to by a single character # 0 => Latin1-only PATWS: pattern white space -=> generic cp : fast safe +=> generic generic_non_low cp : fast safe \p{PatWS} diff --git a/t/lib/warnings/regcomp b/t/lib/warnings/regcomp index 20ee8cf..19b6b06 100644 --- a/t/lib/warnings/regcomp +++ b/t/lib/warnings/regcomp @@ -1,281 +1,3 @@ - XXX Note that t/re/reg_mesg.t might be a better place for these - - regcomp.c AOK - - Quantifier unexpected on zero-length expression [S_study_chunk] - - Useless (%s%c) - %suse /%c modifier [S_reg] - Useless (%sc) - %suse /gc modifier [S_reg] - - - - Strange *+?{} on zero-length expression [S_study_chunk] - /(?=a)?/ - - %.*s matches null string many times [S_regpiece] - $a = "ABC123" ; $a =~ /(?=a)*/' - - /%.127s/: Unrecognized escape \\%c passed through [S_regatom] - $x = '\m' ; /$x/ - - POSIX syntax [%c %c] belongs inside character classes [S_regclass] - - - Character class [:%.*s:] unknown [S_regpposixcc] - - Character class syntax [%c %c] belongs inside character classes [S_regclass] - - /%.127s/: false [] range \"%*.*s\" in regexp [S_regclass] - - /%.127s/: false [] range \"%*.*s\" in regexp [S_regclassutf8] - - /%.127s/: Unrecognized escape \\%c in character class passed through" [S_regclass] - - /%.127s/: Unrecognized escape \\%c in character class passed through" [S_regclassutf8] - - False [] range \"%*.*s\" [S_regclass] + regcomp.c These tests have been moved to t/re/reg_mesg.t __END__ -# regcomp.c [S_regpiece] -use warnings 'regexp' ; -my $a = "ABC123" ; -$a =~ /(?=a)*/ ; -no warnings 'regexp' ; -$a =~ /(?=a)*/ ; -EXPECT -(?=a)* matches null string many times in regex; marked by <-- HERE in m/(?=a)* <-- HERE / at - line 4. -######## -# regcomp.c [S_regatom] -$x = '\m' ; -use warnings 'regexp' ; -$a =~ /a$x/ ; -no warnings 'regexp' ; -$a =~ /a$x/ ; -EXPECT -Unrecognized escape \m passed through in regex; marked by <-- HERE in m/a\m <-- HERE / at - line 4. -######## -# regcomp.c [S_regatom] -# The \q should warn, the \_ should NOT warn. -use warnings 'regexp'; no warnings "deprecated"; -"foo" =~ /\q/; -"foo" =~ /\q{/; -"foo" =~ /a\b{cde/; -"foo" =~ /a\B{cde/; -"bar" =~ /\_/; -no warnings 'regexp'; -"foo" =~ /\q/; -"foo" =~ /\q{/; -"foo" =~ /a\b{cde/; -"foo" =~ /a\B{cde/; -"bar" =~ /\_/; -EXPECT -Unrecognized escape \q passed through in regex; marked by <-- HERE in m/\q <-- HERE / at - line 4. -Unrecognized escape \q{ passed through in regex; marked by <-- HERE in m/\q{ <-- HERE / at - line 5. -"\b{" is deprecated; use "\b\{" instead in regex; marked by <-- HERE in m/a\ <-- HERE b{cde/ at - line 6. -"\B{" is deprecated; use "\B\{" instead in regex; marked by <-- HERE in m/a\ <-- HERE B{cde/ at - line 7. -######## -# regcomp.c [S_regpposixcc S_regclass] -# -use warnings 'regexp' ; -$_ = "" ; -/[:alpha:]/; -/[:zog:]/; -no warnings 'regexp' ; -/[:alpha:]/; -/[:zog:]/; -EXPECT -POSIX syntax [: :] belongs inside character classes in regex; marked by <-- HERE in m/[:alpha:] <-- HERE / at - line 5. -POSIX syntax [: :] belongs inside character classes in regex; marked by <-- HERE in m/[:zog:] <-- HERE / at - line 6. -######## -# regcomp.c [S_regclass] -# -use warnings 'regexp' ; -$_ = "" ; -/[.zog.]/; -no warnings 'regexp' ; -/[.zog.]/; -EXPECT -POSIX syntax [. .] belongs inside character classes in regex; marked by <-- HERE in m/[.zog.] <-- HERE / at - line 5. -######## -# regcomp.c [S_regclass] -$_ = ""; -use warnings 'regexp' ; -/[a-b]/; -/[a-\d]/; -/[\d-b]/; -/[\s-\d]/; -/[\d-\s]/; -/[a-[:digit:]]/; -/[[:digit:]-b]/; -/[[:alpha:]-[:digit:]]/; -/[[:digit:]-[:alpha:]]/; -no warnings 'regexp' ; -/[a-b]/; -/[a-\d]/; -/[\d-b]/; -/[\s-\d]/; -/[\d-\s]/; -/[a-[:digit:]]/; -/[[:digit:]-b]/; -/[[:alpha:]-[:digit:]]/; -/[[:digit:]-[:alpha:]]/; -EXPECT -False [] range "a-\d" in regex; marked by <-- HERE in m/[a-\d <-- HERE ]/ at - line 5. -False [] range "\d-" in regex; marked by <-- HERE in m/[\d- <-- HERE b]/ at - line 6. -False [] range "\s-" in regex; marked by <-- HERE in m/[\s- <-- HERE \d]/ at - line 7. -False [] range "\d-" in regex; marked by <-- HERE in m/[\d- <-- HERE \s]/ at - line 8. -False [] range "a-[:digit:]" in regex; marked by <-- HERE in m/[a-[:digit:] <-- HERE ]/ at - line 9. -False [] range "[:digit:]-" in regex; marked by <-- HERE in m/[[:digit:]- <-- HERE b]/ at - line 10. -False [] range "[:alpha:]-" in regex; marked by <-- HERE in m/[[:alpha:]- <-- HERE [:digit:]]/ at - line 11. -False [] range "[:digit:]-" in regex; marked by <-- HERE in m/[[:digit:]- <-- HERE [:alpha:]]/ at - line 12. -######## -# regcomp.c [S_regclassutf8] -BEGIN { - if (ord("\t") == 5) { - print "SKIPPED\n# ebcdic regular expression ranges differ."; - exit 0; - } -} -use utf8; -$_ = ""; -use warnings 'regexp' ; -/[a-b]/; -/[a-\d]/; -/[\d-b]/; -/[\s-\d]/; -/[\d-\s]/; -/[a-[:digit:]]/; -/[[:digit:]-b]/; -/[[:alpha:]-[:digit:]]/; -/[[:digit:]-[:alpha:]]/; -no warnings 'regexp' ; -/[a-b]/; -/[a-\d]/; -/[\d-b]/; -/[\s-\d]/; -/[\d-\s]/; -/[a-[:digit:]]/; -/[[:digit:]-b]/; -/[[:alpha:]-[:digit:]]/; -/[[:digit:]-[:alpha:]]/; -EXPECT -False [] range "a-\d" in regex; marked by <-- HERE in m/[a-\d <-- HERE ]/ at - line 12. -False [] range "\d-" in regex; marked by <-- HERE in m/[\d- <-- HERE b]/ at - line 13. -False [] range "\s-" in regex; marked by <-- HERE in m/[\s- <-- HERE \d]/ at - line 14. -False [] range "\d-" in regex; marked by <-- HERE in m/[\d- <-- HERE \s]/ at - line 15. -False [] range "a-[:digit:]" in regex; marked by <-- HERE in m/[a-[:digit:] <-- HERE ]/ at - line 16. -False [] range "[:digit:]-" in regex; marked by <-- HERE in m/[[:digit:]- <-- HERE b]/ at - line 17. -False [] range "[:alpha:]-" in regex; marked by <-- HERE in m/[[:alpha:]- <-- HERE [:digit:]]/ at - line 18. -False [] range "[:digit:]-" in regex; marked by <-- HERE in m/[[:digit:]- <-- HERE [:alpha:]]/ at - line 19. -######## -# regcomp.c [S_regclass S_regclassutf8] -use warnings 'regexp' ; -$a =~ /[a\zb]/ ; -no warnings 'regexp' ; -$a =~ /[a\zb]/ ; -EXPECT -Unrecognized escape \z in character class passed through in regex; marked by <-- HERE in m/[a\z <-- HERE b]/ at - line 3. - -######## -# regcomp.c [S_reg] -use warnings 'regexp' ; -$a = qr/(?c)/; -$a = qr/(?-c)/; -$a = qr/(?g)/; -$a = qr/(?-g)/; -$a = qr/(?o)/; -$a = qr/(?-o)/; -$a = qr/(?g-o)/; -$a = qr/(?g-c)/; -$a = qr/(?o-cg)/; # (?c) means (?g) error won't be thrown -$a = qr/(?ogc)/; -no warnings 'regexp' ; -$a = qr/(?c)/; -$a = qr/(?-c)/; -$a = qr/(?g)/; -$a = qr/(?-g)/; -$a = qr/(?o)/; -$a = qr/(?-o)/; -$a = qr/(?g-o)/; -$a = qr/(?g-c)/; -$a = qr/(?o-cg)/; # (?c) means (?g) error won't be thrown -$a = qr/(?ogc)/; -#EXPECT -EXPECT -Useless (?c) - use /gc modifier in regex; marked by <-- HERE in m/(?c <-- HERE )/ at - line 3. -Useless (?-c) - don't use /gc modifier in regex; marked by <-- HERE in m/(?-c <-- HERE )/ at - line 4. -Useless (?g) - use /g modifier in regex; marked by <-- HERE in m/(?g <-- HERE )/ at - line 5. -Useless (?-g) - don't use /g modifier in regex; marked by <-- HERE in m/(?-g <-- HERE )/ at - line 6. -Useless (?o) - use /o modifier in regex; marked by <-- HERE in m/(?o <-- HERE )/ at - line 7. -Useless (?-o) - don't use /o modifier in regex; marked by <-- HERE in m/(?-o <-- HERE )/ at - line 8. -Useless (?g) - use /g modifier in regex; marked by <-- HERE in m/(?g <-- HERE -o)/ at - line 9. -Useless (?-o) - don't use /o modifier in regex; marked by <-- HERE in m/(?g-o <-- HERE )/ at - line 9. -Useless (?g) - use /g modifier in regex; marked by <-- HERE in m/(?g <-- HERE -c)/ at - line 10. -Useless (?-c) - don't use /gc modifier in regex; marked by <-- HERE in m/(?g-c <-- HERE )/ at - line 10. -Useless (?o) - use /o modifier in regex; marked by <-- HERE in m/(?o <-- HERE -cg)/ at - line 11. -Useless (?-c) - don't use /gc modifier in regex; marked by <-- HERE in m/(?o-c <-- HERE g)/ at - line 11. -Useless (?o) - use /o modifier in regex; marked by <-- HERE in m/(?o <-- HERE gc)/ at - line 12. -Useless (?g) - use /g modifier in regex; marked by <-- HERE in m/(?og <-- HERE c)/ at - line 12. -Useless (?c) - use /gc modifier in regex; marked by <-- HERE in m/(?ogc <-- HERE )/ at - line 12. -######## -# regcomp.c [S_regatom] -$a = qr/\o{/; -EXPECT -Missing right brace on \o{ in regex; marked by <-- HERE in m/\o{ <-- HERE / at - line 2. -######## -# regcomp.c [S_regatom] -$a = qr/\o/; -EXPECT -Missing braces on \o{} in regex; marked by <-- HERE in m/\o <-- HERE / at - line 2. -######## -# regcomp.c [S_regatom] -$a = qr/\o{}/; -EXPECT -Number with no digits in regex; marked by <-- HERE in m/\o{} <-- HERE / at - line 2. -######## -# regcomp.c [S_regclass] -$a = qr/[\o{]/; -EXPECT -Missing right brace on \o{ in regex; marked by <-- HERE in m/[\o{ <-- HERE ]/ at - line 2. -######## -# regcomp.c [S_regclass] -$a = qr/[\o]/; -EXPECT -Missing braces on \o{} in regex; marked by <-- HERE in m/[\o <-- HERE ]/ at - line 2. -######## -# regcomp.c [S_regclass] -$a = qr/[\o{}]/; -EXPECT -Number with no digits in regex; marked by <-- HERE in m/[\o{} <-- HERE ]/ at - line 2. -######## -# regcomp.c [S_regclass] -use warnings 'regexp' ; -$a = qr/[\8\9]/; -$a = qr/[\_\0]/; # Should have no warnings on this and the remainder of this test -$a = qr/[\07]/; -$a = qr/[\006]/; -$a = qr/[\0005]/; -no warnings 'regexp' ; -$a = qr/[\8\9]/; -EXPECT -Unrecognized escape \8 in character class passed through in regex; marked by <-- HERE in m/[\8 <-- HERE \9]/ at - line 3. -Unrecognized escape \9 in character class passed through in regex; marked by <-- HERE in m/[\8\9 <-- HERE ]/ at - line 3. -######## -# regcomp.c [Perl_re_compile] -$a = qr/(?^-i:foo)/; -EXPECT -Sequence (?^-...) not recognized in regex; marked by <-- HERE in m/(?^- <-- HERE i:foo)/ at - line 2. -######## -# regcomp.c [S_regatom] -use warnings 'regexp' ; -$a = qr/\87/; -$a = qr/a\87/; -$a = qr/a\97/; -no warnings 'regexp' ; -$a = qr/\87/; -$a = qr/a\87/; -$a = qr/a\97/; -EXPECT -Unrecognized escape \8 passed through in regex; marked by <-- HERE in m/\8 <-- HERE 7/ at - line 3. -Unrecognized escape \8 passed through in regex; marked by <-- HERE in m/a\8 <-- HERE 7/ at - line 4. -Unrecognized escape \9 passed through in regex; marked by <-- HERE in m/a\9 <-- HERE 7/ at - line 5. diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t index 30bc2d6..b514320 100644 --- a/t/re/reg_mesg.t +++ b/t/re/reg_mesg.t @@ -15,12 +15,29 @@ use strict; ## arrays below. The {#} is a meta-marker -- it marks where the marker should ## go. ## +## Returns empty string if that is what is expected. Otherwise, handles +## either a scalar, turning it into a single element array; or a ref to an +## array, adjusting each element. If called in array context, returns an +## array, otherwise the join of all elements + sub fixup_expect { - my $expect = shift; - $expect =~ s/{\#}/<-- HERE/; - $expect =~ s/{\#}/ <-- HERE /; - $expect .= " at "; - return $expect; + my $expect_ref = shift; + return if $expect_ref eq ""; + + my @expect; + if (ref $expect_ref) { + @expect = @$expect_ref; + } + else { + @expect = $expect_ref; + } + + foreach my $element (@expect) { + $element =~ s/{\#}/in regex; marked by <-- HERE in/; + $element =~ s/{\#}/ <-- HERE /; + $element .= " at "; + } + return wantarray ? @expect : join "", @expect; } my $inf_m1 = ($Config::Config{reg_infty} || 32767) - 1; @@ -31,147 +48,215 @@ my $inf_p1 = $inf_m1 + 2; ## my @death = ( - '/[[=foo=]]/' => 'POSIX syntax [= =] is reserved for future extensions in regex; marked by {#} in m/[[=foo=]{#}]/', + '/[[=foo=]]/' => 'POSIX syntax [= =] is reserved for future extensions {#} m/[[=foo=]{#}]/', '/(?<= .*)/' => 'Variable length lookbehind not implemented in regex m/(?<= .*)/', '/(?<= x{1000})/' => 'Lookbehind longer than 255 not implemented in regex m/(?<= x{1000})/', - '/(?@)/' => 'Sequence (?@...) not implemented in regex; marked by {#} in m/(?@{#})/', + '/(?@)/' => 'Sequence (?@...) not implemented {#} m/(?@{#})/', '/(?{ 1/' => 'Missing right curly or square bracket', - '/(?(1x))/' => 'Switch condition not recognized in regex; marked by {#} in m/(?(1x{#}))/', + '/(?(1x))/' => 'Switch condition not recognized {#} m/(?(1x{#}))/', - '/(?(1)x|y|z)/' => 'Switch (?(condition)... contains too many branches in regex; marked by {#} in m/(?(1)x|y|{#}z)/', + '/(?(1)x|y|z)/' => 'Switch (?(condition)... contains too many branches {#} m/(?(1)x|y|{#}z)/', - '/(?(x)y|x)/' => 'Unknown switch condition (?(x) in regex; marked by {#} in m/(?({#}x)y|x)/', + '/(?(x)y|x)/' => 'Unknown switch condition (?(x) {#} m/(?({#}x)y|x)/', - '/(?/' => 'Sequence (? incomplete in regex; marked by {#} in m/(?{#}/', + '/(?/' => 'Sequence (? incomplete {#} m/(?{#}/', - '/(?;x/' => 'Sequence (?;...) not recognized in regex; marked by {#} in m/(?;{#}x/', - '/(?<;x/' => 'Group name must start with a non-digit word character in regex; marked by {#} in m/(?<;{#}x/', - '/(?\ix/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}ix/', - '/(?\mx/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}mx/', - '/(?\:x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}:x/', - '/(?\=x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}=x/', - '/(?\!x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}!x/', - '/(?\<=x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}<=x/', - '/(?\<!x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}<!x/', - '/(?\>x/' => 'Sequence (?\...) not recognized in regex; marked by {#} in m/(?\{#}>x/', - '/(?^-i:foo)/' => 'Sequence (?^-...) not recognized in regex; marked by {#} in m/(?^-{#}i:foo)/', - '/(?^-i)foo/' => 'Sequence (?^-...) not recognized in regex; marked by {#} in m/(?^-{#}i)foo/', - '/(?^d:foo)/' => 'Sequence (?^d...) not recognized in regex; marked by {#} in m/(?^d{#}:foo)/', - '/(?^d)foo/' => 'Sequence (?^d...) not recognized in regex; marked by {#} in m/(?^d{#})foo/', - '/(?^lu:foo)/' => 'Regexp modifiers "l" and "u" are mutually exclusive in regex; marked by {#} in m/(?^lu{#}:foo)/', - '/(?^lu)foo/' => 'Regexp modifiers "l" and "u" are mutually exclusive in regex; marked by {#} in m/(?^lu{#})foo/', -'/(?da:foo)/' => 'Regexp modifiers "d" and "a" are mutually exclusive in regex; marked by {#} in m/(?da{#}:foo)/', -'/(?lil:foo)/' => 'Regexp modifier "l" may not appear twice in regex; marked by {#} in m/(?lil{#}:foo)/', -'/(?aaia:foo)/' => 'Regexp modifier "a" may appear a maximum of twice in regex; marked by {#} in m/(?aaia{#}:foo)/', -'/(?i-l:foo)/' => 'Regexp modifier "l" may not appear after the "-" in regex; marked by {#} in m/(?i-l{#}:foo)/', + '/(?;x/' => 'Sequence (?;...) not recognized {#} m/(?;{#}x/', + '/(?<;x/' => 'Group name must start with a non-digit word character {#} m/(?<;{#}x/', + '/(?\ix/' => 'Sequence (?\...) not recognized {#} m/(?\{#}ix/', + '/(?\mx/' => 'Sequence (?\...) not recognized {#} m/(?\{#}mx/', + '/(?\:x/' => 'Sequence (?\...) not recognized {#} m/(?\{#}:x/', + '/(?\=x/' => 'Sequence (?\...) not recognized {#} m/(?\{#}=x/', + '/(?\!x/' => 'Sequence (?\...) not recognized {#} m/(?\{#}!x/', + '/(?\<=x/' => 'Sequence (?\...) not recognized {#} m/(?\{#}<=x/', + '/(?\<!x/' => 'Sequence (?\...) not recognized {#} m/(?\{#}<!x/', + '/(?\>x/' => 'Sequence (?\...) not recognized {#} m/(?\{#}>x/', + '/(?^-i:foo)/' => 'Sequence (?^-...) not recognized {#} m/(?^-{#}i:foo)/', + '/(?^-i)foo/' => 'Sequence (?^-...) not recognized {#} m/(?^-{#}i)foo/', + '/(?^d:foo)/' => 'Sequence (?^d...) not recognized {#} m/(?^d{#}:foo)/', + '/(?^d)foo/' => 'Sequence (?^d...) not recognized {#} m/(?^d{#})foo/', + '/(?^lu:foo)/' => 'Regexp modifiers "l" and "u" are mutually exclusive {#} m/(?^lu{#}:foo)/', + '/(?^lu)foo/' => 'Regexp modifiers "l" and "u" are mutually exclusive {#} m/(?^lu{#})foo/', +'/(?da:foo)/' => 'Regexp modifiers "d" and "a" are mutually exclusive {#} m/(?da{#}:foo)/', +'/(?lil:foo)/' => 'Regexp modifier "l" may not appear twice {#} m/(?lil{#}:foo)/', +'/(?aaia:foo)/' => 'Regexp modifier "a" may appear a maximum of twice {#} m/(?aaia{#}:foo)/', +'/(?i-l:foo)/' => 'Regexp modifier "l" may not appear after the "-" {#} m/(?i-l{#}:foo)/', - '/((x)/' => 'Unmatched ( in regex; marked by {#} in m/({#}(x)/', + '/((x)/' => 'Unmatched ( {#} m/({#}(x)/', - "/x{$inf_p1}/" => "Quantifier in {,} bigger than $inf_m1 in regex; marked by {#} in m/x{{#}$inf_p1}/", + "/x{$inf_p1}/" => "Quantifier in {,} bigger than $inf_m1 {#} m/x{{#}$inf_p1}/", - '/x**/' => 'Nested quantifiers in regex; marked by {#} in m/x**{#}/', + '/x**/' => 'Nested quantifiers {#} m/x**{#}/', - '/x[/' => 'Unmatched [ in regex; marked by {#} in m/x[{#}/', + '/x[/' => 'Unmatched [ {#} m/x[{#}/', - '/*/', => 'Quantifier follows nothing in regex; marked by {#} in m/*{#}/', + '/*/', => 'Quantifier follows nothing {#} m/*{#}/', - '/\p{x/' => 'Missing right brace on \p{} in regex; marked by {#} in m/\p{{#}x/', + '/\p{x/' => 'Missing right brace on \p{} {#} m/\p{{#}x/', - '/[\p{x]/' => 'Missing right brace on \p{} in regex; marked by {#} in m/[\p{{#}x]/', + '/[\p{x]/' => 'Missing right brace on \p{} {#} m/[\p{{#}x]/', - '/(x)\2/' => 'Reference to nonexistent group in regex; marked by {#} in m/(x)\2{#}/', + '/(x)\2/' => 'Reference to nonexistent group {#} m/(x)\2{#}/', 'my $m = "\\\"; $m =~ $m', => 'Trailing \ in regex m/\/', - '/\x{1/' => 'Missing right brace on \x{} in regex; marked by {#} in m/\x{1{#}/', - '/\x{X/' => 'Missing right brace on \x{} in regex; marked by {#} in m/\x{{#}X/', - - '/[\x{X]/' => 'Missing right brace on \x{} in regex; marked by {#} in m/[\x{{#}X]/', - '/[\x{A]/' => 'Missing right brace on \x{} in regex; marked by {#} in m/[\x{A{#}]/', - - '/\o{1/' => 'Missing right brace on \o{ in regex; marked by {#} in m/\o{1{#}/', - '/\o{X/' => 'Missing right brace on \o{ in regex; marked by {#} in m/\o{{#}X/', - - '/[\o{X]/' => 'Missing right brace on \o{ in regex; marked by {#} in m/[\o{{#}X]/', - '/[\o{7]/' => 'Missing right brace on \o{ in regex; marked by {#} in m/[\o{7{#}]/', - - '/[[:barf:]]/' => 'POSIX class [:barf:] unknown in regex; marked by {#} in m/[[:barf:]{#}]/', - - '/[[=barf=]]/' => 'POSIX syntax [= =] is reserved for future extensions in regex; marked by {#} in m/[[=barf=]{#}]/', - - '/[[.barf.]]/' => 'POSIX syntax [. .] is reserved for future extensions in regex; marked by {#} in m/[[.barf.]{#}]/', - - '/[z-a]/' => 'Invalid [] range "z-a" in regex; marked by {#} in m/[z-a{#}]/', - - '/\p/' => 'Empty \p{} in regex; marked by {#} in m/\p{#}/', - - '/\P{}/' => 'Empty \P{} in regex; marked by {#} in m/\P{{#}}/', - '/(?[[[:word]]])/' => "Unmatched ':' in POSIX class in regex; marked by {#} in m/(?[[[:word{#}]]])/", - '/(?[[:word]])/' => "Unmatched ':' in POSIX class in regex; marked by {#} in m/(?[[:word{#}]])/", - '/(?[[[:digit: ])/' => "Unmatched '[' in POSIX class in regex; marked by {#} in m/(?[[[:digit:{#} ])/", - '/(?[[:digit: ])/' => "Unmatched '[' in POSIX class in regex; marked by {#} in m/(?[[:digit:{#} ])/", - '/(?[[[::]]])/' => "POSIX class [::] unknown in regex; marked by {#} in m/(?[[[::]{#}]])/", - '/(?[[[:w:]]])/' => "POSIX class [:w:] unknown in regex; marked by {#} in m/(?[[[:w:]{#}]])/", - '/(?[[:w:]])/' => "POSIX class [:w:] unknown in regex; marked by {#} in m/(?[[:w:]{#}])/", - '/(?[a])/' => 'Unexpected character in regex; marked by {#} in m/(?[a{#}])/', - '/(?[\t])/l' => '(?[...]) not valid in locale in regex; marked by {#} in m/(?[{#}\t])/', - '/(?[ + \t ])/' => 'Unexpected binary operator \'+\' with no preceding operand in regex; marked by {#} in m/(?[ +{#} \t ])/', - '/(?[ \cK - ( + \t ) ])/' => 'Unexpected binary operator \'+\' with no preceding operand in regex; marked by {#} in m/(?[ \cK - ( +{#} \t ) ])/', - '/(?[ \cK ( \t ) ])/' => 'Unexpected \'(\' with no preceding operator in regex; marked by {#} in m/(?[ \cK ({#} \t ) ])/', - '/(?[ \cK \t ])/' => 'Operand with no preceding operator in regex; marked by {#} in m/(?[ \cK \t{#} ])/', - '/(?[ \0004 ])/' => 'Need exactly 3 octal digits in regex; marked by {#} in m/(?[ \0004 {#}])/', - '/(?[ \05 ])/' => 'Need exactly 3 octal digits in regex; marked by {#} in m/(?[ \05 {#}])/', - '/(?[ \o{1038} ])/' => 'Non-octal character in regex; marked by {#} in m/(?[ \o{1038{#}} ])/', - '/(?[ \o{} ])/' => 'Number with no digits in regex; marked by {#} in m/(?[ \o{}{#} ])/', - '/(?[ \x{defg} ])/' => 'Non-hex character in regex; marked by {#} in m/(?[ \x{defg{#}} ])/', - '/(?[ \xabcdef ])/' => 'Use \\x{...} for more than two hex characters in regex; marked by {#} in m/(?[ \xabc{#}def ])/', - '/(?[ \x{} ])/' => 'Number with no digits in regex; marked by {#} in m/(?[ \x{}{#} ])/', - '/(?[ \cK + ) ])/' => 'Unexpected \')\' in regex; marked by {#} in m/(?[ \cK + ){#} ])/', - '/(?[ \cK + ])/' => 'Incomplete expression within \'(?[ ])\' in regex; marked by {#} in m/(?[ \cK + {#}])/', - '/(?[ \p{foo} ])/' => 'Property \'foo\' is unknown in regex; marked by {#} in m/(?[ \p{foo}{#} ])/', - '/(?[ \p{ foo = bar } ])/' => 'Property \'foo = bar\' is unknown in regex; marked by {#} in m/(?[ \p{ foo = bar }{#} ])/', - '/(?[ \8 ])/' => 'Unrecognized escape \8 in character class in regex; marked by {#} in m/(?[ \8{#} ])/', + '/\x{1/' => 'Missing right brace on \x{} {#} m/\x{1{#}/', + '/\x{X/' => 'Missing right brace on \x{} {#} m/\x{{#}X/', + + '/[\x{X]/' => 'Missing right brace on \x{} {#} m/[\x{{#}X]/', + '/[\x{A]/' => 'Missing right brace on \x{} {#} m/[\x{A{#}]/', + + '/\o{1/' => 'Missing right brace on \o{ {#} m/\o{1{#}/', + '/\o{X/' => 'Missing right brace on \o{ {#} m/\o{{#}X/', + + '/[\o{X]/' => 'Missing right brace on \o{ {#} m/[\o{{#}X]/', + '/[\o{7]/' => 'Missing right brace on \o{ {#} m/[\o{7{#}]/', + + '/[[:barf:]]/' => 'POSIX class [:barf:] unknown {#} m/[[:barf:]{#}]/', + + '/[[=barf=]]/' => 'POSIX syntax [= =] is reserved for future extensions {#} m/[[=barf=]{#}]/', + + '/[[.barf.]]/' => 'POSIX syntax [. .] is reserved for future extensions {#} m/[[.barf.]{#}]/', + + '/[z-a]/' => 'Invalid [] range "z-a" {#} m/[z-a{#}]/', + + '/\p/' => 'Empty \p{} {#} m/\p{#}/', + + '/\P{}/' => 'Empty \P{} {#} m/\P{{#}}/', + '/(?[[[:word]]])/' => "Unmatched ':' in POSIX class {#} m/(?[[[:word{#}]]])/", + '/(?[[:word]])/' => "Unmatched ':' in POSIX class {#} m/(?[[:word{#}]])/", + '/(?[[[:digit: ])/' => "Unmatched '[' in POSIX class {#} m/(?[[[:digit:{#} ])/", + '/(?[[:digit: ])/' => "Unmatched '[' in POSIX class {#} m/(?[[:digit:{#} ])/", + '/(?[[[::]]])/' => "POSIX class [::] unknown {#} m/(?[[[::]{#}]])/", + '/(?[[[:w:]]])/' => "POSIX class [:w:] unknown {#} m/(?[[[:w:]{#}]])/", + '/(?[[:w:]])/' => "POSIX class [:w:] unknown {#} m/(?[[:w:]{#}])/", + '/(?[a])/' => 'Unexpected character {#} m/(?[a{#}])/', + '/(?[\t])/l' => '(?[...]) not valid in locale {#} m/(?[{#}\t])/', + '/(?[ + \t ])/' => 'Unexpected binary operator \'+\' with no preceding operand {#} m/(?[ +{#} \t ])/', + '/(?[ \cK - ( + \t ) ])/' => 'Unexpected binary operator \'+\' with no preceding operand {#} m/(?[ \cK - ( +{#} \t ) ])/', + '/(?[ \cK ( \t ) ])/' => 'Unexpected \'(\' with no preceding operator {#} m/(?[ \cK ({#} \t ) ])/', + '/(?[ \cK \t ])/' => 'Operand with no preceding operator {#} m/(?[ \cK \t{#} ])/', + '/(?[ \0004 ])/' => 'Need exactly 3 octal digits {#} m/(?[ \0004 {#}])/', + '/(?[ \05 ])/' => 'Need exactly 3 octal digits {#} m/(?[ \05 {#}])/', + '/(?[ \o{1038} ])/' => 'Non-octal character {#} m/(?[ \o{1038{#}} ])/', + '/(?[ \o{} ])/' => 'Number with no digits {#} m/(?[ \o{}{#} ])/', + '/(?[ \x{defg} ])/' => 'Non-hex character {#} m/(?[ \x{defg{#}} ])/', + '/(?[ \xabcdef ])/' => 'Use \\x{...} for more than two hex characters {#} m/(?[ \xabc{#}def ])/', + '/(?[ \x{} ])/' => 'Number with no digits {#} m/(?[ \x{}{#} ])/', + '/(?[ \cK + ) ])/' => 'Unexpected \')\' {#} m/(?[ \cK + ){#} ])/', + '/(?[ \cK + ])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[ \cK + {#}])/', + '/(?[ \p{foo} ])/' => 'Property \'foo\' is unknown {#} m/(?[ \p{foo}{#} ])/', + '/(?[ \p{ foo = bar } ])/' => 'Property \'foo = bar\' is unknown {#} m/(?[ \p{ foo = bar }{#} ])/', + '/(?[ \8 ])/' => 'Unrecognized escape \8 in character class {#} m/(?[ \8{#} ])/', '/(?[ \t ]/' => 'Syntax error in (?[...]) in regex m/(?[ \t ]/', '/(?[ [ \t ]/' => 'Syntax error in (?[...]) in regex m/(?[ [ \t ]/', '/(?[ \t ] ]/' => 'Syntax error in (?[...]) in regex m/(?[ \t ] ]/', '/(?[ [ ] ]/' => 'Syntax error in (?[...]) in regex m/(?[ [ ] ]/', '/(?[ \t + \e # This was supposed to be a comment ])/' => 'Syntax error in (?[...]) in regex m/(?[ \t + \e # This was supposed to be a comment ])/', - '/(?[ ])/' => 'Incomplete expression within \'(?[ ])\' in regex; marked by {#} in m/(?[ {#}])/', - 'm/(?[[a-\d]])/' => 'False [] range "a-\d" in regex; marked by {#} in m/(?[[a-\d{#}]])/', - 'm/(?[[\w-x]])/' => 'False [] range "\w-" in regex; marked by {#} in m/(?[[\w-{#}x]])/', - 'm/(?[[a-\pM]])/' => 'False [] range "a-\pM" in regex; marked by {#} in m/(?[[a-\pM{#}]])/', - 'm/(?[[\pM-x]])/' => 'False [] range "\pM-" in regex; marked by {#} in m/(?[[\pM-{#}x]])/', - 'm/(?[[\N{LATIN CAPITAL LETTER A WITH MACRON AND GRAVE}]])/' => '\N{} in character class restricted to one character in regex; marked by {#} in m/(?[[\N{U+100.300{#}}]])/', + '/(?[ ])/' => 'Incomplete expression within \'(?[ ])\' {#} m/(?[ {#}])/', + 'm/(?[[a-\d]])/' => 'False [] range "a-\d" {#} m/(?[[a-\d{#}]])/', + 'm/(?[[\w-x]])/' => 'False [] range "\w-" {#} m/(?[[\w-{#}x]])/', + 'm/(?[[a-\pM]])/' => 'False [] range "a-\pM" {#} m/(?[[a-\pM{#}]])/', + 'm/(?[[\pM-x]])/' => 'False [] range "\pM-" {#} m/(?[[\pM-{#}x]])/', + 'm/(?[[\N{LATIN CAPITAL LETTER A WITH MACRON AND GRAVE}]])/' => '\N{} in character class restricted to one character {#} m/(?[[\N{U+100.300{#}}]])/', + 'm/\o{/' => 'Missing right brace on \o{ {#} m/\o{{#}/', + 'm/\o/' => 'Missing braces on \o{} {#} m/\o{#}/', + 'm/\o{}/' => 'Number with no digits {#} m/\o{}{#}/', + 'm/[\o{]/' => 'Missing right brace on \o{ {#} m/[\o{{#}]/', + 'm/[\o]/' => 'Missing braces on \o{} {#} m/[\o{#}]/', + 'm/[\o{}]/' => 'Number with no digits {#} m/[\o{}{#}]/', + 'm/(?^-i:foo)/' => 'Sequence (?^-...) not recognized {#} m/(?^-{#}i:foo)/', ); # Tests involving a user-defined charnames translator are in pat_advanced.t +# In the following arrays of warnings, the value can be an array of things to +# expect. If the empty string, it means no warning should be raised. + ## -## Key-value pairs of code/error of code that should have non-fatal warnings. +## Key-value pairs of code/error of code that should have non-fatal regexp warnings. ## my @warning = ( - 'm/\b*/' => '\b* matches null string many times in regex; marked by {#} in m/\b*{#}/', - - 'm/[:blank:]/' => 'POSIX syntax [: :] belongs inside character classes in regex; marked by {#} in m/[:blank:]{#}/', - - "m'[\\y]'" => 'Unrecognized escape \y in character class passed through in regex; marked by {#} in m/[\y{#}]/', - - 'm/[a-\d]/' => 'False [] range "a-\d" in regex; marked by {#} in m/[a-\d{#}]/', - 'm/[\w-x]/' => 'False [] range "\w-" in regex; marked by {#} in m/[\w-{#}x]/', - 'm/[a-\pM]/' => 'False [] range "a-\pM" in regex; marked by {#} in m/[a-\pM{#}]/', - 'm/[\pM-x]/' => 'False [] range "\pM-" in regex; marked by {#} in m/[\pM-{#}x]/', - "m'\\y'" => 'Unrecognized escape \y passed through in regex; marked by {#} in m/\y{#}/', - '/x{3,1}/' => 'Quantifier {n,m} with n > m can\'t match in regex; marked by {#} in m/x{3,1}{#}/', - '/\08/' => '\'\08\' resolved to \'\o{0}8\' in regex; marked by {#} in m/\08{#}/', - '/\018/' => '\'\018\' resolved to \'\o{1}8\' in regex; marked by {#} in m/\018{#}/', - '/[\08]/' => '\'\08\' resolved to \'\o{0}8\' in regex; marked by {#} in m/[\08{#}]/', - '/[\018]/' => '\'\018\' resolved to \'\o{1}8\' in regex; marked by {#} in m/[\018{#}]/', - '/(?[ \t ])/' => 'The regex_sets feature is experimental in regex; marked by {#} in m/(?[{#} \t ])/', + 'm/\b*/' => '\b* matches null string many times {#} m/\b*{#}/', + + 'm/[:blank:]/' => 'POSIX syntax [: :] belongs inside character classes {#} m/[:blank:]{#}/', + + "m'[\\y]'" => 'Unrecognized escape \y in character class passed through {#} m/[\y{#}]/', + + 'm/[a-\d]/' => 'False [] range "a-\d" {#} m/[a-\d{#}]/', + 'm/[\w-x]/' => 'False [] range "\w-" {#} m/[\w-{#}x]/', + 'm/[a-\pM]/' => 'False [] range "a-\pM" {#} m/[a-\pM{#}]/', + 'm/[\pM-x]/' => 'False [] range "\pM-" {#} m/[\pM-{#}x]/', + "m'\\y'" => 'Unrecognized escape \y passed through {#} m/\y{#}/', + '/x{3,1}/' => 'Quantifier {n,m} with n > m can\'t match {#} m/x{3,1}{#}/', + '/\08/' => '\'\08\' resolved to \'\o{0}8\' {#} m/\08{#}/', + '/\018/' => '\'\018\' resolved to \'\o{1}8\' {#} m/\018{#}/', + '/[\08]/' => '\'\08\' resolved to \'\o{0}8\' {#} m/[\08{#}]/', + '/[\018]/' => '\'\018\' resolved to \'\o{1}8\' {#} m/[\018{#}]/', + '/\87/' => 'Unrecognized escape \8 passed through {#} m/\8{#}7/', + '/a\87/' => 'Unrecognized escape \8 passed through {#} m/a\8{#}7/', + '/a\97/' => 'Unrecognized escape \9 passed through {#} m/a\9{#}7/', + '/(?=a)*/' => '(?=a)* matches null string many times {#} m/(?=a)*{#}/', + 'my $x = \'\m\'; qr/a$x/' => 'Unrecognized escape \m passed through {#} m/a\m{#}/', + '/\q/' => 'Unrecognized escape \q passed through {#} m/\q{#}/', + '/\q{/' => 'Unrecognized escape \q{ passed through {#} m/\q{{#}/', + '/(?=a){1,3}/' => 'Quantifier unexpected on zero-length expression {#} m/(?=a){1,3}{#}/', + '/\_/' => "", + '/[\_\0]/' => "", + '/[\07]/' => "", + '/[\006]/' => "", + '/[\0005]/' => "", + '/[\8\9]/' => ['Unrecognized escape \8 in character class passed through {#} m/[\8{#}\9]/', + 'Unrecognized escape \9 in character class passed through {#} m/[\8\9{#}]/', + ], + '/[:alpha:]/' => 'POSIX syntax [: :] belongs inside character classes {#} m/[:alpha:]{#}/', + '/[:zog:]/' => 'POSIX syntax [: :] belongs inside character classes {#} m/[:zog:]{#}/', + '/[.zog.]/' => 'POSIX syntax [. .] belongs inside character classes {#} m/[.zog.]{#}/', + '/[a-b]/' => "", + '/[a-\d]/' => 'False [] range "a-\d" {#} m/[a-\d{#}]/', + '/[\d-b]/' => 'False [] range "\d-" {#} m/[\d-{#}b]/', + '/[\s-\d]/' => 'False [] range "\s-" {#} m/[\s-{#}\d]/', + '/[\d-\s]/' => 'False [] range "\d-" {#} m/[\d-{#}\s]/', + '/[a-[:digit:]]/' => 'False [] range "a-[:digit:]" {#} m/[a-[:digit:]{#}]/', + '/[[:digit:]-b]/' => 'False [] range "[:digit:]-" {#} m/[[:digit:]-{#}b]/', + '/[[:alpha:]-[:digit:]]/' => 'False [] range "[:alpha:]-" {#} m/[[:alpha:]-{#}[:digit:]]/', + '/[[:digit:]-[:alpha:]]/' => 'False [] range "[:digit:]-" {#} m/[[:digit:]-{#}[:alpha:]]/', + '/[a\zb]/' => 'Unrecognized escape \z in character class passed through {#} m/[a\z{#}b]/', + '/(?c)/' => 'Useless (?c) - use /gc modifier {#} m/(?c{#})/', + '/(?-c)/' => 'Useless (?-c) - don\'t use /gc modifier {#} m/(?-c{#})/', + '/(?g)/' => 'Useless (?g) - use /g modifier {#} m/(?g{#})/', + '/(?-g)/' => 'Useless (?-g) - don\'t use /g modifier {#} m/(?-g{#})/', + '/(?o)/' => 'Useless (?o) - use /o modifier {#} m/(?o{#})/', + '/(?-o)/' => 'Useless (?-o) - don\'t use /o modifier {#} m/(?-o{#})/', + '/(?g-o)/' => [ 'Useless (?g) - use /g modifier {#} m/(?g{#}-o)/', + 'Useless (?-o) - don\'t use /o modifier {#} m/(?g-o{#})/', + ], + '/(?g-c)/' => [ 'Useless (?g) - use /g modifier {#} m/(?g{#}-c)/', + 'Useless (?-c) - don\'t use /gc modifier {#} m/(?g-c{#})/', + ], + # (?c) means (?g) error won't be thrown + '/(?o-cg)/' => [ 'Useless (?o) - use /o modifier {#} m/(?o{#}-cg)/', + 'Useless (?-c) - don\'t use /gc modifier {#} m/(?o-c{#}g)/', + ], + '/(?ogc)/' => [ 'Useless (?o) - use /o modifier {#} m/(?o{#}gc)/', + 'Useless (?g) - use /g modifier {#} m/(?og{#}c)/', + 'Useless (?c) - use /gc modifier {#} m/(?ogc{#})/', + ], +); + +my @experimental_regex_sets = ( + '/(?[ \t ])/' => 'The regex_sets feature is experimental {#} m/(?[{#} \t ])/', +); + +my @deprecated = ( + '/a\b{cde/' => '"\b{" is deprecated; use "\b\{" instead {#} m/a\{#}b{cde/', + '/a\B{cde/' => '"\B{" is deprecated; use "\B\{" instead {#} m/a\{#}B{cde/', + 'use utf8; /(?x)\Â Â \Â /' => 'Escape literal pattern white space under /x {#} m/(?x)\Â Â {#}\Â /', ); while (my ($regex, $expect) = splice @death, 0, 2) { @@ -187,13 +272,44 @@ while (my ($regex, $expect) = splice @death, 0, 2) { }, undef, "... and died without any other warnings"); } -while (my ($regex, $expect) = splice @warning, 0, 2) { - my $expect = fixup_expect($expect); - warning_like(sub { - $_ = "x"; - eval $regex; - is($@, '', "$regex did not die"); - }, qr/\Q$expect/, "... and gave expected warning"); +foreach my $ref (\@warning, \@experimental_regex_sets, \@deprecated) { + my $warning_type = ($ref == \@warning) + ? 'regexp' + : ($ref == \@deprecated) + ? 'deprecated' + : 'experimental::regex_sets'; + while (my ($regex, $expect) = splice @$ref, 0, 2) { + my @expect = fixup_expect($expect); + { + $_ = "x"; + no warnings; + eval $regex; + } + if (is($@, "", "$regex did not die")) { + my @got = capture_warnings(sub { + $_ = "x"; + eval $regex }); + my $count = @expect; + if (! is(scalar @got, scalar @expect, "... and gave expected number ($count) of warnings")) { + if (@got < @expect) { + $count = @got; + note "Expected warnings not gotten:\n\t" . join "\n\t", @expect[$count .. $#expect]; + } + else { + note "Unexpected warnings gotten:\n\t" . join("\n\t", @got[$count .. $#got]); + } + } + foreach my $i (0 .. $count - 1) { + if (like($got[$i], qr/\Q$expect[$i]/, "... and gave expected warning[$i]")) { + ok (0 == capture_warnings(sub { + $_ = "x"; + eval "no warnings '$warning_type'; $regex;" } + ), + "... and turning off '$warning_type' warnings suppressed it"); + } + } + } + } } done_testing(); -- Perl5 Master Repository
