Change 34755 by [EMAIL PROTECTED] on 2008/11/06 18:48:28
Various changes to regex diagnostics and testing
* Make ANYOF output from regprop easier to read by adding ][ in between
the unicode representation and the "ascii" one
* Make it possible to make tests in re_tests todo.
* add a todo test for a complementary character class match that should
fail (perl #60156)
* Also add a comment explaining a previous commit (relating to perl
#60344)
Affected files ...
... //depot/perl/regcomp.c#672 edit
... //depot/perl/regcomp.h#135 edit
... //depot/perl/regexec.c#577 edit
... //depot/perl/t/op/re_tests#137 edit
... //depot/perl/t/op/regexp.t#48 edit
Differences ...
==== //depot/perl/regcomp.c#672 (text) ====
Index: perl/regcomp.c
--- perl/regcomp.c#671~34747~ 2008-11-06 03:32:25.000000000 -0800
+++ perl/regcomp.c 2008-11-06 10:48:28.000000000 -0800
@@ -9099,6 +9099,7 @@
else if (k == ANYOF) {
int i, rangestart = -1;
const U8 flags = ANYOF_FLAGS(o);
+ int do_sep = 0;
/* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
static const char * const anyofs[] = {
@@ -9114,8 +9115,8 @@
"[:^alpha:]",
"[:ascii:]",
"[:^ascii:]",
- "[:ctrl:]",
- "[:^ctrl:]",
+ "[:cntrl:]",
+ "[:^cntrl:]",
"[:graph:]",
"[:^graph:]",
"[:lower:]",
@@ -9154,14 +9155,26 @@
sv_catpvs(sv, "-");
put_byte(sv, i - 1);
}
+ do_sep = 1;
rangestart = -1;
}
}
-
+ if (do_sep) {
+ sv_catpvs(sv,"][");
+ do_sep = 0;
+ }
+
if (o->flags & ANYOF_CLASS)
for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
- if (ANYOF_CLASS_TEST(o,i))
+ if (ANYOF_CLASS_TEST(o,i)) {
sv_catpv(sv, anyofs[i]);
+ do_sep = 1;
+ }
+
+ if (do_sep) {
+ sv_catpvs(sv,"][");
+ do_sep = 0;
+ }
if (flags & ANYOF_UNICODE)
sv_catpvs(sv, "{unicode}");
@@ -9175,7 +9188,7 @@
if (lv) {
if (sw) {
U8 s[UTF8_MAXBYTES_CASE+1];
-
+
for (i = 0; i <= 256; i++) { /* just the first 256 */
uvchr_to_utf8(s, i);
==== //depot/perl/regcomp.h#135 (text) ====
Index: perl/regcomp.h
--- perl/regcomp.h#134~32804~ 2008-01-02 05:47:42.000000000 -0800
+++ perl/regcomp.h 2008-11-06 10:48:28.000000000 -0800
@@ -317,9 +317,9 @@
#define ANYOF_NALNUM 1
#define ANYOF_SPACE 2 /* \s */
#define ANYOF_NSPACE 3
-#define ANYOF_DIGIT 4
+#define ANYOF_DIGIT 4 /* \d */
#define ANYOF_NDIGIT 5
-#define ANYOF_ALNUMC 6 /* isalnum(3), utf8::IsAlnum, ALNUMC */
+#define ANYOF_ALNUMC 6 /* [[:alnum:]] isalnum(3), utf8::IsAlnum,
ALNUMC */
#define ANYOF_NALNUMC 7
#define ANYOF_ALPHA 8
#define ANYOF_NALPHA 9
==== //depot/perl/regexec.c#577 (text) ====
Index: perl/regexec.c
--- perl/regexec.c#576~34746~ 2008-11-06 02:44:13.000000000 -0800
+++ perl/regexec.c 2008-11-06 10:48:28.000000000 -0800
@@ -4983,7 +4983,8 @@
do_ifmatch:
ST.me = scan;
ST.logical = logical;
- logical = 0;
+ logical = 0; /* XXX: reset state of logical once it has been saved
into ST */
+
/* execute body of (?...A) */
PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
/* NOTREACHED */
==== //depot/perl/t/op/re_tests#137 (text) ====
Index: perl/t/op/re_tests
--- perl/t/op/re_tests#136~34747~ 2008-11-06 03:32:25.000000000 -0800
+++ perl/t/op/re_tests 2008-11-06 10:48:28.000000000 -0800
@@ -1360,4 +1360,5 @@
/(.*?)a(?!(a+)b\2c)/ baaabaac y $&-$1 baa-ba
# [perl #60344] Regex lookbehind failure after an (if)then|else in perl 5.10
/\A(?(?=db2)db2|\D+)(?<!processed)\.csv\z/xms sql_processed.csv n
- -
-/\N{U+0100}/ \x{100} y $& \x{100} # Bug #59328
\ No newline at end of file
+/\N{U+0100}/ \x{100} y $& \x{100} # Bug #59328
+[\s][\S] \x{a0}\x{a0} nT - - # TODO Unicode
complements should not match same character
\ No newline at end of file
==== //depot/perl/t/op/regexp.t#48 (xtext) ====
Index: perl/t/op/regexp.t
--- perl/t/op/regexp.t#47~33919~ 2008-05-24 08:42:08.000000000 -0700
+++ perl/t/op/regexp.t 2008-11-06 10:48:28.000000000 -0800
@@ -13,6 +13,7 @@
# y expect a match
# n expect no match
# c expect an error
+# T the test is a TODO (can be combined with y/n/c)
# B test exposes a known bug in Perl, should be skipped
# b test exposes a known bug in Perl, should be skipped if noamp
# t test exposes a bug with threading, TODO if qr_embed_thr
@@ -102,16 +103,19 @@
my ($pat, $subject, $result, $repl, $expect, $reason) = split(/\t/,$_,6);
$reason = '' unless defined $reason;
my $input = join(':',$pat,$subject,$result,$repl,$expect);
- $pat = "'$pat'" unless $pat =~ /^[:'\/]/;
+ # the double '' below keeps simple syntax highlighters from going crazy
+ $pat = "'$pat'" unless $pat =~ /^[:''\/]/;
$pat =~ s/(\$\{\w+\})/$1/eeg;
$pat =~ s/\\n/\n/g;
$subject = eval qq("$subject"); die $@ if $@;
$expect = eval qq("$expect"); die $@ if $@;
$expect = $repl = '-' if $skip_amp and $input =~ /\$[&\`\']/;
- my $todo = $qr_embed_thr && ($result =~ s/t//);
+ my $todo_qr = $qr_embed_thr && ($result =~ s/t//);
my $skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//));
$reason = 'skipping $&' if $reason eq '' && $skip_amp;
$result =~ s/B//i unless $skip;
+ my $todo= $result =~ s/T// ? " # TODO" : "";
+
for my $study ('', 'study $subject', 'utf8::upgrade($subject)',
'utf8::upgrade($subject); study $subject') {
@@ -165,39 +169,39 @@
}
chomp( my $err = $@ );
if ($result eq 'c') {
- if ($err !~ m!^\Q$expect!) { print "not ok $test (compile) $input
=> `$err'\n"; next TEST }
+ if ($err !~ m!^\Q$expect!) { print "not ok $test$todo (compile)
$input => `$err'\n"; next TEST }
last; # no need to study a syntax error
}
elsif ( $skip ) {
print "ok $test # skipped", length($reason) ? " $reason" : '', "\n";
next TEST;
}
- elsif ( $todo ) {
+ elsif ( $todo_qr ) {
print "not ok $test # TODO", length($reason) ? " - $reason" : '',
"\n";
next TEST;
}
elsif ($@) {
- print "not ok $test $input => error `$err'[EMAIL PROTECTED]"; next
TEST;
+ print "not ok $test$todo $input => error `$err'[EMAIL PROTECTED]";
next TEST;
}
elsif ($result =~ /^n/) {
- if ($match) { print "not ok $test ($study) $input => false
positive\n"; next TEST }
+ if ($match) { print "not ok $test$todo ($study) $input => false
positive\n"; next TEST }
}
else {
if (!$match || $got ne $expect) {
eval { require Data::Dumper };
if ($@) {
- print "not ok $test ($study) $input => `$got',
match=$match\n$code\n";
+ print "not ok $test$todo ($study) $input => `$got',
match=$match\n$code\n";
}
else { # better diagnostics
my $s =
Data::Dumper->new([$subject],['subject'])->Useqq(1)->Dump;
my $g = Data::Dumper->new([$got],['got'])->Useqq(1)->Dump;
- print "not ok $test ($study) $input => `$got',
match=$match\n$s\n$g\n$code\n";
+ print "not ok $test$todo ($study) $input => `$got',
match=$match\n$s\n$g\n$code\n";
}
next TEST;
}
}
}
- print "ok $test\n";
+ print "ok $test$todo\n";
}
1;
End of Patch.