Change 34755: Various changes to regex diagnostics and testing

Yves Orton Thu, 06 Nov 2008 11:00:18 -0800

Change 34755 by [EMAIL PROTECTED] on 2008/11/06 18:48:28

        Various changes to regex diagnostics and testing
        
        * Make ANYOF output from regprop easier to read by adding ][ in between 
the unicode representation and the "ascii" one
        
        * Make it possible to make tests in re_tests todo.
        
        * add a todo test for a complementary character class match that should 
fail (perl #60156)
        
        * Also add a comment explaining a previous commit (relating to perl 
#60344)


Affected files ...

... //depot/perl/regcomp.c#672 edit
... //depot/perl/regcomp.h#135 edit
... //depot/perl/regexec.c#577 edit
... //depot/perl/t/op/re_tests#137 edit
... //depot/perl/t/op/regexp.t#48 edit

Differences ...

==== //depot/perl/regcomp.c#672 (text) ====
Index: perl/regcomp.c
--- perl/regcomp.c#671~34747~   2008-11-06 03:32:25.000000000 -0800
+++ perl/regcomp.c      2008-11-06 10:48:28.000000000 -0800
@@ -9099,6 +9099,7 @@
     else if (k == ANYOF) {
        int i, rangestart = -1;
        const U8 flags = ANYOF_FLAGS(o);
+       int do_sep = 0;
 
        /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
        static const char * const anyofs[] = {
@@ -9114,8 +9115,8 @@
            "[:^alpha:]",
            "[:ascii:]",
            "[:^ascii:]",
-           "[:ctrl:]",
-           "[:^ctrl:]",
+           "[:cntrl:]",
+           "[:^cntrl:]",
            "[:graph:]",
            "[:^graph:]",
            "[:lower:]",
@@ -9154,14 +9155,26 @@
                    sv_catpvs(sv, "-");
                    put_byte(sv, i - 1);
                }
+               do_sep = 1;
                rangestart = -1;
            }
        }
-
+        if (do_sep) {
+            sv_catpvs(sv,"][");
+            do_sep = 0;
+        }
+            
        if (o->flags & ANYOF_CLASS)
            for (i = 0; i < (int)(sizeof(anyofs)/sizeof(char*)); i++)
-               if (ANYOF_CLASS_TEST(o,i))
+               if (ANYOF_CLASS_TEST(o,i)) {
                    sv_catpv(sv, anyofs[i]);
+                   do_sep = 1;
+               }
+        
+        if (do_sep) {
+            sv_catpvs(sv,"][");
+            do_sep = 0;
+        }
 
        if (flags & ANYOF_UNICODE)
            sv_catpvs(sv, "{unicode}");
@@ -9175,7 +9188,7 @@
            if (lv) {
                if (sw) {
                    U8 s[UTF8_MAXBYTES_CASE+1];
-               
+
                    for (i = 0; i <= 256; i++) { /* just the first 256 */
                        uvchr_to_utf8(s, i);
                        

==== //depot/perl/regcomp.h#135 (text) ====
Index: perl/regcomp.h
--- perl/regcomp.h#134~32804~   2008-01-02 05:47:42.000000000 -0800
+++ perl/regcomp.h      2008-11-06 10:48:28.000000000 -0800
@@ -317,9 +317,9 @@
 #define ANYOF_NALNUM    1
 #define ANYOF_SPACE     2      /* \s */
 #define ANYOF_NSPACE    3
-#define ANYOF_DIGIT     4
+#define ANYOF_DIGIT     4      /* \d */
 #define ANYOF_NDIGIT    5
-#define ANYOF_ALNUMC    6      /* isalnum(3), utf8::IsAlnum, ALNUMC */
+#define ANYOF_ALNUMC    6      /* [[:alnum:]] isalnum(3), utf8::IsAlnum, 
ALNUMC */
 #define ANYOF_NALNUMC   7
 #define ANYOF_ALPHA     8
 #define ANYOF_NALPHA    9

==== //depot/perl/regexec.c#577 (text) ====
Index: perl/regexec.c
--- perl/regexec.c#576~34746~   2008-11-06 02:44:13.000000000 -0800
+++ perl/regexec.c      2008-11-06 10:48:28.000000000 -0800
@@ -4983,7 +4983,8 @@
          do_ifmatch:
            ST.me = scan;
            ST.logical = logical;
-           logical = 0;
+           logical = 0; /* XXX: reset state of logical once it has been saved 
into ST */
+           
            /* execute body of (?...A) */
            PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
            /* NOTREACHED */

==== //depot/perl/t/op/re_tests#137 (text) ====
Index: perl/t/op/re_tests
--- perl/t/op/re_tests#136~34747~       2008-11-06 03:32:25.000000000 -0800
+++ perl/t/op/re_tests  2008-11-06 10:48:28.000000000 -0800
@@ -1360,4 +1360,5 @@
 /(.*?)a(?!(a+)b\2c)/   baaabaac        y       $&-$1   baa-ba
 # [perl #60344] Regex lookbehind failure after an (if)then|else in perl 5.10
 /\A(?(?=db2)db2|\D+)(?<!processed)\.csv\z/xms  sql_processed.csv       n       
-       -
-/\N{U+0100}/   \x{100} y       $&      \x{100} # Bug #59328
\ No newline at end of file
+/\N{U+0100}/   \x{100} y       $&      \x{100} # Bug #59328
+[\s][\S]       \x{a0}\x{a0}    nT      -       -       # TODO Unicode 
complements should not match same character
\ No newline at end of file

==== //depot/perl/t/op/regexp.t#48 (xtext) ====
Index: perl/t/op/regexp.t
--- perl/t/op/regexp.t#47~33919~        2008-05-24 08:42:08.000000000 -0700
+++ perl/t/op/regexp.t  2008-11-06 10:48:28.000000000 -0800
@@ -13,6 +13,7 @@
 #      y       expect a match
 #      n       expect no match
 #      c       expect an error
+#      T       the test is a TODO (can be combined with y/n/c)
 #      B       test exposes a known bug in Perl, should be skipped
 #      b       test exposes a known bug in Perl, should be skipped if noamp
 #      t       test exposes a bug with threading, TODO if qr_embed_thr
@@ -102,16 +103,19 @@
     my ($pat, $subject, $result, $repl, $expect, $reason) = split(/\t/,$_,6);
     $reason = '' unless defined $reason;
     my $input = join(':',$pat,$subject,$result,$repl,$expect);
-    $pat = "'$pat'" unless $pat =~ /^[:'\/]/;
+    # the double '' below keeps simple syntax highlighters from going crazy
+    $pat = "'$pat'" unless $pat =~ /^[:''\/]/; 
     $pat =~ s/(\$\{\w+\})/$1/eeg;
     $pat =~ s/\\n/\n/g;
     $subject = eval qq("$subject"); die $@ if $@;
     $expect  = eval qq("$expect"); die $@ if $@;
     $expect = $repl = '-' if $skip_amp and $input =~ /\$[&\`\']/;
-    my $todo = $qr_embed_thr && ($result =~ s/t//);
+    my $todo_qr = $qr_embed_thr && ($result =~ s/t//);
     my $skip = ($skip_amp ? ($result =~ s/B//i) : ($result =~ s/B//));
     $reason = 'skipping $&' if $reason eq  '' && $skip_amp;
     $result =~ s/B//i unless $skip;
+    my $todo= $result =~ s/T// ? " # TODO" : "";
+    
 
     for my $study ('', 'study $subject', 'utf8::upgrade($subject)',
                   'utf8::upgrade($subject); study $subject') {
@@ -165,39 +169,39 @@
        }
        chomp( my $err = $@ );
        if ($result eq 'c') {
-           if ($err !~ m!^\Q$expect!) { print "not ok $test (compile) $input 
=> `$err'\n"; next TEST }
+           if ($err !~ m!^\Q$expect!) { print "not ok $test$todo (compile) 
$input => `$err'\n"; next TEST }
            last;  # no need to study a syntax error
        }
        elsif ( $skip ) {
            print "ok $test # skipped", length($reason) ? " $reason" : '', "\n";
            next TEST;
        }
-       elsif ( $todo ) {
+       elsif ( $todo_qr ) {
            print "not ok $test # TODO", length($reason) ? " - $reason" : '', 
"\n";
            next TEST;
        }
        elsif ($@) {
-           print "not ok $test $input => error `$err'[EMAIL PROTECTED]"; next 
TEST;
+           print "not ok $test$todo $input => error `$err'[EMAIL PROTECTED]"; 
next TEST;
        }
        elsif ($result =~ /^n/) {
-           if ($match) { print "not ok $test ($study) $input => false 
positive\n"; next TEST }
+           if ($match) { print "not ok $test$todo ($study) $input => false 
positive\n"; next TEST }
        }
        else {
            if (!$match || $got ne $expect) {
                eval { require Data::Dumper };
                if ($@) {
-                   print "not ok $test ($study) $input => `$got', 
match=$match\n$code\n";
+                   print "not ok $test$todo ($study) $input => `$got', 
match=$match\n$code\n";
                }
                else { # better diagnostics
                    my $s = 
Data::Dumper->new([$subject],['subject'])->Useqq(1)->Dump;
                    my $g = Data::Dumper->new([$got],['got'])->Useqq(1)->Dump;
-                   print "not ok $test ($study) $input => `$got', 
match=$match\n$s\n$g\n$code\n";
+                   print "not ok $test$todo ($study) $input => `$got', 
match=$match\n$s\n$g\n$code\n";
                }
                next TEST;
            }
        }
     }
-    print "ok $test\n";
+    print "ok $test$todo\n";
 }
 
 1;
End of Patch.

Change 34755: Various changes to regex diagnostics and testing

Reply via email to