In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/419d89744c528882284e8c1f6db945501a4ca7b6?hp=a10707d85bb39630271c7862504e361a1cc722f9>

- Log -----------------------------------------------------------------
commit 419d89744c528882284e8c1f6db945501a4ca7b6
Author: Karl Williamson <[email protected]>
Date:   Sat Mar 12 12:37:40 2011 -0700

    regcomp.c: /a should handle /\xdf/i same as /u
    
    /a and /u should match identically case-insensitively, but they didn't.
    Nor was /a being tested because it was thought that they handled things
    identically, and the tests were already taking too long.  So this adds
    some tests as well.
-----------------------------------------------------------------------

Summary of changes:
 regcomp.c         |    8 ++++----
 t/re/fold_grind.t |   34 +++++++++++++++++++++++++++++++---
 2 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index 0581217..3f4d634 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -9331,13 +9331,13 @@ S_set_regclass_bit_fold(pTHX_ RExC_state_t 
*pRExC_state, regnode* node, const U8
                break;
            case LATIN_SMALL_LETTER_SHARP_S:
 
-               /* Under /d and /u, this can match the two chars "ss" */
+               /* Under /a, /d, and /u, this can match the two chars "ss" */
                if (! MORE_ASCII_RESTRICTED) {
                    add_alternate(alternate_ptr, (U8 *) "ss", 2);
 
-                   /* And under /u, it can match even if the target is not
-                    * utf8 */
-                   if (UNI_SEMANTICS) {
+                   /* And under /u or /a, it can match even if the target is
+                    * not utf8 */
+                   if (AT_LEAST_UNI_SEMANTICS) {
                        ANYOF_FLAGS(node) |= ANYOF_NONBITMAP_NON_UTF8;
                    }
                }
diff --git a/t/re/fold_grind.t b/t/re/fold_grind.t
index 0a848f9..5861bd2 100644
--- a/t/re/fold_grind.t
+++ b/t/re/fold_grind.t
@@ -208,6 +208,9 @@ my $has_tested_ascii_l;
 my $has_tested_above_latin1_d;
 my $has_tested_ascii_d;
 my $has_tested_non_latin1_d;
+my $has_tested_above_latin1_a;
+my $has_tested_ascii_a;
+my $has_tested_non_latin1_a;
 
 # For use by pairs() in generating combinations
 sub prefix {
@@ -223,7 +226,7 @@ sub pairs (@) {
     map { prefix $_[$_], @_[0..$_-1, $_+1..$#_] } 0..$#_
 }
 
-my @charsets = qw(d u aa);
+my @charsets = qw(d u a aa);
 my $current_locale = POSIX::setlocale( &POSIX::LC_ALL, "C") // "";
 push @charsets, 'l' if $current_locale eq 'C';
 
@@ -365,6 +368,30 @@ foreach my $test (sort { numerically } keys %tests) {
             }
           }
         }
+        elsif ($charset eq 'a') {
+          # Similarly for a.  This should match identically to /u, so wasn't
+          # tested at all until a bug was found that was thereby missed.
+          # As a compromise, beyond one test (besides self) each, we  don't
+          # test pairs that are both ascii; or both above latin1, or are
+          # combinations of ascii and above latin1.
+          if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) {
+            if ($target_has_ascii && $pattern_has_ascii) {
+              next if defined $has_tested_ascii_a
+                      && $has_tested_ascii_a != $test;
+              $has_tested_ascii_a = $test
+            }
+            elsif (! $target_has_latin1 && ! $pattern_has_latin1) {
+              next if defined $has_tested_above_latin1_a
+                      && $has_tested_above_latin1_a != $test;
+              $has_tested_above_latin1_a = $test;
+            }
+            else {
+              next if defined $has_tested_non_latin1_a
+                      && $has_tested_non_latin1_a != $test;
+              $has_tested_non_latin1_a = $test;
+            }
+          }
+        }
       }
 
       foreach my $utf8_target (0, 1) {    # Both utf8 and not, for
@@ -402,8 +429,9 @@ foreach my $test (sort { numerically } keys %tests) {
           my $todo = ($test == 0xdf
                       && $lhs =~ /DF/
                       && $uni_semantics
-                      && ($charset eq 'u' || $charset eq 'd')
-                      && ! ($charset eq 'u' && (($upgrade_target eq "") != 
($upgrade_pattern eq "")))
+                      && ($charset eq 'u' || $charset eq 'a' || $charset eq 
'd')
+                      && ! (($charset eq 'u' || $charset eq 'a')
+                            && (($upgrade_target eq "") != ($upgrade_pattern 
eq "")))
                       && ! ($charset eq 'd' && (! $upgrade_target || ! 
$upgrade_pattern))
                       );
           my $eval = "my \$c = \"$lhs$rhs\"; my \$p = 
qr/(?$charset:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p";

--
Perl5 Master Repository

Reply via email to