In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/419d89744c528882284e8c1f6db945501a4ca7b6?hp=a10707d85bb39630271c7862504e361a1cc722f9>
- Log ----------------------------------------------------------------- commit 419d89744c528882284e8c1f6db945501a4ca7b6 Author: Karl Williamson <[email protected]> Date: Sat Mar 12 12:37:40 2011 -0700 regcomp.c: /a should handle /\xdf/i same as /u /a and /u should match identically case-insensitively, but they didn't. Nor was /a being tested because it was thought that they handled things identically, and the tests were already taking too long. So this adds some tests as well. ----------------------------------------------------------------------- Summary of changes: regcomp.c | 8 ++++---- t/re/fold_grind.t | 34 +++++++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/regcomp.c b/regcomp.c index 0581217..3f4d634 100644 --- a/regcomp.c +++ b/regcomp.c @@ -9331,13 +9331,13 @@ S_set_regclass_bit_fold(pTHX_ RExC_state_t *pRExC_state, regnode* node, const U8 break; case LATIN_SMALL_LETTER_SHARP_S: - /* Under /d and /u, this can match the two chars "ss" */ + /* Under /a, /d, and /u, this can match the two chars "ss" */ if (! MORE_ASCII_RESTRICTED) { add_alternate(alternate_ptr, (U8 *) "ss", 2); - /* And under /u, it can match even if the target is not - * utf8 */ - if (UNI_SEMANTICS) { + /* And under /u or /a, it can match even if the target is + * not utf8 */ + if (AT_LEAST_UNI_SEMANTICS) { ANYOF_FLAGS(node) |= ANYOF_NONBITMAP_NON_UTF8; } } diff --git a/t/re/fold_grind.t b/t/re/fold_grind.t index 0a848f9..5861bd2 100644 --- a/t/re/fold_grind.t +++ b/t/re/fold_grind.t @@ -208,6 +208,9 @@ my $has_tested_ascii_l; my $has_tested_above_latin1_d; my $has_tested_ascii_d; my $has_tested_non_latin1_d; +my $has_tested_above_latin1_a; +my $has_tested_ascii_a; +my $has_tested_non_latin1_a; # For use by pairs() in generating combinations sub prefix { @@ -223,7 +226,7 @@ sub pairs (@) { map { prefix $_[$_], @_[0..$_-1, $_+1..$#_] } 0..$#_ } -my @charsets = qw(d u aa); +my @charsets = qw(d u a aa); my $current_locale = POSIX::setlocale( &POSIX::LC_ALL, "C") // ""; push @charsets, 'l' if $current_locale eq 'C'; @@ -365,6 +368,30 @@ foreach my $test (sort { numerically } keys %tests) { } } } + elsif ($charset eq 'a') { + # Similarly for a. This should match identically to /u, so wasn't + # tested at all until a bug was found that was thereby missed. + # As a compromise, beyond one test (besides self) each, we don't + # test pairs that are both ascii; or both above latin1, or are + # combinations of ascii and above latin1. + if (! $target_has_upper_latin1 && ! $pattern_has_upper_latin1) { + if ($target_has_ascii && $pattern_has_ascii) { + next if defined $has_tested_ascii_a + && $has_tested_ascii_a != $test; + $has_tested_ascii_a = $test + } + elsif (! $target_has_latin1 && ! $pattern_has_latin1) { + next if defined $has_tested_above_latin1_a + && $has_tested_above_latin1_a != $test; + $has_tested_above_latin1_a = $test; + } + else { + next if defined $has_tested_non_latin1_a + && $has_tested_non_latin1_a != $test; + $has_tested_non_latin1_a = $test; + } + } + } } foreach my $utf8_target (0, 1) { # Both utf8 and not, for @@ -402,8 +429,9 @@ foreach my $test (sort { numerically } keys %tests) { my $todo = ($test == 0xdf && $lhs =~ /DF/ && $uni_semantics - && ($charset eq 'u' || $charset eq 'd') - && ! ($charset eq 'u' && (($upgrade_target eq "") != ($upgrade_pattern eq ""))) + && ($charset eq 'u' || $charset eq 'a' || $charset eq 'd') + && ! (($charset eq 'u' || $charset eq 'a') + && (($upgrade_target eq "") != ($upgrade_pattern eq ""))) && ! ($charset eq 'd' && (! $upgrade_target || ! $upgrade_pattern)) ); my $eval = "my \$c = \"$lhs$rhs\"; my \$p = qr/(?$charset:^($rhs)\\1\$)/i;$upgrade_target$upgrade_pattern \$c $op \$p"; -- Perl5 Master Repository
