In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/c012444fd89eef64e1d1687642cdb9f968e96739?hp=bd2db5df3cd7c8f0ecc592ef15151e17c1504af9>

- Log -----------------------------------------------------------------
commit c012444fd89eef64e1d1687642cdb9f968e96739
Author: Slaven Rezic <[email protected]>
Date:   Sun Jan 4 17:28:33 2009 +0100

    Another regexp failure with utf8-flagged string and byte-flagged pattern (reminder)
    
    Date: 17 Nov 2007 16:29:29 +0100
    Message-ID: <[email protected]>
-----------------------------------------------------------------------

Summary of changes:
 regexec.c  |    8 ++++++--
 t/op/pat.t |   11 ++++++++++-
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/regexec.c b/regexec.c
index 94d6761..bc8da6e 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1007,15 +1007,16 @@ Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 
 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
+    UV uvc_unfolded = 0;                                                   \
     switch (trie_type) {                                                    \
     case trie_utf8_fold:                                                    \
        if ( foldlen>0 ) {                                                  \
-           uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
+           uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
            foldlen -= len;                                                 \
            uscan += len;                                                   \
            len=0;                                                          \
        } else {                                                            \
-           uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );   \
+           uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
            uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
            foldlen -= UNISKIP( uvc );                                      \
            uscan = foldbuf + UNISKIP( uvc );                               \
@@ -1054,6 +1055,9 @@ uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
                charid = (U16)SvIV(*svpp);                                  \
        }                                                                   \
     }                                                                       \
+    if (!charid && trie_type == trie_utf8_fold && !UTF) {                  \
+       charid = trie->charmap[uvc_unfolded];                               \
+    }                                                                      \
 } STMT_END
 
 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
diff --git a/t/op/pat.t b/t/op/pat.t
index aa275bd..586b317 100755
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -13,7 +13,7 @@ sub run_tests;
 
 $| = 1;
 
-my $EXPECTED_TESTS = 3865;  # Update this when adding/deleting tests.
+my $EXPECTED_TESTS = 3961;  # Update this when adding/deleting tests.
 
 BEGIN {
     chdir 't' if -d 't';
@@ -3896,6 +3896,15 @@ sub run_tests {
         iseq $1, "\xd6", "Upgrade error";
     }
 
+    {
+# more TRIE/AHOCORASICK problems with mixed utf8 / latin-1 and case folding
+       for my $chr (160 .. 255) {
+           my $chr_byte = chr($chr);
+           my $chr_utf8 = chr($chr); utf8::upgrade($chr_utf8);
+           my $rx = qr{$chr_byte|X}i;
+           ok($chr_utf8 =~ $rx, "utf8/latin, codepoint $chr");
+       }
+    }
 
     {
         # Regardless of utf8ness any character matches itself when 

--
Perl5 Master Repository

Reply via email to