From 296892300c7325612a9b0d9d44ca40ee45a04147 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppi...@redhat.com>
Date: Thu, 3 Nov 2016 13:33:10 +0100
Subject: Fix firstchar bitmap under UTF-8 with prefix optimization

---
 ...ix-perl-129950-fix-firstchar-bitmap-under.patch | 97 ++++++++++++++++++++++
 perl.spec                                          |  8 ++
 2 files changed, 105 insertions(+)
 create mode 100644 
perl-5.22.2-regcomp.c-fix-perl-129950-fix-firstchar-bitmap-under.patch

diff --git 
a/perl-5.22.2-regcomp.c-fix-perl-129950-fix-firstchar-bitmap-under.patch 
b/perl-5.22.2-regcomp.c-fix-perl-129950-fix-firstchar-bitmap-under.patch
new file mode 100644
index 0000000..5d7c2cc
--- /dev/null
+++ b/perl-5.22.2-regcomp.c-fix-perl-129950-fix-firstchar-bitmap-under.patch
@@ -0,0 +1,97 @@
+From e40804ef8d2f49e588498bcc4bc0ba8e108ac648 Mon Sep 17 00:00:00 2001
+From: Yves Orton <demer...@gmail.com>
+Date: Thu, 27 Oct 2016 13:52:24 +0200
+Subject: [PATCH] regcomp.c: fix perl #129950 - fix firstchar bitmap under utf8
+ with prefix optimisation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Ported to 5.22.2:
+
+commit da42332b10691ba7af7550035ffc7f46c87e4e66
+Author: Yves Orton <demer...@gmail.com>
+Date:   Thu Oct 27 13:52:24 2016 +0200
+
+    regcomp.c: fix perl #129950 - fix firstchar bitmap under utf8 with prefix 
optimisation
+
+    The trie code contains a number of sub optimisations, one of which
+    extracts common prefixes from alternations, and another which is a
+    bitmap of the possible matching first chars.
+
+    The bitmap needs to contain the possible first octets of the string
+    which the trie can match, and for codepoints which might have a different
+    first octet under utf8 or non-utf8 we need to register BOTH octets.
+
+    So for instance in the pattern (?:a|a\x{E4}) we should restructure this
+    as a(|\x{E4}), and the bitmap for the trie should contain both \x{E4} AND
+    \x{C3} as \x{C3} is the first byte of \x{E4} expressed as utf8.
+
+Signed-off-by: Petr Písař <ppi...@redhat.com>
+---
+ regcomp.c  | 14 ++++++++++++++
+ t/re/pat.t |  9 ++++++++-
+ 2 files changed, 22 insertions(+), 1 deletion(-)
+
+diff --git a/regcomp.c b/regcomp.c
+index 9332dea..fcb5147 100644
+--- a/regcomp.c
++++ b/regcomp.c
+@@ -2965,6 +2965,13 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode 
*startbranch,
+                                     TRIE_BITMAP_SET(trie,*ch);
+                                     if ( folder )
+                                         TRIE_BITMAP_SET(trie, folder[ *ch ]);
++                                    if ( !UTF ) {
++                                        /* store first byte of utf8 
representation of
++                                           variant codepoints */
++                                        if (! UVCHR_IS_INVARIANT(*ch)) {
++                                            TRIE_BITMAP_SET(trie, 
UTF8_TWO_BYTE_HI(*ch));
++                                        }
++                                    }
+                                     DEBUG_OPTIMISE_r(
+                                         PerlIO_printf(Perl_debug_log, "%s", 
(char*)ch)
+                                     );
+@@ -2973,6 +2980,13 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode 
*startbranch,
+                           TRIE_BITMAP_SET(trie,*ch);
+                           if ( folder )
+                               TRIE_BITMAP_SET(trie,folder[ *ch ]);
++                            if ( !UTF ) {
++                                /* store first byte of utf8 representation of
++                                   variant codepoints */
++                                if (! UVCHR_IS_INVARIANT(*ch)) {
++                                    TRIE_BITMAP_SET(trie, 
UTF8_TWO_BYTE_HI(*ch));
++                                }
++                            }
+                           DEBUG_OPTIMISE_r(PerlIO_printf( 
Perl_debug_log,"%s", ch));
+                       }
+                         idx = ofs;
+diff --git a/t/re/pat.t b/t/re/pat.t
+index 3377b19..2f18aa8 100644
+--- a/t/re/pat.t
++++ b/t/re/pat.t
+@@ -23,7 +23,7 @@ BEGIN {
+     skip_all_without_unicode_tables();
+ }
+ 
+-plan tests => 775;  # Update this when adding/deleting tests.
++plan tests => 777;  # Update this when adding/deleting tests.
+ 
+ run_tests() unless caller;
+ 
+@@ -1708,6 +1708,13 @@ EOP
+               like($error, qr{Reference to nonexistent group},
+                               'gave appropriate error for qr{()(?1)}n');
+       }
++
++      {
++              my $str = "a\xE4";
++              ok( $str =~ m{^(a|a\x{e4})$}, "fix [perl #129950] - latin1 
case" );
++              utf8::upgrade($str);
++              ok( $str =~ m{^(a|a\x{e4})$}, "fix [perl #129950] - utf8 case" 
);
++      }
+ } # End of sub run_tests
+ 
+ 1;
+-- 
+2.7.4
+
diff --git a/perl.spec b/perl.spec
index 60fb90c..a17c7cb 100644
--- a/perl.spec
+++ b/perl.spec
@@ -133,6 +133,11 @@ Patch43:        
perl-5.22.2-perl-129350-anchored-floating-substrings-must-be-utf
 # Fix parsing perl options in shell bang line, RT#129336,
 # in upstream after 5.25.5
 Patch44:        
perl-5.24.0-rt-129336-perl-i-u-erroneously-interpreted-as-u.patch
+
+# Fix firstchar bitmap under UTF-8 with prefix optimization, RT#129950,
+# in upstream after 5.25.6
+Patch45:        
perl-5.22.2-regcomp.c-fix-perl-129950-fix-firstchar-bitmap-under.patch
+
 # Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048
 Patch200:       
perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch
 
@@ -2418,6 +2423,7 @@ Perl extension for Version Objects
 %patch42 -p1
 %patch43 -p1
 %patch44 -p1
+%patch45 -p1
 %patch200 -p1
 %patch201 -p1
 
@@ -2454,6 +2460,7 @@ perl -x patchlevel.h \
     'Fedora Patch42: Fix string overrun in Perl_gv_fetchmethod_pvn_flags 
(RT#129267)' \
     'Fedora Patch43: Fix crash when matching UTF-8 string with non-UTF-8 
substrings (RT#129350)' \
     'Fedora Patch44: Fix parsing perl options in shell bang line (RT#129336)' \
+    'Fedora Patch45: Fix firstchar bitmap under UTF-8 with prefix optimization 
(RT#129950)' \
     'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on 
Linux' \
     'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \
     %{nil}
@@ -4714,6 +4721,7 @@ popd
 - Fix string overrun in Perl_gv_fetchmethod_pvn_flags (RT#129267)
 - Fix crash when matching UTF-8 string with non-UTF-8 substrings (RT#129350)
 - Fix parsing perl options in shell bang line (RT#129336)
+- Fix firstchar bitmap under UTF-8 with prefix optimization (RT#129950)
 
 * Wed Aug 03 2016 Jitka Plesnikova <jples...@redhat.com> - 4:5.22.2-362
 - Avoid loading optional modules from default . (CVE-2016-1238)
-- 
cgit v0.12


        
http://pkgs.fedoraproject.org/cgit/perl.git/commit/?h=f24&id=296892300c7325612a9b0d9d44ca40ee45a04147
_______________________________________________
perl-devel mailing list -- perl-devel@lists.fedoraproject.org
To unsubscribe send an email to perl-devel-le...@lists.fedoraproject.org

Reply via email to