From 296892300c7325612a9b0d9d44ca40ee45a04147 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppi...@redhat.com> Date: Thu, 3 Nov 2016 13:33:10 +0100 Subject: Fix firstchar bitmap under UTF-8 with prefix optimization
--- ...ix-perl-129950-fix-firstchar-bitmap-under.patch | 97 ++++++++++++++++++++++ perl.spec | 8 ++ 2 files changed, 105 insertions(+) create mode 100644 perl-5.22.2-regcomp.c-fix-perl-129950-fix-firstchar-bitmap-under.patch diff --git a/perl-5.22.2-regcomp.c-fix-perl-129950-fix-firstchar-bitmap-under.patch b/perl-5.22.2-regcomp.c-fix-perl-129950-fix-firstchar-bitmap-under.patch new file mode 100644 index 0000000..5d7c2cc --- /dev/null +++ b/perl-5.22.2-regcomp.c-fix-perl-129950-fix-firstchar-bitmap-under.patch @@ -0,0 +1,97 @@ +From e40804ef8d2f49e588498bcc4bc0ba8e108ac648 Mon Sep 17 00:00:00 2001 +From: Yves Orton <demer...@gmail.com> +Date: Thu, 27 Oct 2016 13:52:24 +0200 +Subject: [PATCH] regcomp.c: fix perl #129950 - fix firstchar bitmap under utf8 + with prefix optimisation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Ported to 5.22.2: + +commit da42332b10691ba7af7550035ffc7f46c87e4e66 +Author: Yves Orton <demer...@gmail.com> +Date: Thu Oct 27 13:52:24 2016 +0200 + + regcomp.c: fix perl #129950 - fix firstchar bitmap under utf8 with prefix optimisation + + The trie code contains a number of sub optimisations, one of which + extracts common prefixes from alternations, and another which is a + bitmap of the possible matching first chars. + + The bitmap needs to contain the possible first octets of the string + which the trie can match, and for codepoints which might have a different + first octet under utf8 or non-utf8 we need to register BOTH codepoints. + + So for instance in the pattern (?:a|a\x{E4}) we should restructure this + as a(|\x{E4}), and the bitmap for the trie should contain both \x{E4} AND + \x{C3} as \x{C3} is the first byte of \x{E4} expressed as utf8. 
+ +Signed-off-by: Petr Písař <ppi...@redhat.com> +--- + regcomp.c | 14 ++++++++++++++ + t/re/pat.t | 9 ++++++++- + 2 files changed, 22 insertions(+), 1 deletion(-) + +diff --git a/regcomp.c b/regcomp.c +index 9332dea..fcb5147 100644 +--- a/regcomp.c ++++ b/regcomp.c +@@ -2965,6 +2965,13 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, + TRIE_BITMAP_SET(trie,*ch); + if ( folder ) + TRIE_BITMAP_SET(trie, folder[ *ch ]); ++ if ( !UTF ) { ++ /* store first byte of utf8 representation of ++ variant codepoints */ ++ if (! UVCHR_IS_INVARIANT(*ch)) { ++ TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(*ch)); ++ } ++ } + DEBUG_OPTIMISE_r( + PerlIO_printf(Perl_debug_log, "%s", (char*)ch) + ); +@@ -2973,6 +2980,13 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch, + TRIE_BITMAP_SET(trie,*ch); + if ( folder ) + TRIE_BITMAP_SET(trie,folder[ *ch ]); ++ if ( !UTF ) { ++ /* store first byte of utf8 representation of ++ variant codepoints */ ++ if (! UVCHR_IS_INVARIANT(*ch)) { ++ TRIE_BITMAP_SET(trie, UTF8_TWO_BYTE_HI(*ch)); ++ } ++ } + DEBUG_OPTIMISE_r(PerlIO_printf( Perl_debug_log,"%s", ch)); + } + idx = ofs; +diff --git a/t/re/pat.t b/t/re/pat.t +index 3377b19..2f18aa8 100644 +--- a/t/re/pat.t ++++ b/t/re/pat.t +@@ -23,7 +23,7 @@ BEGIN { + skip_all_without_unicode_tables(); + } + +-plan tests => 775; # Update this when adding/deleting tests. ++plan tests => 777; # Update this when adding/deleting tests. 
+ + run_tests() unless caller; + +@@ -1708,6 +1708,13 @@ EOP + like($error, qr{Reference to nonexistent group}, + 'gave appropriate error for qr{()(?1)}n'); + } ++ ++ { ++ my $str = "a\xE4"; ++ ok( $str =~ m{^(a|a\x{e4})$}, "fix [perl #129950] - latin1 case" ); ++ utf8::upgrade($str); ++ ok( $str =~ m{^(a|a\x{e4})$}, "fix [perl #129950] - utf8 case" ); ++ } + } # End of sub run_tests + + 1; +-- +2.7.4 + diff --git a/perl.spec b/perl.spec index 60fb90c..a17c7cb 100644 --- a/perl.spec +++ b/perl.spec @@ -133,6 +133,11 @@ Patch43: perl-5.22.2-perl-129350-anchored-floating-substrings-must-be-utf # Fix parsing perl options in shell bang line, RT#129336, # in upstream after 5.25.5 Patch44: perl-5.24.0-rt-129336-perl-i-u-erroneously-interpreted-as-u.patch + +# Fix firstchar bitmap under UTF-8 with prefix optimization, RT#129950, +# in upstream after 5.25.6 +Patch45: perl-5.22.2-regcomp.c-fix-perl-129950-fix-firstchar-bitmap-under.patch + # Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048 Patch200: perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch @@ -2418,6 +2423,7 @@ Perl extension for Version Objects %patch42 -p1 %patch43 -p1 %patch44 -p1 +%patch45 -p1 %patch200 -p1 %patch201 -p1 @@ -2454,6 +2460,7 @@ perl -x patchlevel.h \ 'Fedora Patch42: Fix string overrun in Perl_gv_fetchmethod_pvn_flags (RT#129267)' \ 'Fedora Patch43: Fix crash when matching UTF-8 string with non-UTF-8 substrings (RT#129350)' \ 'Fedora Patch44: Fix parsing perl options in shell bang line (RT#129336)' \ + 'Fedora Patch45: Fix firstchar bitmap under UTF-8 with prefix optimization (RT#129950)' \ 'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on Linux' \ 'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \ %{nil} @@ -4714,6 +4721,7 @@ popd - Fix string overrun in Perl_gv_fetchmethod_pvn_flags (RT#129267) - Fix crash when matching UTF-8 string with non-UTF-8 substrings (RT#129350) - Fix parsing perl options in shell bang 
line (RT#129336) +- Fix firstchar bitmap under UTF-8 with prefix optimization (RT#129950) * Wed Aug 03 2016 Jitka Plesnikova <jples...@redhat.com> - 4:5.22.2-362 - Avoid loading optional modules from default . (CVE-2016-1238) -- cgit v0.12 http://pkgs.fedoraproject.org/cgit/perl.git/commit/?h=f24&id=296892300c7325612a9b0d9d44ca40ee45a04147 _______________________________________________ perl-devel mailing list -- perl-devel@lists.fedoraproject.org To unsubscribe send an email to perl-devel-le...@lists.fedoraproject.org