In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/8842e230bdaeef0ab4af48b7d7a9b29ee40b3890?hp=5d9574c10ca04dcefc1cac5441cb321f7bb4cc37>

- Log -----------------------------------------------------------------
commit 8842e230bdaeef0ab4af48b7d7a9b29ee40b3890
Author: Karl Williamson <[email protected]>
Date:   Wed Mar 12 14:11:58 2014 -0600

    regcomp.c: Don't read past string-end
    
    In doing an audit of regcomp.c, and experimenting using
    Encode::_utf8_on(), I found this one instance of a regen/regcharclass.pl
    macro that could read beyond the end of the string if given malformed
    UTF-8.  Hence we convert to use the 'safe' form.  There are no other
    uses of the non-safe version, so don't need to generate them.
-----------------------------------------------------------------------

Summary of changes:
 regcharclass.h        | 26 --------------------------
 regcomp.c             |  2 +-
 regen/regcharclass.pl |  2 +-
 3 files changed, 2 insertions(+), 28 deletions(-)

diff --git a/regcharclass.h b/regcharclass.h
index b0f635d..5e34ec0 100644
--- a/regcharclass.h
+++ b/regcharclass.h
@@ -723,15 +723,6 @@
        \p{PatWS}
 */
 /*** GENERATED CODE ***/
-#define is_PATWS(s,is_utf8)                                                 \
-( ( ( 0x09 <= NATIVE_TO_LATIN1(((U8*)s)[0]) && NATIVE_TO_LATIN1(((U8*)s)[0]) 
<= 0x0D ) || 0x20 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 1\
-: ( is_utf8 ) ?                                                             \
-    ( ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ?                           \
-       ( ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 )               \
-    : ( ( ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == 
NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) 
== 0x8E || ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == ... [20 chars truncated]
-: ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) )
-
-/*** GENERATED CODE ***/
 #define is_PATWS_safe(s,e,is_utf8)                                          \
 ( ((e) > (s)) ?                                                             \
     ( ( ( 0x09 <= NATIVE_TO_LATIN1(((U8*)s)[0]) && 
NATIVE_TO_LATIN1(((U8*)s)[0]) <= 0x0D ) || 0x20 == 
NATIVE_TO_LATIN1(((U8*)s)[0]) ) ? 1\
@@ -745,14 +736,6 @@
 : 0 )
 
 /*** GENERATED CODE ***/
-#define is_PATWS_non_low(s,is_utf8)                                         \
-( ( is_utf8 ) ?                                                             \
-    ( ( 0xC2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) ?                           \
-       ( ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[1]) ) ? 2 : 0 )               \
-    : ( ( ( 0xE2 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) && ( 0x80 == 
NATIVE_TO_LATIN1(((U8*)s)[1]) ) ) && ( ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) 
== 0x8E || ( NATIVE_TO_LATIN1(((U8*)s)[2]) & 0xFE ) == ... [20 chars truncated]
-: ( 0x85 == NATIVE_TO_LATIN1(((U8*)s)[0]) ) )
-
-/*** GENERATED CODE ***/
 #define is_PATWS_non_low_safe(s,e,is_utf8)                                  \
 ( ((e) > (s)) ?                                                             \
     ( (! is_utf8) ?                                                         \
@@ -773,15 +756,6 @@
 ( 0x200F == NATIVE_TO_UNI(cp) || ( 0x200F < NATIVE_TO_UNI(cp) &&            \
 ( 0x2028 == NATIVE_TO_UNI(cp) || 0x2029 == NATIVE_TO_UNI(cp) ) ) ) ) ) ) ) ) ) 
) )
 
-/*** GENERATED CODE ***/
-#define is_PATWS_cp(cp)                                                     \
-( ( 0x09 <= NATIVE_TO_UNI(cp) && NATIVE_TO_UNI(cp) <= 0x0D ) || ( 0x0D < 
NATIVE_TO_UNI(cp) &&\
-( 0x20 == NATIVE_TO_UNI(cp) || ( 0x20 < NATIVE_TO_UNI(cp) &&                \
-( 0x85 == NATIVE_TO_UNI(cp) || ( 0x85 < NATIVE_TO_UNI(cp) &&                \
-( 0x200E == NATIVE_TO_UNI(cp) || ( 0x200E < NATIVE_TO_UNI(cp) &&            \
-( 0x200F == NATIVE_TO_UNI(cp) || ( 0x200F < NATIVE_TO_UNI(cp) &&            \
-( 0x2028 == NATIVE_TO_UNI(cp) || 0x2029 == NATIVE_TO_UNI(cp) ) ) ) ) ) ) ) ) ) 
) )
-
 
 #endif /* H_REGCHARCLASS */
 
diff --git a/regcomp.c b/regcomp.c
index d72d344..ca2ffb8 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -11920,7 +11920,7 @@ tryagain:
                     if (! SIZE_ONLY
                         && RExC_flags & RXf_PMf_EXTENDED
                         && ckWARN_d(WARN_DEPRECATED)
-                        && is_PATWS_non_low(p, UTF))
+                        && is_PATWS_non_low_safe(p, RExC_end, UTF))
                     {
                         vWARN_dep(p + ((UTF) ? UTF8SKIP(p) : 1),
                                 "Escape literal pattern white space under /x");
diff --git a/regen/regcharclass.pl b/regen/regcharclass.pl
index fa947a3..187023a 100755
--- a/regen/regcharclass.pl
+++ b/regen/regcharclass.pl
@@ -1652,5 +1652,5 @@ PROBLEMATIC_LOCALE_FOLDEDS_START : The first folded 
character of folds which are
 \p{_Perl_Problematic_Locale_Foldeds_Start}
 
 PATWS: pattern white space
-=> generic generic_non_low cp : fast safe
+=> generic generic_non_low cp : safe
 \p{PatWS}

--
Perl5 Master Repository

Reply via email to