In perl.git, the branch blead has been updated

<https://perl5.git.perl.org/perl.git/commitdiff/3ed3004ae659b0360a49bd586680461ab3b6a6b7?hp=2f145a2d09c96f4005a7cd54f706cc76a1fecbbb>

- Log -----------------------------------------------------------------
commit 3ed3004ae659b0360a49bd586680461ab3b6a6b7
Author: Yves Orton <demer...@gmail.com>
Date:   Wed Feb 14 10:29:26 2018 +1100

    fix TRIE_READ_CHAR and DECL_TRIE_TYPE to account for non-utf8 target
    
    This is the third commit involved in [perl #132063], and the bottom line
    cause of it.  The problem is that the code is incorrectly branching to a
    portion of the code that expects it is handling UTF-8. And the input
    isn't UTF-8.  The fix is to handle this case and branch correctly.  This
    bug requires the following things in order to manifest:
        1) the pattern is compiled under /il
        2) the pattern does not contain any characters below 256
        3) the target string is not UTF-8.
    
    (The committer changed the test to test this issue on EBCDIC, as the
    original \xFF is an invariant there that wouldn't exercise the problem.
    We want a start byte for a long UTF-8 sequence for a single character.
    On the EBCDIC pages we support, \xFE fits that bill.)

commit 9ad8cac45829d8cd51c25f8c6abf8c591514d7e8
Author: Karl Williamson <k...@cpan.org>
Date:   Tue Feb 6 14:50:48 2018 -0700

    Subject: PATCH: [perl #132063]: Heap buffer overflow
    
    There were three things that were fixed as a result of this ticket, any
    one of which would have avoided the issue.
    
    Commit 421da25c4318861925129cd1b17263289db3443c has already fixed
    one of those.  The issue was reading beyond the end of a buffer, and
    that commit keeps the code from reading beyond a NUL, which normally
    should be present, marking the end of the buffer.
    
    This commit fixes the issue where the code was told that reading that
    many bytes was ok to do.  There were several instances in regexec.c of
    the code assuming that the input was valid UTF-8, whereas the input was
    too short for what the start byte claimed it would be.
    
    I grepped through the core for any other similar uses, and did not find
    any.
    
    The next commit will fix the third thing.

-----------------------------------------------------------------------

Summary of changes:
 regexec.c     | 47 ++++++++++++++++++++++++++++-------------------
 t/re/re_tests |  2 ++
 2 files changed, 30 insertions(+), 19 deletions(-)

diff --git a/regexec.c b/regexec.c
index 4b537f6872..9a5e87e9e5 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1781,7 +1781,7 @@ Perl_re_intuit_start(pTHX_
 #define DECL_TRIE_TYPE(scan) \
     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold,  
     \
                  trie_utf8_exactfa_fold, trie_latin_utf8_exactfa_fold,         
     \
-                 trie_utf8l, trie_flu8 }                                       
     \
+                 trie_utf8l, trie_flu8, trie_flu8_latin }                      
     \
                     trie_type = ((scan->flags == EXACT)                        
     \
                                  ? (utf8_target ? trie_utf8 : trie_plain)      
     \
                                  : (scan->flags == EXACTL)                     
     \
@@ -1791,20 +1791,24 @@ Perl_re_intuit_start(pTHX_
                                          ? trie_utf8_exactfa_fold              
     \
                                          : trie_latin_utf8_exactfa_fold)       
     \
                                       : (scan->flags == EXACTFLU8              
     \
-                                         ? trie_flu8                           
     \
+                                         ? (utf8_target                        
     \
+                                           ? trie_flu8                         
     \
+                                           : trie_flu8_latin)                  
     \
                                          : (utf8_target                        
     \
                                            ? trie_utf8_fold                    
     \
-                                           :   trie_latin_utf8_fold)))
+                                           : trie_latin_utf8_fold)))
 
-#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len, 
uvc, charid, foldlen, foldbuf, uniflags) \
+/* 'uscan' is set to foldbuf, and incremented, so below the end of uscan is
+ * 'foldbuf+sizeof(foldbuf)' */
+#define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uc_end, uscan, 
len, uvc, charid, foldlen, foldbuf, uniflags) \
 STMT_START {                                                                   
     \
     STRLEN skiplen;                                                            
     \
     U8 flags = FOLD_FLAGS_FULL;                                                
     \
     switch (trie_type) {                                                       
     \
     case trie_flu8:                                                            
     \
         _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                    
     \
-        if (utf8_target && UTF8_IS_ABOVE_LATIN1(*uc)) {                        
     \
-            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc + UTF8SKIP(uc));     
     \
+        if (UTF8_IS_ABOVE_LATIN1(*uc)) {                                       
     \
+            _CHECK_AND_OUTPUT_WIDE_LOCALE_UTF8_MSG(uc, uc_end - uc);           
     \
         }                                                                      
     \
         goto do_trie_utf8_fold;                                                
     \
     case trie_utf8_exactfa_fold:                                               
     \
@@ -1813,25 +1817,29 @@ STMT_START {
     case trie_utf8_fold:                                                       
     \
       do_trie_utf8_fold:                                                       
     \
         if ( foldlen>0 ) {                                                     
     \
-            uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, 
uniflags ); \
+            uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags 
);     \
             foldlen -= len;                                                    
     \
             uscan += len;                                                      
     \
             len=0;                                                             
     \
         } else {                                                               
     \
-            len = UTF8SKIP(uc);                                                
     \
-            uvc = _toFOLD_utf8_flags( (const U8*) uc, uc + len, foldbuf, 
&foldlen,  \
+            uvc = _toFOLD_utf8_flags( (const U8*) uc, uc_end, foldbuf, 
&foldlen,    \
                                                                             
flags); \
+            len = UTF8SKIP(uc);                                                
     \
             skiplen = UVCHR_SKIP( uvc );                                       
     \
             foldlen -= skiplen;                                                
     \
             uscan = foldbuf + skiplen;                                         
     \
         }                                                                      
     \
         break;                                                                 
     \
+    case trie_flu8_latin:                                                      
     \
+        _CHECK_AND_WARN_PROBLEMATIC_LOCALE;                                    
     \
+        goto do_trie_latin_utf8_fold;                                          
     \
     case trie_latin_utf8_exactfa_fold:                                         
     \
         flags |= FOLD_FLAGS_NOMIX_ASCII;                                       
     \
         /* FALLTHROUGH */                                                      
     \
     case trie_latin_utf8_fold:                                                 
     \
+      do_trie_latin_utf8_fold:                                                 
     \
         if ( foldlen>0 ) {                                                     
     \
-            uvc = utf8n_to_uvchr( (const U8*) uscan, UTF8_MAXLEN, &len, 
uniflags ); \
+            uvc = utf8n_to_uvchr( (const U8*) uscan, foldlen, &len, uniflags 
);     \
             foldlen -= len;                                                    
     \
             uscan += len;                                                      
     \
             len=0;                                                             
     \
@@ -1850,7 +1858,7 @@ STMT_START {
         }                                                                      
     \
         /* FALLTHROUGH */                                                      
     \
     case trie_utf8:                                                            
     \
-        uvc = utf8n_to_uvchr( (const U8*) uc, UTF8_MAXLEN, &len, uniflags );   
     \
+        uvc = utf8n_to_uvchr( (const U8*) uc, uc_end - uc, &len, uniflags );   
     \
         break;                                                                 
     \
     case trie_plain:                                                           
     \
         uvc = (UV)*uc;                                                         
     \
@@ -2971,10 +2979,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, 
char *s,
                     }
                     points[pointpos++ % maxlen]= uc;
                     if (foldlen || uc < (U8*)strend) {
-                        REXEC_TRIE_READ_CHAR(trie_type, trie,
-                                         widecharmap, uc,
-                                         uscan, len, uvc, charid, foldlen,
-                                         foldbuf, uniflags);
+                        REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
+                                             (U8 *) strend, uscan, len, uvc,
+                                             charid, foldlen, foldbuf,
+                                             uniflags);
                         DEBUG_TRIE_EXECUTE_r({
                             dump_exec_pos( (char *)uc, c, strend,
                                         real_start, s, utf8_target, 0);
@@ -6003,8 +6011,9 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, 
regnode *prog)
                    if ( base && (foldlen || uc < (U8*)(reginfo->strend))) {
                        I32 offset;
                        REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
-                                            uscan, len, uvc, charid, foldlen,
-                                            foldbuf, uniflags);
+                                             (U8 *) reginfo->strend, uscan,
+                                             len, uvc, charid, foldlen,
+                                             foldbuf, uniflags);
                        charcount++;
                        if (foldlen>0)
                            ST.longfold = TRUE;
@@ -6139,8 +6148,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, 
regnode *prog)
                        while (foldlen) {
                            if (!--chars)
                                break;
-                           uvc = utf8n_to_uvchr(uscan, UTF8_MAXLEN, &len,
-                                           uniflags);
+                           uvc = utf8n_to_uvchr(uscan, foldlen, &len,
+                                                 uniflags);
                            uscan += len;
                            foldlen -= len;
                        }
diff --git a/t/re/re_tests b/t/re/re_tests
index 54de12024c..3fd24ff572 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1991,6 +1991,8 @@ AB\s+\x{100}      AB \x{100}X     y       -       -
 [[:^ascii:]]+b \x80a\x81\x{100}b       y       $&      \x81\x{100}b
 /\A\x80+\z/    \x80\x80\x80\x80\x80\x80\x80\x80\x80    y       $&      
\x80\x80\x80\x80\x80\x80\x80\x80\x80            # [perl #132900]
 ^(\d+)*?4X$    1234X   y       $1      123     # perl #131648
+(?il)\x{100}|\x{100}|\x{FE}    \xFE    y       $&      \xFE
+
 
 # Keep these lines at the end of the file
 # vim: softtabstop=0 noexpandtab

-- 
Perl5 Master Repository

Reply via email to