Patch 7.1.310

Bram Moolenaar Thu, 05 Jun 2008 14:57:26 -0700


Patch 7.1.310
Problem:    An incomplete UTF-8 byte sequence at the end of a file is not
            detected.  Memory that was never written is accessed.
Solution:   Check that the last bytes in the buffer form a valid UTF-8
            character. (mostly by Ben Schmidt)
            Also fix the wrong line number reported for the error.
Files:      src/fileio.c



*** ../vim-7.1.309/src/fileio.c Wed May  7 19:05:55 2008
--- src/fileio.c        Wed Jun  4 18:28:48 2008
***************
*** 1288,1299 ****
  #ifdef FEAT_MBYTE
                    else if (conv_restlen > 0)
                    {
!                       /* Reached end-of-file but some trailing bytes could
!                        * not be converted.  Truncated file? */
!                       if (conv_error == 0)
!                           conv_error = linecnt;
!                       if (bad_char_behavior != BAD_DROP)
                        {
                            fio_flags = 0;      /* don't convert this */
  # ifdef USE_ICONV
                            if (iconv_fd != (iconv_t)-1)
--- 1288,1336 ----
  #ifdef FEAT_MBYTE
                    else if (conv_restlen > 0)
                    {
!                       /*
!                        * Reached end-of-file but some trailing bytes could
!                        * not be converted.  Truncated file?
!                        */
! 
!                       /* When we did a conversion report an error. */
!                       if (fio_flags != 0
! # ifdef USE_ICONV
!                               || iconv_fd != (iconv_t)-1
! # endif
!                          )
                        {
+                           if (conv_error == 0)
+                               conv_error = curbuf->b_ml.ml_line_count
+                                                               - linecnt + 1;
+                       }
+                       /* Remember the first linenr with an illegal byte */
+                       else if (illegal_byte == 0)
+                           illegal_byte = curbuf->b_ml.ml_line_count
+                                                               - linecnt + 1;
+                       if (bad_char_behavior == BAD_DROP)
+                       {
+                           *(ptr - conv_restlen) = NUL;
+                           conv_restlen = 0;
+                       }
+                       else
+                       {
+                           /* Replace the trailing bytes with the replacement
+                            * character if we were converting; if we weren't,
+                            * leave the UTF8 checking code to do it, as it
+                            * works slightly differently. */
+                           if (bad_char_behavior != BAD_KEEP && (fio_flags != 0
+ # ifdef USE_ICONV
+                                   || iconv_fd != (iconv_t)-1
+ # endif
+                              ))
+                           {
+                               while (conv_restlen > 0)
+                               {
+                                   *(--ptr) = bad_char_behavior;
+                                   --conv_restlen;
+                               }
+                           }
                            fio_flags = 0;      /* don't convert this */
  # ifdef USE_ICONV
                            if (iconv_fd != (iconv_t)-1)
***************
*** 1302,1321 ****
                                iconv_fd = (iconv_t)-1;
                            }
  # endif
-                           if (bad_char_behavior == BAD_KEEP)
-                           {
-                               /* Keep the trailing bytes as-is. */
-                               size = conv_restlen;
-                               ptr -= conv_restlen;
-                           }
-                           else
-                           {
-                               /* Replace the trailing bytes with the
-                                * replacement character. */
-                               size = 1;
-                               *--ptr = bad_char_behavior;
-                           }
-                           conv_restlen = 0;
                        }
                    }
  #endif
--- 1339,1344 ----
***************
*** 1397,1402 ****
--- 1420,1430 ----
                    goto retry;
                }
            }
+ 
+           /* Include not converted bytes. */
+           ptr -= conv_restlen;
+           size += conv_restlen;
+           conv_restlen = 0;
  #endif
            /*
             * Break here for a read error or end-of-file.
***************
*** 1406,1416 ****
  
  #ifdef FEAT_MBYTE
  
-           /* Include not converted bytes. */
-           ptr -= conv_restlen;
-           size += conv_restlen;
-           conv_restlen = 0;
- 
  # ifdef USE_ICONV
            if (iconv_fd != (iconv_t)-1)
            {
--- 1434,1439 ----
***************
*** 1872,1883 ****
                size = (long)((ptr + real_size) - dest);
                ptr = dest;
            }
!           else if (enc_utf8 && conv_error == 0 && !curbuf->b_p_bin)
            {
!               /* Reading UTF-8: Check if the bytes are valid UTF-8.
!                * Need to start before "ptr" when part of the character was
!                * read in the previous read() call. */
!               for (p = ptr - utf_head_off(buffer, ptr); ; ++p)
                {
                    int  todo = (int)((ptr + size) - p);
                    int  l;
--- 1895,1906 ----
                size = (long)((ptr + real_size) - dest);
                ptr = dest;
            }
!           else if (enc_utf8 && !curbuf->b_p_bin)
            {
!               int  incomplete_tail = FALSE;
! 
!               /* Reading UTF-8: Check if the bytes are valid UTF-8. */
!               for (p = ptr; ; ++p)
                {
                    int  todo = (int)((ptr + size) - p);
                    int  l;
***************
*** 1891,1933 ****
                         * read() will get the next bytes, we'll check it
                         * then. */
                        l = utf_ptr2len_len(p, todo);
!                       if (l > todo)
                        {
!                           /* Incomplete byte sequence, the next read()
!                            * should get them and check the bytes. */
!                           p += todo;
!                           break;
                        }
!                       if (l == 1)
                        {
                            /* Illegal byte.  If we can try another encoding
!                            * do that. */
!                           if (can_retry)
                                break;
- 
-                           /* Remember the first linenr with an illegal byte */
-                           if (illegal_byte == 0)
-                               illegal_byte = readfile_linenr(linecnt, ptr, p);
  # ifdef USE_ICONV
                            /* When we did a conversion report an error. */
                            if (iconv_fd != (iconv_t)-1 && conv_error == 0)
                                conv_error = readfile_linenr(linecnt, ptr, p);
  # endif
  
                            /* Drop, keep or replace the bad byte. */
                            if (bad_char_behavior == BAD_DROP)
                            {
!                               mch_memmove(p, p+1, todo - 1);
                                --p;
                                --size;
                            }
                            else if (bad_char_behavior != BAD_KEEP)
                                *p = bad_char_behavior;
                        }
!                       p += l - 1;
                    }
                }
!               if (p < ptr + size)
                {
                    /* Detected a UTF-8 error. */
  rewind_retry:
--- 1914,1969 ----
                         * read() will get the next bytes, we'll check it
                         * then. */
                        l = utf_ptr2len_len(p, todo);
!                       if (l > todo && !incomplete_tail)
                        {
!                           /* Avoid retrying with a different encoding when
!                            * a truncated file is more likely, or attempting
!                            * to read the rest of an incomplete sequence when
!                            * we have already done so. */
!                           if (p > ptr || filesize > 0)
!                               incomplete_tail = TRUE;
!                           /* Incomplete byte sequence, move it to conv_rest[]
!                            * and try to read the rest of it, unless we've
!                            * already done so. */
!                           if (p > ptr)
!                           {
!                               conv_restlen = todo;
!                               mch_memmove(conv_rest, p, conv_restlen);
!                               size -= conv_restlen;
!                               break;
!                           }
                        }
!                       if (l == 1 || l > todo)
                        {
                            /* Illegal byte.  If we can try another encoding
!                            * do that, unless at EOF where a truncated
!                            * file is more likely than a conversion error. */
!                           if (can_retry && !incomplete_tail)
                                break;
  # ifdef USE_ICONV
                            /* When we did a conversion report an error. */
                            if (iconv_fd != (iconv_t)-1 && conv_error == 0)
                                conv_error = readfile_linenr(linecnt, ptr, p);
  # endif
+                           /* Remember the first linenr with an illegal byte */
+                           if (conv_error == 0 && illegal_byte == 0)
+                               illegal_byte = readfile_linenr(linecnt, ptr, p);
  
                            /* Drop, keep or replace the bad byte. */
                            if (bad_char_behavior == BAD_DROP)
                            {
!                               mch_memmove(p, p + 1, todo - 1);
                                --p;
                                --size;
                            }
                            else if (bad_char_behavior != BAD_KEEP)
                                *p = bad_char_behavior;
                        }
!                       else
!                           p += l - 1;
                    }
                }
!               if (p < ptr + size && !incomplete_tail)
                {
                    /* Detected a UTF-8 error. */
  rewind_retry:
*** ../vim-7.1.309/src/version.c        Wed Jun  4 15:27:43 2008
--- src/version.c       Wed Jun  4 19:35:16 2008
***************
*** 668,669 ****
--- 673,676 ----
  {   /* Add new patch number below this line */
+ /**/
+     310,
  /**/

-- 
Normal people believe that if it ain't broke, don't fix it.  Engineers believe
that if it ain't broke, it doesn't have enough features yet.
                                (Scott Adams - The Dilbert principle)

 /// Bram Moolenaar -- [EMAIL PROTECTED] -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\        download, build and distribute -- http://www.A-A-P.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///

--~--~---------~--~----~------------~-------~--~----~
You received this message from the "vim_dev" maillist.
For more information, visit http://www.vim.org/maillist.php
-~----------~----~----~----~------~----~------~--~---

Patch 7.1.310

Raspunde prin e-mail lui