Patch 8.2.4695
Problem:    JSON encoding could be faster.
Solution:   Optimize encoding JSON strings. (closes #10086)
Files:      src/json.c, src/testdir/test_json.vim


*** ../vim-8.2.4694/src/json.c  2022-04-04 15:16:50.738014123 +0100
--- src/json.c  2022-04-05 15:02:26.659250519 +0100
***************
*** 114,150 ****
  }
  #endif
  
      static void
  write_string(garray_T *gap, char_u *str)
  {
      char_u    *res = str;
      char_u    numbuf[NUMBUFLEN];
  
      if (res == NULL)
-       ga_concat(gap, (char_u *)"\"\"");
-     else
      {
! #if defined(USE_ICONV)
!       vimconv_T   conv;
!       char_u      *converted = NULL;
  
!       if (!enc_utf8)
!       {
!           // Convert the text from 'encoding' to utf-8, the JSON string is
!           // always utf-8.
!           conv.vc_type = CONV_NONE;
!           convert_setup(&conv, p_enc, (char_u*)"utf-8");
!           if (conv.vc_type != CONV_NONE)
!               converted = res = string_convert(&conv, res, NULL);
!           convert_setup(&conv, NULL, NULL);
!       }
  #endif
!       ga_append(gap, '"');
!       while (*res != NUL)
        {
!           int c;
!           // always use utf-8 encoding, ignore 'encoding'
!           c = utf_ptr2char(res);
  
            switch (c)
            {
--- 114,185 ----
  }
  #endif
  
+ /*
+  * Lookup table to quickly know if the given ASCII character must be escaped.
+  */
+ static const char ascii_needs_escape[128] = {
+     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0.
+     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1.
+     0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x2.
+     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x3.
+     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x4.
+     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 0x5.
+     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x6.
+     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x7.
+ };
+ 
+ /*
+  * Encode the utf-8 encoded string "str" into "gap".
+  */
      static void
  write_string(garray_T *gap, char_u *str)
  {
      char_u    *res = str;
      char_u    numbuf[NUMBUFLEN];
+     char_u    *from;
+ #if defined(USE_ICONV)
+     vimconv_T   conv;
+     char_u    *converted = NULL;
+ #endif
+     int               c;
  
      if (res == NULL)
      {
!       ga_concat(gap, (char_u *)"\"\"");
!       return;
!     }
  
! #if defined(USE_ICONV)
!     if (!enc_utf8)
!     {
!       // Convert the text from 'encoding' to utf-8, because a JSON string is
!       // always utf-8.
!       conv.vc_type = CONV_NONE;
!       convert_setup(&conv, p_enc, (char_u*)"utf-8");
!       if (conv.vc_type != CONV_NONE)
!           converted = res = string_convert(&conv, res, NULL);
!       convert_setup(&conv, NULL, NULL);
!     }
  #endif
!     ga_append(gap, '"');
!     // `from` is the beginning of a sequence of bytes we can directly copy from
!     // the input string, avoiding the overhead associated to decoding/encoding
!     // them.
!     from = res;
!     while ((c = *res) != NUL)
!     {
!       // always use utf-8 encoding, ignore 'encoding'
!       if (c < 0x80)
        {
!           if (!ascii_needs_escape[c])
!           {
!               res += 1;
!               continue;
!           }
! 
!           if (res != from)
!               ga_concat_len(gap, from, res - from);
!           from = res + 1;
  
            switch (c)
            {
***************
*** 164,188 ****
                    ga_append(gap, c);
                    break;
                default:
!                   if (c >= 0x20)
!                   {
!                       numbuf[utf_char2bytes(c, numbuf)] = NUL;
!                       ga_concat(gap, numbuf);
!                   }
!                   else
!                   {
!                       vim_snprintf((char *)numbuf, NUMBUFLEN,
!                                                        "\\u%04lx", (long)c);
!                       ga_concat(gap, numbuf);
!                   }
            }
!           res += utf_ptr2len(res);
        }
!       ga_append(gap, '"');
  #if defined(USE_ICONV)
!       vim_free(converted);
  #endif
-     }
  }
  
  /*
--- 199,241 ----
                    ga_append(gap, c);
                    break;
                default:
!                   vim_snprintf((char *)numbuf, NUMBUFLEN, "\\u%04lx",
!                                                                     (long)c);
!                   ga_concat(gap, numbuf);
            }
! 
!           res += 1;
        }
!       else
!       {
!           int l = utf_ptr2len(res);
! 
!           if (l > 1)
!           {
!               res += l;
!               continue;
!           }
! 
!           // Invalid utf-8 sequence, replace it with the Unicode replacement
!           // character U+FFFD.
!           if (res != from)
!               ga_concat_len(gap, from, res - from);
!           from = res + 1;
! 
!           numbuf[utf_char2bytes(0xFFFD, numbuf)] = NUL;
!           ga_concat(gap, numbuf);
! 
!           res += l;
!       }
!     }
! 
!     if (res != from)
!       ga_concat_len(gap, from, res - from);
! 
!     ga_append(gap, '"');
  #if defined(USE_ICONV)
!     vim_free(converted);
  #endif
  }
  
  /*
*** ../vim-8.2.4694/src/testdir/test_json.vim   2021-02-08 20:53:05.592963320 +0000
--- src/testdir/test_json.vim   2022-04-05 14:55:22.151639261 +0100
***************
*** 107,112 ****
--- 107,115 ----
    call assert_equal('"café"', json_encode("caf\xe9"))
    let &encoding = save_encoding
  
+   " Invalid utf-8 sequences are replaced with U+FFFD (replacement character)
+   call assert_equal('"foo' . "\ufffd" . '"', json_encode("foo\xAB"))
+ 
    call assert_fails('echo json_encode(function("tr"))', 'E1161: Cannot json encode a func')
    call assert_fails('echo json_encode([function("tr")])', 'E1161: Cannot json encode a func')
  
*** ../vim-8.2.4694/src/version.c       2022-04-05 15:07:08.210791582 +0100
--- src/version.c       2022-04-05 14:56:43.963566990 +0100
***************
*** 748,749 ****
--- 748,751 ----
  {   /* Add new patch number below this line */
+ /**/
+     4695,
  /**/

-- 
There are only two hard things in programming: Cache invalidation,
naming things and off-by-one errors.

 /// Bram Moolenaar -- [email protected] -- http://www.Moolenaar.net   \\\
///                                                                      \\\
\\\        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/vim_dev/20220405140946.4B56D1C05DA%40moolenaar.net.

Raspunde prin e-mail lui