Patch 8.2.1461
Problem:    Vim9: string indexes are counted in bytes.
Solution:   Use character indexes. (closes #6574)
Files:      runtime/doc/eval.txt, src/eval.c, src/proto/eval.pro,
            src/vim9execute.c, src/testdir/test_vim9_expr.vim


*** ../vim-8.2.1460/runtime/doc/eval.txt        2020-08-09 14:03:51.541367942 +0200
--- runtime/doc/eval.txt        2020-08-15 18:24:40.799380817 +0200
***************
*** 1128,1146 ****
  
  expr8[expr1]          item of String or |List|        *expr-[]* *E111*
                                                        *E909* *subscript*
  If expr8 is a Number or String this results in a String that contains the
! expr1'th single byte from expr8.  expr8 is used as a String, expr1 as a
! Number.  This doesn't recognize multi-byte encodings, see `byteidx()` for
! an alternative, or use `split()` to turn the string into a list of characters.
! 
! Index zero gives the first byte.  This is like it works in C.  Careful:
! text column numbers start with one!  Example, to get the byte under the
! cursor: >
        :let c = getline(".")[col(".") - 1]
  
  If the length of the String is less than the index, the result is an empty
  String.  A negative index always results in an empty string (reason: backward
! compatibility).  Use [-1:] to get the last byte.
  
  If expr8 is a |List| then it results the item at index expr1.  See |list-index|
  for possible index values.  If the index is out of range this results in an
--- 1131,1155 ----
  
  expr8[expr1]          item of String or |List|        *expr-[]* *E111*
                                                        *E909* *subscript*
+ In legacy Vim script:
  If expr8 is a Number or String this results in a String that contains the
! expr1'th single byte from expr8.  expr8 is used as a String (a number is
! automatically converted to a String), expr1 as a Number.  This doesn't
! recognize multi-byte encodings, see `byteidx()` for an alternative, or use
! `split()` to turn the string into a list of characters.  Example, to get the
! byte under the cursor: >
        :let c = getline(".")[col(".") - 1]
  
+ In Vim9 script:
+ If expr8 is a String this results in a String that contains the expr1'th
+ single character from expr8.  To use byte indexes use |strpart()|.
+ 
+ Index zero gives the first byte or character.  Careful: text column numbers
+ start with one!
+ 
  If the length of the String is less than the index, the result is an empty
  String.  A negative index always results in an empty string (reason: backward
! compatibility).  Use [-1:] to get the last byte or character.
  
  If expr8 is a |List| then it results the item at index expr1.  See |list-index|
  for possible index values.  If the index is out of range this results in an
***************
*** 1154,1163 ****
  
  expr8[expr1a : expr1b]        substring or sublist            *expr-[:]*
  
! If expr8 is a Number or String this results in the substring with the bytes
! from expr1a to and including expr1b.  expr8 is used as a String, expr1a and
! expr1b are used as a Number.  This doesn't recognize multi-byte encodings, see
! |byteidx()| for computing the indexes.
  
  If expr1a is omitted zero is used.  If expr1b is omitted the length of the
  string minus one is used.
--- 1163,1178 ----
  
  expr8[expr1a : expr1b]        substring or sublist            *expr-[:]*
  
! If expr8 is a String this results in the substring with the bytes or characters
! from expr1a to and including expr1b.  expr8 is used as a String, expr1a and
! expr1b are used as a Number.
! 
! In legacy Vim script the indexes are byte indexes.  This doesn't recognize
! multi-byte encodings, see |byteidx()| for computing the indexes.  If expr8 is
! a Number it is first converted to a String.
! 
! In Vim9 script the indexes are character indexes.  To use byte indexes use
! |strpart()|.
  
  If expr1a is omitted zero is used.  If expr1b is omitted the length of the
  string minus one is used.
*** ../vim-8.2.1460/src/eval.c  2020-08-15 16:33:24.501747305 +0200
--- src/eval.c  2020-08-15 18:32:40.363506218 +0200
***************
*** 3718,3723 ****
--- 3718,3727 ----
                    else
                        s = vim_strnsave(s + n1, n2 - n1 + 1);
                }
+               else if (in_vim9script())
+               {
+                   s = char_from_string(s, n1);
+               }
                else
                {
                    // The resulting variable is a string of a single
***************
*** 5285,5290 ****
--- 5289,5318 ----
  }
  
  /*
+  * Return a vim_strnsave() copy of the character at character index "index"
+  * in "str".  Returns NULL when "str" is NULL or "index" is out of range.
+  */
+     char_u *
+ char_from_string(char_u *str, varnumber_T index)
+ {
+     size_t        nbyte = 0;  // byte offset reached in "str"
+     varnumber_T           nchar = index;  // characters still to skip
+     size_t        slen;
+ 
+     if (str == NULL || index < 0)
+       return NULL;
+     slen = STRLEN(str);
+     while (nchar > 0 && nbyte < slen)  // stop early at the end of "str"
+     {
+       nbyte += MB_CPTR2LEN(str + nbyte);  // step over one character
+       --nchar;
+     }
+     if (nbyte >= slen)  // "index" is at or past the end of the string
+       return NULL;
+     return vim_strnsave(str + nbyte, MB_CPTR2LEN(str + nbyte));
+ }
+ 
+ /*
   * Handle:
   * - expr[expr], expr[expr:expr] subscript
   * - ".name" lookup
*** ../vim-8.2.1460/src/proto/eval.pro  2020-07-27 21:43:24.137946109 +0200
--- src/proto/eval.pro  2020-08-15 17:08:27.359565655 +0200
***************
*** 59,64 ****
--- 59,65 ----
  int eval_isnamec(int c);
  int eval_isnamec1(int c);
  int eval_isdictc(int c);
+ char_u *char_from_string(char_u *str, varnumber_T index);
  int handle_subscript(char_u **arg, typval_T *rettv, evalarg_T *evalarg, int verbose);
  int item_copy(typval_T *from, typval_T *to, int deep, int copyID);
  void echo_one(typval_T *rettv, int with_space, int *atstart, int *needclr);
*** ../vim-8.2.1460/src/vim9execute.c   2020-08-15 16:33:24.497747330 +0200
--- src/vim9execute.c   2020-08-15 17:11:00.345567711 +0200
***************
*** 2233,2239 ****
  
            case ISN_STRINDEX:
                {
-                   char_u      *s;
                    varnumber_T n;
                    char_u      *res;
  
--- 2233,2238 ----
***************
*** 2245,2251 ****
                        emsg(_(e_stringreq));
                        goto on_error;
                    }
-                   s = tv->vval.v_string;
  
                    tv = STACK_TV_BOT(-1);
                    if (tv->v_type != VAR_NUMBER)
--- 2244,2249 ----
***************
*** 2259,2270 ****
                    // The resulting variable is a string of a single
                    // character.  If the index is too big or negative the
                    // result is empty.
-                   if (n < 0 || n >= (varnumber_T)STRLEN(s))
-                       res = NULL;
-                   else
-                       res = vim_strnsave(s + n, 1);
                    --ectx.ec_stack.ga_len;
                    tv = STACK_TV_BOT(-1);
                    vim_free(tv->vval.v_string);
                    tv->vval.v_string = res;
                }
--- 2257,2265 ----
                    // The resulting variable is a string of a single
                    // character.  If the index is too big or negative the
                    // result is empty.
                    --ectx.ec_stack.ga_len;
                    tv = STACK_TV_BOT(-1);
+                   res = char_from_string(tv->vval.v_string, n);
                    vim_free(tv->vval.v_string);
                    tv->vval.v_string = res;
                }
*** ../vim-8.2.1460/src/eval.c  2020-08-15 16:33:24.501747305 +0200
--- src/eval.c  2020-08-15 18:32:40.363506218 +0200
***************
*** 3718,3723 ****
--- 3718,3727 ----
                    else
                        s = vim_strnsave(s + n1, n2 - n1 + 1);
                }
+               else if (in_vim9script())
+               {
+                   s = char_from_string(s, n1);
+               }
                else
                {
                    // The resulting variable is a string of a single
***************
*** 5285,5290 ****
--- 5289,5318 ----
  }
  
  /*
+  * Return a vim_strnsave() copy of the character at character index "index"
+  * in "str".  Returns NULL when "str" is NULL or "index" is out of range.
+  */
+     char_u *
+ char_from_string(char_u *str, varnumber_T index)
+ {
+     size_t        nbyte = 0;  // byte offset reached in "str"
+     varnumber_T           nchar = index;  // characters still to skip
+     size_t        slen;
+ 
+     if (str == NULL || index < 0)
+       return NULL;
+     slen = STRLEN(str);
+     while (nchar > 0 && nbyte < slen)  // stop early at the end of "str"
+     {
+       nbyte += MB_CPTR2LEN(str + nbyte);  // step over one character
+       --nchar;
+     }
+     if (nbyte >= slen)  // "index" is at or past the end of the string
+       return NULL;
+     return vim_strnsave(str + nbyte, MB_CPTR2LEN(str + nbyte));
+ }
+ 
+ /*
   * Handle:
   * - expr[expr], expr[expr:expr] subscript
   * - ".name" lookup
*** ../vim-8.2.1460/src/testdir/test_vim9_expr.vim      2020-08-15 16:33:24.501747305 +0200
--- src/testdir/test_vim9_expr.vim      2020-08-15 17:16:48.649794487 +0200
***************
*** 2075,2086 ****
  enddef
  
  def Test_expr7_subscript()
!   let text = 'abcdef'
!   assert_equal('', text[-1])
!   assert_equal('a', text[0])
!   assert_equal('e', text[4])
!   assert_equal('f', text[5])
!   assert_equal('', text[6])
  enddef
  
  def Test_expr7_subscript_linebreak()
--- 2075,2102 ----
  enddef
  
  def Test_expr7_subscript()
!   let lines =<< trim END
!     let text = 'abcdef'
!     assert_equal('', text[-1])
!     assert_equal('a', text[0])
!     assert_equal('e', text[4])
!     assert_equal('f', text[5])
!     assert_equal('', text[6])
! 
!     text = 'ábçdëf'
!     assert_equal('', text[-999])
!     assert_equal('', text[-1])
!     assert_equal('á', text[0])
!     assert_equal('b', text[1])
!     assert_equal('ç', text[2])
!     assert_equal('d', text[3])
!     assert_equal('ë', text[4])
!     assert_equal('f', text[5])
!     assert_equal('', text[6])
!     assert_equal('', text[999])
!   END
!   CheckDefSuccess(lines)
!   CheckScriptSuccess(['vim9script'] + lines)
  enddef
  
  def Test_expr7_subscript_linebreak()
*** ../vim-8.2.1460/src/version.c       2020-08-15 16:33:24.505747282 +0200
--- src/version.c       2020-08-15 17:08:22.507633662 +0200
***************
*** 756,757 ****
--- 756,759 ----
  {   /* Add new patch number below this line */
+ /**/
+     1461,
  /**/

-- 
hundred-and-one symptoms of being an internet addict:
210. When you get a divorce, you don't care about who gets the children,
     but discuss endlessly who can use the email address.

 /// Bram Moolenaar -- [email protected] -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/vim_dev/202008151639.07FGdWVG342455%40masaka.moolenaar.net.

Raspunde prin e-mail lui