Patch 9.0.1485

Bram Moolenaar Mon, 24 Apr 2023 13:10:40 -0700


Patch 9.0.1485
Problem:    no functions for converting from/to UTF-16 index.
Solution:   Add UTF-16 flag to existing functions and add strutf16len() and
            utf16idx(). (Yegappan Lakshmanan, closes #12216)
Files:      runtime/doc/builtin.txt, runtime/doc/eval.txt,
            runtime/doc/usr_41.txt, src/evalfunc.c, src/strings.c,
            src/proto/strings.pro, src/testdir/test_functions.vim



*** ../vim-9.0.1484/runtime/doc/builtin.txt     2023-03-16 21:43:07.339227880 +0000
--- runtime/doc/builtin.txt     2023-04-24 20:31:26.200724135 +0100
***************
*** 81,88 ****
  bufwinid({buf})                       Number  window ID of buffer {buf}
  bufwinnr({buf})                       Number  window number of buffer {buf}
  byte2line({byte})             Number  line number at byte count {byte}
! byteidx({expr}, {nr})         Number  byte index of {nr}'th char in {expr}
! byteidxcomp({expr}, {nr})     Number  byte index of {nr}'th char in {expr}
  call({func}, {arglist} [, {dict}])
                                any     call {func} with arguments {arglist}
  ceil({expr})                  Float   round {expr} up
--- 81,90 ----
  bufwinid({buf})                       Number  window ID of buffer {buf}
  bufwinnr({buf})                       Number  window number of buffer {buf}
  byte2line({byte})             Number  line number at byte count {byte}
! byteidx({expr}, {nr} [, {utf16}])
!                               Number  byte index of {nr}'th char in {expr}
! byteidxcomp({expr}, {nr} [, {utf16}])
!                               Number  byte index of {nr}'th char in {expr}
  call({func}, {arglist} [, {dict}])
                                any     call {func} with arguments {arglist}
  ceil({expr})                  Float   round {expr} up
***************
*** 117,123 ****
  char2nr({expr} [, {utf8}])    Number  ASCII/UTF-8 value of first char in {expr}
  charclass({string})           Number  character class of {string}
  charcol({expr} [, {winid}])   Number  column number of cursor or mark
! charidx({string}, {idx} [, {countcc}])
                                Number  char index of byte {idx} in {string}
  chdir({dir})                  String  change current working directory
  cindent({lnum})                       Number  C indent for line {lnum}
--- 119,125 ----
  char2nr({expr} [, {utf8}])    Number  ASCII/UTF-8 value of first char in {expr}
  charclass({string})           Number  character class of {string}
  charcol({expr} [, {winid}])   Number  column number of cursor or mark
! charidx({string}, {idx} [, {countcc} [, {utf16}]])
                                Number  char index of byte {idx} in {string}
  chdir({dir})                  String  change current working directory
  cindent({lnum})                       Number  C indent for line {lnum}
***************
*** 604,609 ****
--- 606,613 ----
  strridx({haystack}, {needle} [, {start}])
                                Number  last index of {needle} in {haystack}
  strtrans({expr})              String  translate string to make it printable
+ strutf16len({string} [, {countcc}])
+                               Number  number of UTF-16 code units in {string}
  strwidth({expr})              Number  display cell length of the String {expr}
  submatch({nr} [, {list}])     String or List
                                        specific match in ":s" or substitute()
***************
*** 704,709 ****
--- 708,715 ----
  undotree()                    List    undo file tree
  uniq({list} [, {func} [, {dict}]])
                                List    remove adjacent duplicates from a list
+ utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
+                               Number  UTF-16 index of byte {idx} in {string}
  values({dict})                        List    values in {dict}
  virtcol({expr} [, {list}])    Number or List
                                        screen column of cursor or mark
***************
*** 1363,1369 ****
  <             {not available when compiled without the |+byte_offset|
                feature}
  
! byteidx({expr}, {nr})                                 *byteidx()*
                Return byte index of the {nr}'th character in the String
                {expr}.  Use zero for the first character, it then returns
                zero.
--- 1369,1375 ----
  <             {not available when compiled without the |+byte_offset|
                feature}
  
! byteidx({expr}, {nr} [, {utf16}])                     *byteidx()*
                Return byte index of the {nr}'th character in the String
                {expr}.  Use zero for the first character, it then returns
                zero.
***************
*** 1373,1378 ****
--- 1379,1391 ----
                length is added to the preceding base character.  See
                |byteidxcomp()| below for counting composing characters
                separately.
+               When {utf16} is present and TRUE, {nr} is used as the UTF-16
+               index in the String {expr} instead of as the character index.
+               The UTF-16 index is the index in the string when it is encoded
+               with 16-bit words.  If the specified UTF-16 index is in the
+               middle of a character (e.g. in a 4-byte character), then the
+               byte index of the first byte in the character is returned.
+               Refer to |string-offset-encoding| for more information.
                Example : >
                        echo matchstr(str, ".", byteidx(str, 3))
  <             will display the fourth character.  Another way to do the
***************
*** 1384,1394 ****
                If there are less than {nr} characters -1 is returned.
                If there are exactly {nr} characters the length of the string
                in bytes is returned.
! 
                Can also be used as a |method|: >
                        GetName()->byteidx(idx)
  
! byteidxcomp({expr}, {nr})                                     *byteidxcomp()*
                Like byteidx(), except that a composing character is counted
                as a separate character.  Example: >
                        let s = 'e' .. nr2char(0x301)
--- 1397,1413 ----
                If there are less than {nr} characters -1 is returned.
                If there are exactly {nr} characters the length of the string
                in bytes is returned.
!               See |charidx()| and |utf16idx()| for getting the character and
!               UTF-16 index respectively from the byte index.
!               Examples: >
!                       echo byteidx('a😊😊', 2)  returns 5
!                       echo byteidx('a😊😊', 2, 1)       returns 1
!                       echo byteidx('a😊😊', 3, 1)       returns 5
! <
                Can also be used as a |method|: >
                        GetName()->byteidx(idx)
  
! byteidxcomp({expr}, {nr} [, {utf16}])                 *byteidxcomp()*
                Like byteidx(), except that a composing character is counted
                as a separate character.  Example: >
                        let s = 'e' .. nr2char(0x301)
***************
*** 1493,1519 ****
                        GetPos()->col()
  <
                                                        *charidx()*
! charidx({string}, {idx} [, {countcc}])
                Return the character index of the byte at {idx} in {string}.
                The index of the first character is zero.
                If there are no multibyte characters the returned value is
                equal to {idx}.
                When {countcc} is omitted or |FALSE|, then composing characters
!               are not counted separately, their byte length is
!               added to the preceding base character.
                When {countcc} is |TRUE|, then composing characters are
                counted as separate characters.
                Returns -1 if the arguments are invalid or if {idx} is greater
                than the index of the last byte in {string}.  An error is
                given if the first argument is not a string, the second
                argument is not a number or when the third argument is present
                and is not zero or one.
                See |byteidx()| and |byteidxcomp()| for getting the byte index
!               from the character index.
                Examples: >
                        echo charidx('áb́ć', 3)               returns 1
                        echo charidx('áb́ć', 6, 1)    returns 4
                        echo charidx('áb́ć', 16)              returns -1
  <
                Can also be used as a |method|: >
                        GetName()->charidx(idx)
--- 1512,1547 ----
                        GetPos()->col()
  <
                                                        *charidx()*
! charidx({string}, {idx} [, {countcc} [, {utf16}]])
                Return the character index of the byte at {idx} in {string}.
                The index of the first character is zero.
                If there are no multibyte characters the returned value is
                equal to {idx}.
+ 
                When {countcc} is omitted or |FALSE|, then composing characters
!               are not counted separately, their byte length is added to the
!               preceding base character.
                When {countcc} is |TRUE|, then composing characters are
                counted as separate characters.
+ 
+               When {utf16} is present and TRUE, {idx} is used as the UTF-16
+               index in the String {string} instead of as the byte index.
+ 
                Returns -1 if the arguments are invalid or if {idx} is greater
                than the index of the last byte in {string}.  An error is
                given if the first argument is not a string, the second
                argument is not a number or when the third argument is present
                and is not zero or one.
+ 
                See |byteidx()| and |byteidxcomp()| for getting the byte index
!               from the character index and |utf16idx()| for getting the
!               UTF-16 index from the character index.
!               Refer to |string-offset-encoding| for more information.
                Examples: >
                        echo charidx('áb́ć', 3)               returns 1
                        echo charidx('áb́ć', 6, 1)    returns 4
                        echo charidx('áb́ć', 16)              returns -1
+                       echo charidx('a😊😊', 4, 0, 1)    returns 2
  <
                Can also be used as a |method|: >
                        GetName()->charidx(idx)
***************
*** 9236,9241 ****
--- 9272,9299 ----
                Can also be used as a |method|: >
                        GetString()->strtrans()
  
+ strutf16len({string} [, {countcc}])                   *strutf16len()*
+               The result is a Number, which is the number of UTF-16 code
+               units in String {string} (after converting it to UTF-16).
+ 
+               When {countcc} is TRUE, composing characters are counted
+               separately.
+               When {countcc} is omitted or FALSE, composing characters are
+               ignored.
+ 
+               Returns -1 on error.
+ 
+               Also see |strlen()| and |strcharlen()|.
+               Examples: >
+                   echo strutf16len('a')               returns 1
+                   echo strutf16len('©')               returns 1
+                   echo strutf16len('😊')               returns 2
+                   echo strutf16len('ą́')             returns 1
+                   echo strutf16len('ą́', v:true)     returns 3
+ 
+               Can also be used as a |method|: >
+                       GetText()->strutf16len()
+ <
  strwidth({string})                                    *strwidth()*
                The result is a Number, which is the number of display cells
                String {string} occupies.  A Tab character is counted as one
***************
*** 10049,10054 ****
--- 10109,10142 ----
  
                Can also be used as a |method|: >
                        mylist->uniq()
+ <
+                                                       *utf16idx()*
+ utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
+               Same as |charidx()| but returns the UTF-16 index of the byte
+               at {idx} in {string} (after converting it to UTF-16).
+ 
+               When {charidx} is present and TRUE, {idx} is used as the
+               character index in the String {string} instead of as the byte
+               index.
+               An {idx} in the middle of a UTF-8 sequence is rounded upwards
+               to the end of that sequence.
+ 
+               See |byteidx()| and |byteidxcomp()| for getting the byte index
+               from the UTF-16 index and |charidx()| for getting the
+               character index from the UTF-16 index.
+               Refer to |string-offset-encoding| for more information.
+               Examples: >
+                       echo utf16idx('a😊😊', 3) returns 2
+                       echo utf16idx('a😊😊', 7) returns 4
+                       echo utf16idx('a😊😊', 1, 0, 1)   returns 2
+                       echo utf16idx('a😊😊', 2, 0, 1)   returns 4
+                       echo utf16idx('aą́c', 6)               returns 2
+                       echo utf16idx('aą́c', 6, 1)    returns 4
+                       echo utf16idx('a😊😊', 9) returns -1
+ <
+               Can also be used as a |method|: >
+                       GetName()->utf16idx(idx)
+ 
  
  values({dict})                                                *values()*
                Return a |List| with all the values of {dict}.  The |List| is
*** ../vim-9.0.1484/runtime/doc/eval.txt        2023-01-12 21:07:58.636905098 +0000
--- runtime/doc/eval.txt        2023-04-24 20:53:21.609044564 +0100
***************
*** 1574,1579 ****
--- 1580,1612 ----
        echo $"The square root of {{9}} is {sqrt(9)}"
  <     The square root of {9} is 3.0 ~
  
+                                               *string-offset-encoding*
+ A string consists of multiple characters.  How the characters are stored
+ depends on 'encoding'.  Most common is UTF-8, which uses one byte for ASCII
+ characters, two bytes for other Latin characters and more bytes for other
+ characters.
+ 
+ A string offset can count characters or bytes.  Other programs may use
+ UTF-16 encoding (16-bit words) and an offset of UTF-16 words.  Some functions
+ use byte offsets, usually for UTF-8 encoding.  Other functions use character
+ offsets, in which case the encoding doesn't matter.
+ 
+ The different offsets for the string "a©😊" are below:
+ 
+   UTF-8 offsets:
+       [0]: 61, [1]: C2, [2]: A9, [3]: F0, [4]: 9F, [5]: 98, [6]: 8A
+   UTF-16 offsets:
+       [0]: 0061, [1]: 00A9, [2]: D83D, [3]: DE0A
+   UTF-32 (character) offsets:
+       [0]: 00000061, [1]: 000000A9, [2]: 0001F60A
+ 
+ You can use the "g8" and "ga" commands on a character to see the
+ decimal/hex/octal values.
+ 
+ The functions |byteidx()|, |utf16idx()| and |charidx()| can be used to convert
+ between these indices.  The functions |strlen()|, |strutf16len()| and
+ |strcharlen()| return the number of bytes, UTF-16 code units and characters in
+ a string respectively.
  
  option                                                *expr-option* *E112* *E113*
  ------
*** ../vim-9.0.1484/runtime/doc/usr_41.txt      2023-01-17 18:31:20.423373305 +0000
--- runtime/doc/usr_41.txt      2023-04-24 20:22:12.664400625 +0100
***************
*** 753,758 ****
--- 754,760 ----
        strlen()                length of a string in bytes
        strcharlen()            length of a string in characters
        strchars()              number of characters in a string
+       strutf16len()           number of UTF-16 code units in a string
        strwidth()              size of string when displayed
        strdisplaywidth()       size of string when displayed, deals with tabs
        setcellwidths()         set character cell width overrides
***************
*** 770,775 ****
--- 772,778 ----
        byteidx()               byte index of a character in a string
        byteidxcomp()           like byteidx() but count composing characters
        charidx()               character index of a byte in a string
+       utf16idx()              UTF-16 index of a byte in a string
        repeat()                repeat a string multiple times
        eval()                  evaluate a string expression
        execute()               execute an Ex command and get the output
*** ../vim-9.0.1484/src/evalfunc.c      2023-04-13 19:15:50.023391985 +0100
--- src/evalfunc.c      2023-04-24 20:22:12.664400625 +0100
***************
*** 1751,1759 ****
                        ret_number,         f_bufwinnr},
      {"byte2line",     1, 1, FEARG_1,      arg1_number,
                        ret_number,         f_byte2line},
!     {"byteidx",               2, 2, FEARG_1,      arg2_string_number,
                        ret_number,         f_byteidx},
!     {"byteidxcomp",   2, 2, FEARG_1,      arg2_string_number,
                        ret_number,         f_byteidxcomp},
      {"call",          2, 3, FEARG_1,      arg3_any_list_dict,
                        ret_any,            f_call},
--- 1751,1759 ----
                        ret_number,         f_bufwinnr},
      {"byte2line",     1, 1, FEARG_1,      arg1_number,
                        ret_number,         f_byte2line},
!     {"byteidx",               2, 3, FEARG_1,      arg3_string_number_bool,
                        ret_number,         f_byteidx},
!     {"byteidxcomp",   2, 3, FEARG_1,      arg3_string_number_bool,
                        ret_number,         f_byteidxcomp},
      {"call",          2, 3, FEARG_1,      arg3_any_list_dict,
                        ret_any,            f_call},
***************
*** 1803,1809 ****
                        ret_number,         f_charclass},
      {"charcol",               1, 2, FEARG_1,      arg2_string_or_list_number,
                        ret_number,         f_charcol},
!     {"charidx",               2, 3, FEARG_1,      arg3_string_number_bool,
                        ret_number,         f_charidx},
      {"chdir",         1, 1, FEARG_1,      arg1_string,
                        ret_string,         f_chdir},
--- 1803,1809 ----
                        ret_number,         f_charclass},
      {"charcol",               1, 2, FEARG_1,      arg2_string_or_list_number,
                        ret_number,         f_charcol},
!     {"charidx",               2, 4, FEARG_1,      arg3_string_number_bool,
                        ret_number,         f_charidx},
      {"chdir",         1, 1, FEARG_1,      arg1_string,
                        ret_string,         f_chdir},
***************
*** 2601,2606 ****
--- 2601,2608 ----
                        ret_number,         f_strridx},
      {"strtrans",      1, 1, FEARG_1,      arg1_string,
                        ret_string,         f_strtrans},
+     {"strutf16len",   1, 2, FEARG_1,      arg2_string_bool,
+                       ret_number,         f_strutf16len},
      {"strwidth",      1, 1, FEARG_1,      arg1_string,
                        ret_number,         f_strwidth},
      {"submatch",      1, 2, FEARG_1,      arg2_number_bool,
***************
*** 2785,2790 ****
--- 2787,2794 ----
                        ret_dict_any,       f_undotree},
      {"uniq",          1, 3, FEARG_1,      arg13_sortuniq,
                        ret_first_arg,      f_uniq},
+     {"utf16idx",      2, 4, FEARG_1,      arg3_string_number_bool,
+                       ret_number,         f_utf16idx},
      {"values",                1, 1, FEARG_1,      arg1_dict_any,
                        ret_list_member,    f_values},
      {"virtcol",               1, 2, FEARG_1,      arg2_string_or_list_bool,
*** ../vim-9.0.1484/src/strings.c       2023-04-16 20:53:50.189171575 +0100
--- src/strings.c       2023-04-24 21:00:25.749101228 +0100
***************
*** 1006,1015 ****
      static void
  byteidx(typval_T *argvars, typval_T *rettv, int comp UNUSED)
  {
-     char_u    *t;
-     char_u    *str;
-     varnumber_T       idx;
- 
      rettv->vval.v_number = -1;
  
      if (in_vim9script()
--- 1006,1011 ----
***************
*** 1017,1036 ****
                || check_for_number_arg(argvars, 1) == FAIL))
        return;
  
!     str = tv_get_string_chk(&argvars[0]);
!     idx = tv_get_number_chk(&argvars[1], NULL);
      if (str == NULL || idx < 0)
        return;
  
!     t = str;
      for ( ; idx > 0; idx--)
      {
        if (*t == NUL)          // EOL reached
            return;
!       if (enc_utf8 && comp)
!           t += utf_ptr2len(t);
!       else
!           t += (*mb_ptr2len)(t);
      }
      rettv->vval.v_number = (varnumber_T)(t - str);
  }
--- 1013,1054 ----
                || check_for_number_arg(argvars, 1) == FAIL))
        return;
  
!     char_u *str = tv_get_string_chk(&argvars[0]);
!     varnumber_T       idx = tv_get_number_chk(&argvars[1], NULL);
      if (str == NULL || idx < 0)
        return;
  
!     varnumber_T       utf16idx = FALSE;
!     if (argvars[2].v_type != VAR_UNKNOWN)
!     {
!       utf16idx = tv_get_bool(&argvars[2]);
!       if (utf16idx < 0 || utf16idx > 1)
!       {
!           semsg(_(e_using_number_as_bool_nr), utf16idx);
!           return;
!       }
!     }
! 
!     int (*ptr2len)(char_u *);
!     if (enc_utf8 && comp)
!       ptr2len = utf_ptr2len;
!     else
!       ptr2len = mb_ptr2len;
! 
!     char_u *t = str;
      for ( ; idx > 0; idx--)
      {
        if (*t == NUL)          // EOL reached
            return;
!       if (utf16idx)
!       {
!           int clen = ptr2len(t);
!           int c = (clen > 1) ? utf_ptr2char(t) : *t;
!           if (c > 0xFFFF)
!               idx--;
!       }
!       if (idx > 0)
!           t += ptr2len(t);
      }
      rettv->vval.v_number = (varnumber_T)(t - str);
  }
***************
*** 1059,1100 ****
      void
  f_charidx(typval_T *argvars, typval_T *rettv)
  {
-     char_u    *str;
-     varnumber_T       idx;
-     varnumber_T       countcc = FALSE;
-     char_u    *p;
-     int               len;
-     int               (*ptr2len)(char_u *);
- 
      rettv->vval.v_number = -1;
  
!     if ((check_for_string_arg(argvars, 0) == FAIL
                || check_for_number_arg(argvars, 1) == FAIL
!               || check_for_opt_bool_arg(argvars, 2) == FAIL))
        return;
  
!     str = tv_get_string_chk(&argvars[0]);
!     idx = tv_get_number_chk(&argvars[1], NULL);
      if (str == NULL || idx < 0)
        return;
  
      if (argvars[2].v_type != VAR_UNKNOWN)
-       countcc = tv_get_bool(&argvars[2]);
-     if (countcc < 0 || countcc > 1)
      {
!       semsg(_(e_using_number_as_bool_nr), countcc);
!       return;
      }
  
      if (enc_utf8 && countcc)
        ptr2len = utf_ptr2len;
      else
        ptr2len = mb_ptr2len;
  
!     for (p = str, len = 0; p <= str + idx; len++)
      {
        if (*p == NUL)
            return;
        p += ptr2len(p);
      }
  
--- 1077,1125 ----
      void
  f_charidx(typval_T *argvars, typval_T *rettv)
  {
      rettv->vval.v_number = -1;
  
!     if (check_for_string_arg(argvars, 0) == FAIL
                || check_for_number_arg(argvars, 1) == FAIL
!               || check_for_opt_bool_arg(argvars, 2) == FAIL
!               || (argvars[2].v_type != VAR_UNKNOWN
!                   && check_for_opt_bool_arg(argvars, 3) == FAIL))
        return;
  
!     char_u *str = tv_get_string_chk(&argvars[0]);
!     varnumber_T       idx = tv_get_number_chk(&argvars[1], NULL);
      if (str == NULL || idx < 0)
        return;
  
+     varnumber_T       countcc = FALSE;
+     varnumber_T       utf16idx = FALSE;
      if (argvars[2].v_type != VAR_UNKNOWN)
      {
!       countcc = tv_get_bool(&argvars[2]);
!       if (argvars[3].v_type != VAR_UNKNOWN)
!           utf16idx = tv_get_bool(&argvars[3]);
      }
  
+     int (*ptr2len)(char_u *);
      if (enc_utf8 && countcc)
        ptr2len = utf_ptr2len;
      else
        ptr2len = mb_ptr2len;
  
!     char_u    *p;
!     int               len;
!     for (p = str, len = 0; utf16idx ? idx >= 0 : p <= str + idx; len++)
      {
        if (*p == NUL)
            return;
+       if (utf16idx)
+       {
+           idx--;
+           int clen = ptr2len(p);
+           int c = (clen > 1) ? utf_ptr2char(p) : *p;
+           if (c > 0xFFFF)
+               idx--;
+       }
        p += ptr2len(p);
      }
  
***************
*** 1359,1364 ****
--- 1384,1421 ----
  }
  
  /*
+  * "strutf16len()" function
+  */
+     void
+ f_strutf16len(typval_T *argvars, typval_T *rettv)
+ {
+     rettv->vval.v_number = -1;
+ 
+     if (check_for_string_arg(argvars, 0) == FAIL
+           || check_for_opt_bool_arg(argvars, 1) == FAIL)
+       return;
+ 
+     varnumber_T countcc = FALSE;
+     if (argvars[1].v_type != VAR_UNKNOWN)
+       countcc = tv_get_bool(&argvars[1]);
+ 
+     char_u            *s = tv_get_string(&argvars[0]);
+     varnumber_T               len = 0;
+     int                       (*func_mb_ptr2char_adv)(char_u **pp);
+     int                       ch;
+ 
+     func_mb_ptr2char_adv = countcc ? mb_cptr2char_adv : mb_ptr2char_adv;
+     while (*s != NUL)
+     {
+       ch = func_mb_ptr2char_adv(&s);
+       if (ch > 0xFFFF)
+           ++len;
+       ++len;
+     }
+     rettv->vval.v_number = len;
+ }
+ 
+ /*
   * "strdisplaywidth()" function
   */
      void
***************
*** 1619,1624 ****
--- 1676,1736 ----
      rettv->vval.v_string = transstr(tv_get_string(&argvars[0]));
  }
  
+ 
+ /*
+  *
+  * "utf16idx()" function
+  */
+     void
+ f_utf16idx(typval_T *argvars, typval_T *rettv)
+ {
+     rettv->vval.v_number = -1;
+ 
+     if (check_for_string_arg(argvars, 0) == FAIL
+           || check_for_opt_number_arg(argvars, 1) == FAIL
+           || check_for_opt_bool_arg(argvars, 2) == FAIL
+           || (argvars[2].v_type != VAR_UNKNOWN
+                   && check_for_opt_bool_arg(argvars, 3) == FAIL))
+           return;
+ 
+     char_u *str = tv_get_string_chk(&argvars[0]);
+     varnumber_T       idx = tv_get_number_chk(&argvars[1], NULL);
+     if (str == NULL || idx < 0)
+       return;
+ 
+     varnumber_T       countcc = FALSE;
+     varnumber_T       charidx = FALSE;
+     if (argvars[2].v_type != VAR_UNKNOWN)
+     {
+       countcc = tv_get_bool(&argvars[2]);
+       if (argvars[3].v_type != VAR_UNKNOWN)
+           charidx = tv_get_bool(&argvars[3]);
+     }
+ 
+     int (*ptr2len)(char_u *);
+     if (enc_utf8 && countcc)
+       ptr2len = utf_ptr2len;
+     else
+       ptr2len = mb_ptr2len;
+ 
+     char_u    *p;
+     int               len;
+     for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
+     {
+       if (*p == NUL)
+           return;
+       int clen = ptr2len(p);
+       int c = (clen > 1) ? utf_ptr2char(p) : *p;
+       if (c > 0xFFFF)
+           len++;
+       p += ptr2len(p);
+       if (charidx)
+           idx--;
+     }
+ 
+     rettv->vval.v_number = len > 0 ? len - 1 : 0;
+ }
+ 
  /*
   * "tolower(string)" function
   */
*** ../vim-9.0.1484/src/proto/strings.pro       2023-01-04 15:56:47.868550539 +0000
--- src/proto/strings.pro       2023-04-24 20:22:12.664400625 +0100
***************
*** 36,47 ****
--- 36,49 ----
  void f_strlen(typval_T *argvars, typval_T *rettv);
  void f_strcharlen(typval_T *argvars, typval_T *rettv);
  void f_strchars(typval_T *argvars, typval_T *rettv);
+ void f_strutf16len(typval_T *argvars, typval_T *rettv);
  void f_strdisplaywidth(typval_T *argvars, typval_T *rettv);
  void f_strwidth(typval_T *argvars, typval_T *rettv);
  void f_strcharpart(typval_T *argvars, typval_T *rettv);
  void f_strpart(typval_T *argvars, typval_T *rettv);
  void f_strridx(typval_T *argvars, typval_T *rettv);
  void f_strtrans(typval_T *argvars, typval_T *rettv);
+ void f_utf16idx(typval_T *argvars, typval_T *rettv);
  void f_tolower(typval_T *argvars, typval_T *rettv);
  void f_toupper(typval_T *argvars, typval_T *rettv);
  void f_tr(typval_T *argvars, typval_T *rettv);
*** ../vim-9.0.1484/src/testdir/test_functions.vim      2023-02-04 10:58:28.815703377 +0000
--- src/testdir/test_functions.vim      2023-04-24 20:22:12.668400624 +0100
***************
*** 1192,1210 ****
    bw!
  endfunc
  
! " Test for byteidx() and byteidxcomp() functions
  func Test_byteidx()
    let a = '.é.' " one char of two bytes
    call assert_equal(0, byteidx(a, 0))
-   call assert_equal(0, byteidxcomp(a, 0))
    call assert_equal(1, byteidx(a, 1))
-   call assert_equal(1, byteidxcomp(a, 1))
    call assert_equal(3, byteidx(a, 2))
-   call assert_equal(3, byteidxcomp(a, 2))
    call assert_equal(4, byteidx(a, 3))
-   call assert_equal(4, byteidxcomp(a, 3))
    call assert_equal(-1, byteidx(a, 4))
-   call assert_equal(-1, byteidxcomp(a, 4))
  
    let b = '.é.' " normal e with composing char
    call assert_equal(0, b->byteidx(0))
--- 1192,1205 ----
    bw!
  endfunc
  
! " Test for byteidx() using a character index
  func Test_byteidx()
    let a = '.é.' " one char of two bytes
    call assert_equal(0, byteidx(a, 0))
    call assert_equal(1, byteidx(a, 1))
    call assert_equal(3, byteidx(a, 2))
    call assert_equal(4, byteidx(a, 3))
    call assert_equal(-1, byteidx(a, 4))
  
    let b = '.é.' " normal e with composing char
    call assert_equal(0, b->byteidx(0))
***************
*** 1212,1229 ****
    call assert_equal(4, b->byteidx(2))
    call assert_equal(5, b->byteidx(3))
    call assert_equal(-1, b->byteidx(4))
    call assert_fails("call byteidx([], 0)", 'E730:')
  
    call assert_equal(0, b->byteidxcomp(0))
    call assert_equal(1, b->byteidxcomp(1))
    call assert_equal(2, b->byteidxcomp(2))
    call assert_equal(4, b->byteidxcomp(3))
    call assert_equal(5, b->byteidxcomp(4))
    call assert_equal(-1, b->byteidxcomp(5))
    call assert_fails("call byteidxcomp([], 0)", 'E730:')
  endfunc
  
! " Test for charidx()
  func Test_charidx()
    let a = 'xáb́y'
    call assert_equal(0, charidx(a, 0))
--- 1207,1390 ----
    call assert_equal(4, b->byteidx(2))
    call assert_equal(5, b->byteidx(3))
    call assert_equal(-1, b->byteidx(4))
+ 
+   " string with multiple composing characters
+   let str = '-ą́-ą́'
+   call assert_equal(0, byteidx(str, 0))
+   call assert_equal(1, byteidx(str, 1))
+   call assert_equal(6, byteidx(str, 2))
+   call assert_equal(7, byteidx(str, 3))
+   call assert_equal(12, byteidx(str, 4))
+   call assert_equal(-1, byteidx(str, 5))
+ 
+   " empty string
+   call assert_equal(0, byteidx('', 0))
+   call assert_equal(-1, byteidx('', 1))
+ 
+   " error cases
    call assert_fails("call byteidx([], 0)", 'E730:')
+   call assert_fails("call byteidx('abc', [])", 'E745:')
+ endfunc
  
+ " Test for byteidxcomp() using a character index
+ func Test_byteidxcomp()
+   let a = '.é.' " one char of two bytes
+   call assert_equal(0, byteidxcomp(a, 0))
+   call assert_equal(1, byteidxcomp(a, 1))
+   call assert_equal(3, byteidxcomp(a, 2))
+   call assert_equal(4, byteidxcomp(a, 3))
+   call assert_equal(-1, byteidxcomp(a, 4))
+ 
+   let b = '.é.' " normal e with composing char
    call assert_equal(0, b->byteidxcomp(0))
    call assert_equal(1, b->byteidxcomp(1))
    call assert_equal(2, b->byteidxcomp(2))
    call assert_equal(4, b->byteidxcomp(3))
    call assert_equal(5, b->byteidxcomp(4))
    call assert_equal(-1, b->byteidxcomp(5))
+ 
+   " string with multiple composing characters
+   let str = '-ą́-ą́'
+   call assert_equal(0, byteidxcomp(str, 0))
+   call assert_equal(1, byteidxcomp(str, 1))
+   call assert_equal(2, byteidxcomp(str, 2))
+   call assert_equal(4, byteidxcomp(str, 3))
+   call assert_equal(6, byteidxcomp(str, 4))
+   call assert_equal(7, byteidxcomp(str, 5))
+   call assert_equal(8, byteidxcomp(str, 6))
+   call assert_equal(10, byteidxcomp(str, 7))
+   call assert_equal(12, byteidxcomp(str, 8))
+   call assert_equal(-1, byteidxcomp(str, 9))
+ 
+   " empty string
+   call assert_equal(0, byteidxcomp('', 0))
+   call assert_equal(-1, byteidxcomp('', 1))
+ 
+   " error cases
    call assert_fails("call byteidxcomp([], 0)", 'E730:')
+   call assert_fails("call byteidxcomp('abc', [])", 'E745:')
  endfunc
  
! " Test for byteidx() using a UTF-16 index
! func Test_byteidx_from_utf16_index()
!   " string with single byte characters
!   let str = "abc"
!   for i in range(3)
!     call assert_equal(i, byteidx(str, i, v:true))
!   endfor
!   call assert_equal(3, byteidx(str, 3, v:true))
!   call assert_equal(-1, byteidx(str, 4, v:true))
! 
!   " string with two byte characters
!   let str = "a©©b"
!   call assert_equal(0, byteidx(str, 0, v:true))
!   call assert_equal(1, byteidx(str, 1, v:true))
!   call assert_equal(3, byteidx(str, 2, v:true))
!   call assert_equal(5, byteidx(str, 3, v:true))
!   call assert_equal(6, byteidx(str, 4, v:true))
!   call assert_equal(-1, byteidx(str, 5, v:true))
! 
!   " string with four byte characters
!   let str = "a😊😊b"
!   call assert_equal(0, byteidx(str, 0, v:true))
!   call assert_equal(1, byteidx(str, 1, v:true))
!   call assert_equal(1, byteidx(str, 2, v:true))
!   call assert_equal(5, byteidx(str, 3, v:true))
!   call assert_equal(5, byteidx(str, 4, v:true))
!   call assert_equal(9, byteidx(str, 5, v:true))
!   call assert_equal(10, byteidx(str, 6, v:true))
!   call assert_equal(-1, byteidx(str, 7, v:true))
! 
!   " string with composing characters
!   let str = '-á-b́'
!   call assert_equal(0, byteidx(str, 0, v:true))
!   call assert_equal(1, byteidx(str, 1, v:true))
!   call assert_equal(4, byteidx(str, 2, v:true))
!   call assert_equal(5, byteidx(str, 3, v:true))
!   call assert_equal(8, byteidx(str, 4, v:true))
!   call assert_equal(-1, byteidx(str, 5, v:true))
! 
!   " string with multiple composing characters
!   let str = '-ą́-ą́'
!   call assert_equal(0, byteidx(str, 0, v:true))
!   call assert_equal(1, byteidx(str, 1, v:true))
!   call assert_equal(6, byteidx(str, 2, v:true))
!   call assert_equal(7, byteidx(str, 3, v:true))
!   call assert_equal(12, byteidx(str, 4, v:true))
!   call assert_equal(-1, byteidx(str, 5, v:true))
! 
!   " empty string
!   call assert_equal(0, byteidx('', 0, v:true))
!   call assert_equal(-1, byteidx('', 1, v:true))
! 
!   " error cases
!   call assert_fails('call byteidx(str, 0, [])', 'E745:')
! endfunc
! 
! " Test for byteidxcomp() using a UTF-16 index
! func Test_byteidxcomp_from_utf16_index()
!   " string with single byte characters
!   let str = "abc"
!   for i in range(3)
!     call assert_equal(i, byteidxcomp(str, i, v:true))
!   endfor
!   call assert_equal(3, byteidxcomp(str, 3, v:true))
!   call assert_equal(-1, byteidxcomp(str, 4, v:true))
! 
!   " string with two byte characters
!   let str = "a©©b"
!   call assert_equal(0, byteidxcomp(str, 0, v:true))
!   call assert_equal(1, byteidxcomp(str, 1, v:true))
!   call assert_equal(3, byteidxcomp(str, 2, v:true))
!   call assert_equal(5, byteidxcomp(str, 3, v:true))
!   call assert_equal(6, byteidxcomp(str, 4, v:true))
!   call assert_equal(-1, byteidxcomp(str, 5, v:true))
! 
!   " string with four byte characters
!   let str = "a😊😊b"
!   call assert_equal(0, byteidxcomp(str, 0, v:true))
!   call assert_equal(1, byteidxcomp(str, 1, v:true))
!   call assert_equal(1, byteidxcomp(str, 2, v:true))
!   call assert_equal(5, byteidxcomp(str, 3, v:true))
!   call assert_equal(5, byteidxcomp(str, 4, v:true))
!   call assert_equal(9, byteidxcomp(str, 5, v:true))
!   call assert_equal(10, byteidxcomp(str, 6, v:true))
!   call assert_equal(-1, byteidxcomp(str, 7, v:true))
! 
!   " string with composing characters
!   let str = '-á-b́'
!   call assert_equal(0, byteidxcomp(str, 0, v:true))
!   call assert_equal(1, byteidxcomp(str, 1, v:true))
!   call assert_equal(2, byteidxcomp(str, 2, v:true))
!   call assert_equal(4, byteidxcomp(str, 3, v:true))
!   call assert_equal(5, byteidxcomp(str, 4, v:true))
!   call assert_equal(6, byteidxcomp(str, 5, v:true))
!   call assert_equal(8, byteidxcomp(str, 6, v:true))
!   call assert_equal(-1, byteidxcomp(str, 7, v:true))
!   call assert_fails('call byteidxcomp(str, 0, [])', 'E745:')
! 
!   " string with multiple composing characters
!   let str = '-ą́-ą́'
!   call assert_equal(0, byteidxcomp(str, 0, v:true))
!   call assert_equal(1, byteidxcomp(str, 1, v:true))
!   call assert_equal(2, byteidxcomp(str, 2, v:true))
!   call assert_equal(4, byteidxcomp(str, 3, v:true))
!   call assert_equal(6, byteidxcomp(str, 4, v:true))
!   call assert_equal(7, byteidxcomp(str, 5, v:true))
!   call assert_equal(8, byteidxcomp(str, 6, v:true))
!   call assert_equal(10, byteidxcomp(str, 7, v:true))
!   call assert_equal(12, byteidxcomp(str, 8, v:true))
!   call assert_equal(-1, byteidxcomp(str, 9, v:true))
! 
!   " empty string
!   call assert_equal(0, byteidxcomp('', 0, v:true))
!   call assert_equal(-1, byteidxcomp('', 1, v:true))
! 
!   " error cases
!   call assert_fails('call byteidxcomp(str, 0, [])', 'E745:')
! endfunc
! 
! " Test for charidx() using a byte index
  func Test_charidx()
    let a = 'xáb́y'
    call assert_equal(0, charidx(a, 0))
***************
*** 1232,1248 ****
    call assert_equal(3, charidx(a, 7))
    call assert_equal(-1, charidx(a, 8))
    call assert_equal(-1, charidx(a, -1))
-   call assert_equal(-1, charidx('', 0))
-   call assert_equal(-1, charidx(test_null_string(), 0))
  
    " count composing characters
!   call assert_equal(0, charidx(a, 0, 1))
!   call assert_equal(2, charidx(a, 2, 1))
!   call assert_equal(3, charidx(a, 4, 1))
!   call assert_equal(5, charidx(a, 7, 1))
!   call assert_equal(-1, charidx(a, 8, 1))
    call assert_equal(-1, charidx('', 0, 1))
  
    call assert_fails('let x = charidx([], 1)', 'E1174:')
    call assert_fails('let x = charidx("abc", [])', 'E1210:')
    call assert_fails('let x = charidx("abc", 1, [])', 'E1212:')
--- 1393,1412 ----
    call assert_equal(3, charidx(a, 7))
    call assert_equal(-1, charidx(a, 8))
    call assert_equal(-1, charidx(a, -1))
  
    " count composing characters
!   call assert_equal(0, a->charidx(0, 1))
!   call assert_equal(2, a->charidx(2, 1))
!   call assert_equal(3, a->charidx(4, 1))
!   call assert_equal(5, a->charidx(7, 1))
!   call assert_equal(-1, a->charidx(8, 1))
! 
!   " empty string
!   call assert_equal(-1, charidx('', 0))
    call assert_equal(-1, charidx('', 0, 1))
  
+   " error cases
+   call assert_equal(-1, charidx(test_null_string(), 0))
    call assert_fails('let x = charidx([], 1)', 'E1174:')
    call assert_fails('let x = charidx("abc", [])', 'E1210:')
    call assert_fails('let x = charidx("abc", 1, [])', 'E1212:')
***************
*** 1250,1255 ****
--- 1414,1650 ----
    call assert_fails('let x = charidx("abc", 1, 2)', 'E1212:')
  endfunc
  
+ " Test for charidx() using a UTF-16 index
+ func Test_charidx_from_utf16_index()
+   " string with single byte characters
+   let str = "abc"
+   for i in range(3)
+     call assert_equal(i, charidx(str, i, v:false, v:true))
+   endfor
+   call assert_equal(-1, charidx(str, 3, v:false, v:true))
+ 
+   " string with two byte characters
+   let str = "a©©b"
+   call assert_equal(0, charidx(str, 0, v:false, v:true))
+   call assert_equal(1, charidx(str, 1, v:false, v:true))
+   call assert_equal(2, charidx(str, 2, v:false, v:true))
+   call assert_equal(3, charidx(str, 3, v:false, v:true))
+   call assert_equal(-1, charidx(str, 4, v:false, v:true))
+ 
+   " string with four byte characters
+   let str = "a😊😊b"
+   call assert_equal(0, charidx(str, 0, v:false, v:true))
+   call assert_equal(1, charidx(str, 1, v:false, v:true))
+   call assert_equal(1, charidx(str, 2, v:false, v:true))
+   call assert_equal(2, charidx(str, 3, v:false, v:true))
+   call assert_equal(2, charidx(str, 4, v:false, v:true))
+   call assert_equal(3, charidx(str, 5, v:false, v:true))
+   call assert_equal(-1, charidx(str, 6, v:false, v:true))
+ 
+   " string with composing characters
+   let str = '-á-b́'
+   for i in str->strcharlen()->range()
+     call assert_equal(i, charidx(str, i, v:false, v:true))
+   endfor
+   call assert_equal(-1, charidx(str, 4, v:false, v:true))
+   for i in str->strchars()->range()
+     call assert_equal(i, charidx(str, i, v:true, v:true))
+   endfor
+   call assert_equal(-1, charidx(str, 6, v:true, v:true))
+ 
+   " string with multiple composing characters
+   let str = '-ą́-ą́'
+   for i in str->strcharlen()->range()
+     call assert_equal(i, charidx(str, i, v:false, v:true))
+   endfor
+   call assert_equal(-1, charidx(str, 4, v:false, v:true))
+   for i in str->strchars()->range()
+     call assert_equal(i, charidx(str, i, v:true, v:true))
+   endfor
+   call assert_equal(-1, charidx(str, 8, v:true, v:true))
+ 
+   " empty string
+   call assert_equal(-1, charidx('', 0, v:false, v:true))
+   call assert_equal(-1, charidx('', 0, v:true, v:true))
+ 
+   " error cases
+   call assert_equal(-1, charidx('', 0, v:false, v:true))
+   call assert_equal(-1, charidx('', 0, v:true, v:true))
+   call assert_equal(-1, charidx(test_null_string(), 0, v:false, v:true))
+   call assert_fails('let x = charidx("abc", 1, v:false, [])', 'E1212:')
+   call assert_fails('let x = charidx("abc", 1, v:true, [])', 'E1212:')
+ endfunc
+ 
+ " Test for utf16idx() using a byte index
+ func Test_utf16idx_from_byteidx()
+   " UTF-16 index of a string with single byte characters
+   let str = "abc"
+   for i in range(3)
+     call assert_equal(i, utf16idx(str, i))
+   endfor
+   call assert_equal(-1, utf16idx(str, 3))
+ 
+   " UTF-16 index of a string with two byte characters
+   let str = 'a©©b'
+   call assert_equal(0, str->utf16idx(0))
+   call assert_equal(1, str->utf16idx(1))
+   call assert_equal(1, str->utf16idx(2))
+   call assert_equal(2, str->utf16idx(3))
+   call assert_equal(2, str->utf16idx(4))
+   call assert_equal(3, str->utf16idx(5))
+   call assert_equal(-1, str->utf16idx(6))
+ 
+   " UTF-16 index of a string with four byte characters
+   let str = 'a😊😊b'
+   call assert_equal(0, utf16idx(str, 0))
+   call assert_equal(2, utf16idx(str, 1))
+   call assert_equal(2, utf16idx(str, 2))
+   call assert_equal(2, utf16idx(str, 3))
+   call assert_equal(2, utf16idx(str, 4))
+   call assert_equal(4, utf16idx(str, 5))
+   call assert_equal(4, utf16idx(str, 6))
+   call assert_equal(4, utf16idx(str, 7))
+   call assert_equal(4, utf16idx(str, 8))
+   call assert_equal(5, utf16idx(str, 9))
+   call assert_equal(-1, utf16idx(str, 10))
+ 
+   " UTF-16 index of a string with composing characters
+   let str = '-á-b́'
+   call assert_equal(0, utf16idx(str, 0))
+   call assert_equal(1, utf16idx(str, 1))
+   call assert_equal(1, utf16idx(str, 2))
+   call assert_equal(1, utf16idx(str, 3))
+   call assert_equal(2, utf16idx(str, 4))
+   call assert_equal(3, utf16idx(str, 5))
+   call assert_equal(3, utf16idx(str, 6))
+   call assert_equal(3, utf16idx(str, 7))
+   call assert_equal(-1, utf16idx(str, 8))
+   call assert_equal(0, utf16idx(str, 0, v:true))
+   call assert_equal(1, utf16idx(str, 1, v:true))
+   call assert_equal(2, utf16idx(str, 2, v:true))
+   call assert_equal(2, utf16idx(str, 3, v:true))
+   call assert_equal(3, utf16idx(str, 4, v:true))
+   call assert_equal(4, utf16idx(str, 5, v:true))
+   call assert_equal(5, utf16idx(str, 6, v:true))
+   call assert_equal(5, utf16idx(str, 7, v:true))
+   call assert_equal(-1, utf16idx(str, 8, v:true))
+ 
+   " string with multiple composing characters
+   let str = '-ą́-ą́'
+   call assert_equal(0, utf16idx(str, 0))
+   call assert_equal(1, utf16idx(str, 1))
+   call assert_equal(1, utf16idx(str, 2))
+   call assert_equal(1, utf16idx(str, 3))
+   call assert_equal(1, utf16idx(str, 4))
+   call assert_equal(1, utf16idx(str, 5))
+   call assert_equal(2, utf16idx(str, 6))
+   call assert_equal(3, utf16idx(str, 7))
+   call assert_equal(3, utf16idx(str, 8))
+   call assert_equal(3, utf16idx(str, 9))
+   call assert_equal(3, utf16idx(str, 10))
+   call assert_equal(3, utf16idx(str, 11))
+   call assert_equal(-1, utf16idx(str, 12))
+   call assert_equal(0, utf16idx(str, 0, v:true))
+   call assert_equal(1, utf16idx(str, 1, v:true))
+   call assert_equal(2, utf16idx(str, 2, v:true))
+   call assert_equal(2, utf16idx(str, 3, v:true))
+   call assert_equal(3, utf16idx(str, 4, v:true))
+   call assert_equal(3, utf16idx(str, 5, v:true))
+   call assert_equal(4, utf16idx(str, 6, v:true))
+   call assert_equal(5, utf16idx(str, 7, v:true))
+   call assert_equal(6, utf16idx(str, 8, v:true))
+   call assert_equal(6, utf16idx(str, 9, v:true))
+   call assert_equal(7, utf16idx(str, 10, v:true))
+   call assert_equal(7, utf16idx(str, 11, v:true))
+   call assert_equal(-1, utf16idx(str, 12, v:true))
+ 
+   " empty string
+   call assert_equal(-1, utf16idx('', 0))
+   call assert_equal(-1, utf16idx('', 0, v:true))
+ 
+   " error cases
+   call assert_equal(-1, utf16idx("", 0))
+   call assert_equal(-1, utf16idx("abc", -1))
+   call assert_equal(-1, utf16idx(test_null_string(), 0))
+   call assert_fails('let l = utf16idx([], 0)', 'E1174:')
+   call assert_fails('let l = utf16idx("ab", [])', 'E1210:')
+   call assert_fails('let l = utf16idx("ab", 0, [])', 'E1212:')
+ endfunc
+ 
+ " Test for utf16idx() using a character index
+ func Test_utf16idx_from_charidx()
+   let str = "abc"
+   for i in str->strcharlen()->range()
+     call assert_equal(i, utf16idx(str, i, v:false, v:true))
+   endfor
+   call assert_equal(-1, utf16idx(str, 3, v:false, v:true))
+ 
+   " UTF-16 index of a string with two byte characters
+   let str = "a©©b"
+   for i in str->strcharlen()->range()
+     call assert_equal(i, utf16idx(str, i, v:false, v:true))
+   endfor
+   call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
+ 
+   " UTF-16 index of a string with four byte characters
+   let str = "a😊😊b"
+   call assert_equal(0, utf16idx(str, 0, v:false, v:true))
+   call assert_equal(2, utf16idx(str, 1, v:false, v:true))
+   call assert_equal(4, utf16idx(str, 2, v:false, v:true))
+   call assert_equal(5, utf16idx(str, 3, v:false, v:true))
+   call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
+ 
+   " UTF-16 index of a string with composing characters
+   let str = '-á-b́'
+   for i in str->strcharlen()->range()
+     call assert_equal(i, utf16idx(str, i, v:false, v:true))
+   endfor
+   call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
+   for i in str->strchars()->range()
+     call assert_equal(i, utf16idx(str, i, v:true, v:true))
+   endfor
+   call assert_equal(-1, utf16idx(str, 6, v:true, v:true))
+ 
+   " string with multiple composing characters
+   let str = '-ą́-ą́'
+   for i in str->strcharlen()->range()
+     call assert_equal(i, utf16idx(str, i, v:false, v:true))
+   endfor
+   call assert_equal(-1, utf16idx(str, 4, v:false, v:true))
+   for i in str->strchars()->range()
+     call assert_equal(i, utf16idx(str, i, v:true, v:true))
+   endfor
+   call assert_equal(-1, utf16idx(str, 8, v:true, v:true))
+ 
+   " empty string
+   call assert_equal(-1, utf16idx('', 0, v:false, v:true))
+   call assert_equal(-1, utf16idx('', 0, v:true, v:true))
+ 
+   " error cases
+   call assert_equal(-1, utf16idx(test_null_string(), 0, v:true, v:true))
+   call assert_fails('let l = utf16idx("ab", 0, v:false, [])', 'E1212:')
+ endfunc
+ 
+ " Test for strutf16len()
+ func Test_strutf16len()
+   call assert_equal(3, strutf16len('abc'))
+   call assert_equal(3, 'abc'->strutf16len(v:true))
+   call assert_equal(4, strutf16len('a©©b'))
+   call assert_equal(4, strutf16len('a©©b', v:true))
+   call assert_equal(6, strutf16len('a😊😊b'))
+   call assert_equal(6, strutf16len('a😊😊b', v:true))
+   call assert_equal(4, strutf16len('-á-b́'))
+   call assert_equal(6, strutf16len('-á-b́', v:true))
+   call assert_equal(4, strutf16len('-ą́-ą́'))
+   call assert_equal(8, strutf16len('-ą́-ą́', v:true))
+   call assert_equal(0, strutf16len(''))
+ 
+   " error cases
+   call assert_fails('let l = strutf16len([])', 'E1174:')
+   call assert_fails('let l = strutf16len("a", [])', 'E1212:')
+   call assert_equal(0, strutf16len(test_null_string()))
+ endfunc
+ 
  func Test_count()
    let l = ['a', 'a', 'A', 'b']
    call assert_equal(2, count(l, 'a'))
***************
*** 3074,3078 ****
    call StopVimInTerminal(buf)
  endfunc
  
- 
  " vim: shiftwidth=2 sts=2 expandtab
--- 3469,3472 ----
*** ../vim-9.0.1484/src/version.c       2023-04-24 18:11:32.156258651 +0100
--- src/version.c       2023-04-24 20:24:11.132470661 +0100
***************
*** 697,698 ****
--- 697,700 ----
  {   /* Add new patch number below this line */
+ /**/
+     1485,
  /**/

-- 
It might look like I'm doing nothing, but at the cellular level
I'm really quite busy.

 /// Bram Moolenaar -- [email protected] -- http://www.Moolenaar.net   \\\
///                                                                      \\\
\\\        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/vim_dev/20230424201032.DF14A1C074F%40moolenaar.net.

Patch 9.0.1485

Raspunde prin e-mail lui