patch 9.1.1623: Buffer menu does not handle unicode names correctly

Commit: 
https://github.com/vim/vim/commit/8f9de4991e84dfdc9bcc9dad0eaa2b3544ef963e
Author: Yee Cheng Chin <ychin....@gmail.com>
Date:   Sun Aug 10 10:06:14 2025 +0200

    patch 9.1.1623: Buffer menu does not handle unicode names correctly
    
    Problem:  Buffer menu does not handle unicode names correctly
              (after v9.1.1622)
    Solution: Fix the BMHash() function (Yee Cheng Chin)
    
    The Buffers menu uses a BMHash() function to generate a sortable number
    to be used for the menu index. It used a naive (and incorrect) way of
    encoding multiple ASCII values into a single integer, but assumes each
    character to be only in the ASCII 32-96 range. This means if we use
    non-ASCII file names (e.g. Unicode values like CJK or emojis) we get
    integer underflow and overflow, causing the menu index to wrap around.
    Vim's GUI implementations internally use a signed 32-bit integer for the
    `gui_mch_add_menu_item()` function and so we need to make sure the menu
    index is in the (0, 2^31-1) range.
    
    To do this, if the file name starts with a non-ASCII value, we just use
    the first character's value and set the high bit so it sorts after the
    other ASCII ones. Otherwise, we just take the first 5 characters, and
    use 6 bits for each character (values 0-63) to encode a 30-bit number
    that can be sorted.
    
    This means Unicode file names won't be sorted beyond the first
    character. This is likely going to be fine as there are lots of ways to
    query buffers.
    
    related: #17403
    closes: #17928
    
    Signed-off-by: Yee Cheng Chin <ychin....@gmail.com>
    Signed-off-by: Christian Brabandt <c...@256bit.org>

diff --git a/runtime/menu.vim b/runtime/menu.vim
index c9acf76ca..edd628f4b 100644
--- a/runtime/menu.vim
+++ b/runtime/menu.vim
@@ -2,7 +2,7 @@
 " You can also use this as a start for your own set of menus.
 "
 " Maintainer:  The Vim Project <https://github.com/vim/vim>
-" Last Change: 2023 Aug 10
+" Last Change: 2025 Aug 10
 " Former Maintainer:   Bram Moolenaar <b...@vim.org>
 
 " Note that ":an" (short for ":anoremenu") is often used to make a menu work
@@ -797,8 +797,21 @@ def s:BMShow()
 enddef
 
 def s:BMHash(name: string): number
-  # Make name all upper case, so that chars are between 32 and 96
-  var nm = substitute(name, ".*", '\U\0', "")
+  # Create a sortable numeric hash of the name. This number has to be within
+  # the bounds of a signed 32-bit integer as this is what Vim GUI uses
+  # internally for the index.
+
+  # Make name all upper case, so that alphanumeric chars are between 32 and 96
+  var nm = toupper(name)
+
+  if char2nr(nm[0]) < 32 || char2nr(nm[0]) > 96
+    # We don't have an ASCII character, so just return the raw character value
+    # for first character (clamped to 2^31) and set the high bit to make it
+    # sort after other items. This means only the first character will be
+    # sorted, unfortunately.
+    return or(and(char2nr(nm), 0x7fffffff), 0x40000000)
+  endif
+
   var sp: number
   if has("ebcdic")
     # HACK: Replace all non alphabetics with 'Z'
@@ -808,12 +821,18 @@ def s:BMHash(name: string): number
   else
     sp = char2nr(' ')
   endif
-  # convert first six chars into a number for sorting:
-  return (char2nr(nm[0]) - sp) * 0x800000 + (char2nr(nm[1]) - sp) * 0x20000 + (char2nr(nm[2]) - sp) * 0x1000 + (char2nr(nm[3]) - sp) * 0x80 + (char2nr(nm[4]) - sp) * 0x20 + (char2nr(nm[5]) - sp)
+  # convert first five chars into a number for sorting by compressing each
+  # char into 6 bits (0-63), to a total of 30 bits. If any character is not
+  # ASCII, it will simply be clamped to prevent overflow.
+  return (max([0, min([63, char2nr(nm[0]) - sp])]) << 24) +
+    (max([0, min([63, char2nr(nm[1]) - sp])]) << 18) +
+    (max([0, min([63, char2nr(nm[2]) - sp])]) << 12) +
+    (max([0, min([63, char2nr(nm[3]) - sp])]) <<  6) +
+    max([0, min([63, char2nr(nm[4]) - sp])])
 enddef
 
 def s:BMHash2(name: string): string
-  var nm = substitute(name, ".", '\L\0', "")
+  var nm = tolower(name[0])
   if nm[0] < 'a' || nm[0] > 'z'
     return '&others.'
   elseif nm[0] <= 'd'
diff --git a/src/testdir/test_gui.vim b/src/testdir/test_gui.vim
index 16e8f9134..b08849271 100644
--- a/src/testdir/test_gui.vim
+++ b/src/testdir/test_gui.vim
@@ -1767,4 +1767,37 @@ func Test_CursorHold_not_triggered_at_startup()
   call assert_equal(['g:cursorhold_triggered=0'], found)
 endfunc
 
+" Test that Buffers menu generates the correct index for different buffer
+" names for sorting.
+func Test_Buffers_Menu()
+  doautocmd LoadBufferMenu VimEnter
+
+  " Non-ASCII characters only use the first character as idx
+  let idx_emoji = or(char2nr('😑'), 0x40000000)
+
+  " Only first five letters are used for alphanumeric:
+  " ('a'-32) << 24 + ('b'-32) << 18 + ('c'-32) << 12 + ('d'-32) << 6 + ('e'-32)
+  let idx_abcde = 0x218A3925
+  " ('a'-32) << 24 + ('b'-32) << 18 + ('c'-32) << 12 + ('d'-32) << 6 + ('f'-32)
+  let idx_abcdf = 0x218A3926
+  " ('a'-32) << 24 + 63 (clamped) << 18 + ('c'-32) << 12 + ('d'-32) << 6 + ('e'-32)
+  let idx_a_emoji_cde = 0x21FE3925
+
+  let names = ['😑', '😑1', '😑2', 'abcde', 'abcdefghi', 'abcdf', 'a😑cde']
+  let indices = [idx_emoji, idx_emoji, idx_emoji, idx_abcde, idx_abcde, idx_abcdf, idx_a_emoji_cde]
+  for i in range(len(names))
+    let name = names[i]
+    let idx = indices[i]
+    exe ':badd ' .. name
+    let nr = bufnr('$')
+
+    let cmd = printf(':amenu Buffers.%s\ (%d)', name, nr)
+    let menu = split(execute(cmd), '\n')[1]
+    call assert_inrange(0, 0x7FFFFFFF, idx)
+    call assert_match('^' .. idx .. ' '.. name, menu)
+  endfor
+
+  %bw!
+endfunc
+
 " vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/version.c b/src/version.c
index b28e3b2a4..78cf50877 100644
--- a/src/version.c
+++ b/src/version.c
@@ -719,6 +719,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    1623,
 /**/
     1622,
 /**/

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to vim_dev+unsubscr...@googlegroups.com.
To view this discussion visit 
https://groups.google.com/d/msgid/vim_dev/E1ul1Cu-0064KF-G2%40256bit.org.

Raspunde prin e-mail lui