Patch 8.2.1665
Problem:    Cannot do fuzzy string matching.
Solution:   Add matchfuzzy(). (Yegappan Lakshmanan, closes #6932)
Files:      runtime/doc/eval.txt, runtime/doc/usr_41.txt, src/evalfunc.c,
            src/proto/search.pro, src/search.c, src/testdir/test_functions.vim


*** ../vim-8.2.1664/runtime/doc/eval.txt        2020-09-06 21:47:39.323041533 
+0200
--- runtime/doc/eval.txt        2020-09-11 22:20:52.847578582 +0200
***************
*** 2627,2632 ****
--- 2641,2647 ----
  matchdelete({id} [, {win}])   Number  delete match identified by {id}
  matchend({expr}, {pat} [, {start} [, {count}]])
                                Number  position where {pat} ends in {expr}
+ matchfuzzy({list}, {str})     List    fuzzy match {str} in {list}
  matchlist({expr}, {pat} [, {start} [, {count}]])
                                List    match and submatches of {pat} in {expr}
  matchstr({expr}, {pat} [, {start} [, {count}]])
***************
*** 7260,7265 ****
--- 7310,7338 ----
                Can also be used as a |method|: >
                        GetText()->matchend('word')
  
+ 
+ matchfuzzy({list}, {str})                     *matchfuzzy()*
+               Returns a list with all the strings in {list} that fuzzy
+               match {str}. The strings in the returned list are sorted
+               based on the matching score. {str} is treated as a literal
+               string and regular expression matching is NOT supported.
+               The maximum supported {str} length is 256.
+ 
+               If there are no matching strings or there is an error, then an
+               empty list is returned. If length of {str} is greater than
+               256, then returns an empty list.
+ 
+               Example: >
+                  :echo matchfuzzy(["clay", "crow"], "cay")
+ <             results in ["clay"]. >
+                  :echo getbufinfo()->map({_, v -> v.name})->matchfuzzy("ndl")
+ <             results in a list of buffer names fuzzy matching "ndl". >
+                  :echo v:oldfiles->matchfuzzy("test")
+ <             results in a list of file names fuzzy matching "test". >
+                  :let l = readfile("buffer.c")->matchfuzzy("str")
+ <             results in a list of lines in "buffer.c" fuzzy matching "str".
+ 
+ 
  matchlist({expr}, {pat} [, {start} [, {count}]])              *matchlist()*
                Same as |match()|, but return a |List|.  The first item in the
                list is the matched string, same as what matchstr() would
*** ../vim-8.2.1664/runtime/doc/usr_41.txt      2020-09-04 16:35:06.421571299 
+0200
--- runtime/doc/usr_41.txt      2020-09-11 22:15:07.264666300 +0200
***************
*** 595,600 ****
--- 603,609 ----
        charclass()             class of a character
        match()                 position where a pattern matches in a string
        matchend()              position where a pattern match ends in a string
+       matchfuzzy()            fuzzy matches a string in a list of strings
        matchstr()              match of a pattern in a string
        matchstrpos()           match and positions of a pattern in a string
        matchlist()             like matchstr() and also return submatches
*** ../vim-8.2.1664/src/evalfunc.c      2020-09-06 21:47:39.323041533 +0200
--- src/evalfunc.c      2020-09-11 22:15:07.264666300 +0200
***************
*** 750,755 ****
--- 750,756 ----
      {"matcharg",      1, 1, FEARG_1,    ret_list_string, f_matcharg},
      {"matchdelete",   1, 2, FEARG_1,    ret_number,   f_matchdelete},
      {"matchend",      2, 4, FEARG_1,    ret_number,   f_matchend},
+     {"matchfuzzy",    2, 2, FEARG_1,    ret_list_string,      f_matchfuzzy},
      {"matchlist",     2, 4, FEARG_1,    ret_list_string, f_matchlist},
      {"matchstr",      2, 4, FEARG_1,    ret_string,   f_matchstr},
      {"matchstrpos",   2, 4, FEARG_1,    ret_list_any, f_matchstrpos},
*** ../vim-8.2.1664/src/proto/search.pro        2020-06-01 17:28:31.511939716 
+0200
--- src/proto/search.pro        2020-09-11 22:15:07.264666300 +0200
***************
*** 36,39 ****
--- 36,40 ----
  spat_T *get_spat(int idx);
  int get_spat_last_idx(void);
  void f_searchcount(typval_T *argvars, typval_T *rettv);
+ void f_matchfuzzy(typval_T *argvars, typval_T *rettv);
  /* vim: set ft=c : */
*** ../vim-8.2.1664/src/search.c        2020-09-05 23:15:56.409267096 +0200
--- src/search.c        2020-09-11 22:15:07.264666300 +0200
***************
*** 4165,4168 ****
--- 4165,4508 ----
  the_end:
      restore_last_search_pattern();
  }
+ 
+ /*
+  * Fuzzy string matching
+  *
+  * Ported from the lib_fts library authored by Forrest Smith.
+  * https://github.com/forrestthewoods/lib_fts/tree/master/code
+  *
+  * Blog describing the algorithm:
+  * 
https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/
+  *
+  * Each matching string is assigned a score. The following factors are 
checked:
+  *   Matched letter
+  *   Unmatched letter
+  *   Consecutively matched letters
+  *   Proximity to start
+  *   Letter following a separator (space, underscore)
+  *   Uppercase letter following lowercase (aka CamelCase)
+  *
+  * Matched letters are good. Unmatched letters are bad. Matching near the 
start
+  * is good. Matching the first letter in the middle of a phrase is good.
+  * Matching the uppercase letters in camel case entries is good.
+  *
+  * The score assigned for each factor is explained below.
+  * File paths are different from file names. File extensions may be ignorable.
+  * Single words care about consecutive matches but not separators or camel
+  * case.
+  *   Score starts at 0
+  *   Matched letter: +0 points
+  *   Unmatched letter: -1 point
+  *   Consecutive match bonus: +5 points
+  *   Separator bonus: +10 points
+  *   Camel case bonus: +10 points
+  *   Unmatched leading letter: -3 points (max: -9)
+  *
+  * There is some nuance to this. Scores don’t have an intrinsic meaning. The
+  * score range isn’t 0 to 100. It’s roughly [-50, 50]. Longer words have a
+  * lower minimum score due to unmatched letter penalty. Longer search patterns
+  * have a higher maximum score due to match bonuses.
+  *
+  * Separator and camel case bonus is worth a LOT. Consecutive matches are 
worth
+  * quite a bit.
+  *
+  * There is a penalty if you DON’T match the first three letters. Which
+  * effectively rewards matching near the start. However there’s no difference
+  * in matching between the middle and end.
+  *
+  * There is not an explicit bonus for an exact match. Unmatched letters 
receive
+  * a penalty. So shorter strings and closer matches are worth more.
+  */
+ typedef struct
+ {
+     listitem_T *item;
+     int               score;
+ } fuzzyItem_T;
+ 
+     static int
+ fuzzy_match_recursive(
+       char_u  *fuzpat,
+       char_u  *str,
+       int     *outScore,
+       char_u  *strBegin,
+       char_u  *srcMatches,
+       char_u  *matches,
+       int     maxMatches,
+       int     nextMatch,
+       int     *recursionCount,
+       int     recursionLimit)
+ {
+     // Recursion params
+     int               recursiveMatch = FALSE;
+     char_u    bestRecursiveMatches[256];
+     int               bestRecursiveScore = 0;
+     int               first_match;
+     int               matched;
+ 
+     // Count recursions
+     ++*recursionCount;
+     if (*recursionCount >= recursionLimit)
+       return FALSE;
+ 
+     // Detect end of strings
+     if (*fuzpat == '\0' || *str == '\0')
+       return FALSE;
+ 
+     // Loop through fuzpat and str looking for a match
+     first_match = TRUE;
+     while (*fuzpat != '\0' && *str != '\0')
+     {
+       // Found match
+       if (vim_tolower(*fuzpat) == vim_tolower(*str))
+       {
+           char_u      recursiveMatches[256];
+           int         recursiveScore = 0;
+ 
+           // Supplied matches buffer was too short
+           if (nextMatch >= maxMatches)
+               return FALSE;
+ 
+           // "Copy-on-Write" srcMatches into matches
+           if (first_match && srcMatches)
+           {
+               memcpy(matches, srcMatches, nextMatch);
+               first_match = FALSE;
+           }
+ 
+           // Recursive call that "skips" this match
+           if (fuzzy_match_recursive(fuzpat, str + 1, &recursiveScore,
+                       strBegin, matches, recursiveMatches,
+                       sizeof(recursiveMatches), nextMatch, recursionCount,
+                       recursionLimit))
+           {
+               // Pick best recursive score
+               if (!recursiveMatch || recursiveScore > bestRecursiveScore)
+               {
+                   memcpy(bestRecursiveMatches, recursiveMatches, 256);
+                   bestRecursiveScore = recursiveScore;
+               }
+               recursiveMatch = TRUE;
+           }
+ 
+           // Advance
+           matches[nextMatch++] = (char_u)(str - strBegin);
+           ++fuzpat;
+       }
+       ++str;
+     }
+ 
+     // Determine if full fuzpat was matched
+     matched = *fuzpat == '\0' ? TRUE : FALSE;
+ 
+     // Calculate score
+     if (matched)
+     {
+       // bonus for adjacent matches
+       int     sequential_bonus = 15;
+       // bonus if match occurs after a separator
+       int     separator_bonus = 30;
+       // bonus if match is uppercase and prev is lower
+       int     camel_bonus = 30;
+       // bonus if the first letter is matched
+       int     first_letter_bonus = 15;
+       // penalty applied for every letter in str before the first match
+       int     leading_letter_penalty = -5;
+       // maximum penalty for leading letters
+       int     max_leading_letter_penalty = -15;
+       // penalty for every letter that doesn't matter
+       int     unmatched_letter_penalty = -1;
+       int     penalty;
+       int     unmatched;
+       int     i;
+ 
+       // Iterate str to end
+       while (*str != '\0')
+           ++str;
+ 
+       // Initialize score
+       *outScore = 100;
+ 
+       // Apply leading letter penalty
+       penalty = leading_letter_penalty * matches[0];
+       if (penalty < max_leading_letter_penalty)
+           penalty = max_leading_letter_penalty;
+       *outScore += penalty;
+ 
+       // Apply unmatched penalty
+       unmatched = (int)(str - strBegin) - nextMatch;
+       *outScore += unmatched_letter_penalty * unmatched;
+ 
+       // Apply ordering bonuses
+       for (i = 0; i < nextMatch; ++i)
+       {
+           char_u      currIdx = matches[i];
+ 
+           if (i > 0)
+           {
+               char_u  prevIdx = matches[i - 1];
+ 
+               // Sequential
+               if (currIdx == (prevIdx + 1))
+                   *outScore += sequential_bonus;
+           }
+ 
+           // Check for bonuses based on neighbor character value
+           if (currIdx > 0)
+           {
+               // Camel case
+               char_u  neighbor = strBegin[currIdx - 1];
+               char_u  curr = strBegin[currIdx];
+               int     neighborSeparator;
+ 
+               if (islower(neighbor) && isupper(curr))
+                   *outScore += camel_bonus;
+ 
+               // Separator
+               neighborSeparator = neighbor == '_' || neighbor == ' ';
+               if (neighborSeparator)
+                   *outScore += separator_bonus;
+           }
+           else
+           {
+               // First letter
+               *outScore += first_letter_bonus;
+           }
+       }
+     }
+ 
+     // Return best result
+     if (recursiveMatch && (!matched || bestRecursiveScore > *outScore))
+     {
+       // Recursive score is better than "this"
+       memcpy(matches, bestRecursiveMatches, maxMatches);
+       *outScore = bestRecursiveScore;
+       return TRUE;
+     }
+     else if (matched)
+       return TRUE;            // "this" score is better than recursive
+ 
+     return FALSE;             // no match
+ }
+ 
+ /*
+  * fuzzy_match()
+  *
+  * Performs exhaustive search via recursion to find all possible matches and
+  * match with highest score.
+  * Scores values have no intrinsic meaning.  Possible score range is not
+  * normalized and varies with pattern.
+  * Recursion is limited internally (default=10) to prevent degenerate cases
+  * (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").
+  * Uses char_u for match indices. Therefore patterns are limited to 256
+  * characters.
+  *
+  * Returns TRUE if fuzpat is found AND calculates a score.
+  */
+     static int
+ fuzzy_match(char_u *str, char_u *fuzpat, int *outScore)
+ {
+     char_u    matches[256];
+     int               recursionCount = 0;
+     int               recursionLimit = 10;
+ 
+     *outScore = 0;
+ 
+     return fuzzy_match_recursive(fuzpat, str, outScore, str, NULL, matches,
+           sizeof(matches), 0, &recursionCount, recursionLimit);
+ }
+ 
+ /*
+  * Sort the fuzzy matches in the descending order of the match score.
+  */
+     static int
+ fuzzy_item_compare(const void *s1, const void *s2)
+ {
+     int               v1 = ((fuzzyItem_T *)s1)->score;
+     int               v2 = ((fuzzyItem_T *)s2)->score;
+ 
+     return v1 == v2 ? 0 : v1 > v2 ? -1 : 1;
+ }
+ 
+ /*
+  * Fuzzy search the string 'str' in 'strlist' and return the matching strings
+  * in 'fmatchlist'.
+  */
+     static void
+ match_fuzzy(list_T *strlist, char_u *str, list_T *fmatchlist)
+ {
+     long      len;
+     fuzzyItem_T       *ptrs;
+     listitem_T        *li;
+     long      i = 0;
+     int               found_match = FALSE;
+ 
+     len = list_len(strlist);
+     if (len == 0)
+       return;
+ 
+     ptrs = ALLOC_MULT(fuzzyItem_T, len);
+     if (ptrs == NULL)
+       return;
+ 
+     // For all the string items in strlist, get the fuzzy matching score
+     FOR_ALL_LIST_ITEMS(strlist, li)
+     {
+       int     score;
+ 
+       ptrs[i].item = li;
+       ptrs[i].score = -9999;
+       // ignore non-string items in the list
+       if (li->li_tv.v_type == VAR_STRING && li->li_tv.vval.v_string != NULL)
+           if (fuzzy_match(li->li_tv.vval.v_string, str, &score))
+           {
+               ptrs[i].score = score;
+               found_match = TRUE;
+           }
+       ++i;
+     }
+ 
+     if (found_match)
+     {
+       // Sort the list by the descending order of the match score
+       qsort((void *)ptrs, (size_t)len, sizeof(fuzzyItem_T),
+               fuzzy_item_compare);
+ 
+       // Copy the matching strings with 'score != -9999' to the return list
+       for (i = 0; i < len; i++)
+       {
+           if (ptrs[i].score == -9999)
+               break;
+           list_append_string(fmatchlist, ptrs[i].item->li_tv.vval.v_string,
+                   -1);
+       }
+     }
+ 
+     vim_free(ptrs);
+ }
+ 
+ /*
+  * "matchfuzzy()" function
+  */
+     void
+ f_matchfuzzy(typval_T *argvars, typval_T *rettv)
+ {
+     if (argvars[0].v_type != VAR_LIST)
+     {
+       emsg(_(e_listreq));
+       return;
+     }
+     if (argvars[0].vval.v_list == NULL)
+       return;
+     if (argvars[1].v_type != VAR_STRING
+           || argvars[1].vval.v_string == NULL)
+     {
+       semsg(_(e_invarg2), tv_get_string(&argvars[1]));
+       return;
+     }
+     if (rettv_list_alloc(rettv) == OK)
+       match_fuzzy(argvars[0].vval.v_list, tv_get_string(&argvars[1]),
+               rettv->vval.v_list);
+ }
+ 
  #endif
*** ../vim-8.2.1664/src/testdir/test_functions.vim      2020-09-04 
21:18:40.484161926 +0200
--- src/testdir/test_functions.vim      2020-09-11 22:15:07.264666300 +0200
***************
*** 2554,2557 ****
--- 2554,2581 ----
    call assert_fails('call browsedir("open", [])', 'E730:')
  endfunc
  
+ " Test for matchfuzzy()
+ func Test_matchfuzzy()
+   call assert_fails('call matchfuzzy(10, "abc")', 'E714:')
+   call assert_fails('call matchfuzzy(["abc"], [])', 'E730:')
+   call assert_equal([], matchfuzzy([], 'abc'))
+   call assert_equal([], matchfuzzy(['abc'], ''))
+   call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac'))
+   call assert_equal([], matchfuzzy([10, 20], 'ac'))
+   call assert_equal(['abc'], matchfuzzy(['abc'], 'abc'))
+   call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 
'cra'))
+   call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], 
matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa'))
+   call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one'))
+   call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 
'oneTwo'))
+   call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 
'oneTwo'))
+   call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 
matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa'))
+   call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257)))
+ 
+   %bw!
+   eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> 
bufadd(v) + bufload(v)})
+   let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl')
+   call assert_equal(1, len(l))
+   call assert_match('needle', l[0])
+ endfunc
+ 
  " vim: shiftwidth=2 sts=2 expandtab
*** ../vim-8.2.1664/src/version.c       2020-09-11 22:10:17.965597366 +0200
--- src/version.c       2020-09-11 22:23:52.163020448 +0200
***************
*** 752,753 ****
--- 752,755 ----
  {   /* Add new patch number below this line */
+ /**/
+     1665,
  /**/

-- 
SIGFUN -- signature too funny (core dumped)

 /// Bram Moolenaar -- [email protected] -- http://www.Moolenaar.net   \\\
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/vim_dev/202009112025.08BKPfIc965362%40masaka.moolenaar.net.

Raspunde prin e-mail lui