patch 9.1.1627: fuzzy matching can be improved Commit: https://github.com/vim/vim/commit/7e0df5eee9eab872261fd5eb0068cec967a2ba77 Author: Girish Palya <giris...@gmail.com> Date: Tue Aug 12 22:22:52 2025 +0200
patch 9.1.1627: fuzzy matching can be improved Problem: fuzzy-matching can be improved Solution: Implement a better fuzzy matching algorithm (Girish Palya) Replace fuzzy matching algorithm with improved fzy-based implementation The [current](https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/) fuzzy matching algorithm has several accuracy issues: * It struggles with CamelCase * It fails to prioritize matches at the beginning of strings, often ranking middle matches higher. After evaluating alternatives (see my comments [here](https://github.com/vim/vim/issues/17531#issuecomment-3112046897) and [here](https://github.com/vim/vim/issues/17531#issuecomment-3121593900)), I chose to adopt the [fzy](https://github.com/jhawthorn/fzy) algorithm, which: * Resolves the aforementioned issues. * Performs better. Implementation details This version is based on the original fzy [algorithm](https://github.com/jhawthorn/fzy/blob/master/src/match.c), with one key enhancement: **multibyte character support**. * The original implementation supports only ASCII. * This patch replaces ascii lookup tables with function calls, making it compatible with multibyte character sets. * Core logic (`match_row()` and `match_positions()`) remains faithful to the original, but now operates on codepoints rather than single-byte characters. Performance Tested against a dataset of **90,000 Linux kernel filenames**. Results (in milliseconds) show a **\~2x performance improvement** over the current fuzzy matching algorithm. ``` Search String Current Algo FZY Algo ------------------------------------------------- init 131.759 66.916 main 83.688 40.861 sig 98.348 39.699 index 109.222 30.738 ab 72.222 44.357 cd 83.036 54.739 a 58.94 62.242 b 43.612 43.442 c 64.39 67.442 k 40.585 36.371 z 34.708 22.781 w 38.033 30.109 cpa 82.596 38.116 arz 84.251 23.964 zzzz 35.823 22.75 dimag 110.686 29.646 xa 43.188 29.199 nha 73.953 31.001 nedax 94.775 29.568 dbue 79.846 25.902 fp 46.826 31.641 tr 90.951 55.883 kw 38.875 23.194 rp 101.575 55.775 kkkkkkkkkkkkkkkkkkkkkkkkkkkkk 48.519 30.921 ``` ```vim vim9script var haystack = readfile('/Users/gp/linux.files') var needles = ['init', 'main', 'sig', 'index', 'ab', 'cd', 'a', 'b', 'c', 'k', 'z', 'w', 'cpa', 'arz', 'zzzz', 'dimag', 'xa', 'nha', 'nedax', 'dbue', 'fp', 'tr', 'kw', 'rp', 'kkkkkkkkkkkkkkkkkkkkkkkkkkkkk'] for needle in needles var start = reltime() var tmp = matchfuzzy(haystack, needle) echom $'{needle}' (start->reltime()->reltimefloat() * 1000) endfor ``` Additional changes * Removed the "camelcase" option from both matchfuzzy() and matchfuzzypos(), as it's now obsolete with the improved algorithm. related: neovim/neovim#34101 fixes #17531 closes: #17900 Signed-off-by: Girish Palya <giris...@gmail.com> Signed-off-by: Christian Brabandt <c...@256bit.org> diff --git a/Filelist b/Filelist index b52506d75..413a88b65 100644 --- a/Filelist +++ b/Filelist @@ -82,6 +82,7 @@ SRC_ALL = \ src/findfile.c \ src/float.c \ src/fold.c \ + src/fuzzy.c \ src/getchar.c \ src/gc.c \ src/globals.h \ @@ -291,6 +292,7 @@ SRC_ALL = \ src/proto/findfile.pro \ src/proto/float.pro \ src/proto/fold.pro \ + src/proto/fuzzy.pro \ src/proto/getchar.pro \ src/proto/gc.pro \ src/proto/gui.pro \ diff --git a/runtime/doc/builtin.txt b/runtime/doc/builtin.txt index 9e2205af8..752d0515d 100644 --- a/runtime/doc/builtin.txt +++ b/runtime/doc/builtin.txt @@ -1,4 +1,4 @@ -*builtin.txt* For Vim version 9.1. Last change: 2025 Aug 10 +*builtin.txt* For Vim version 9.1. Last change: 2025 Aug 12 VIM REFERENCE MANUAL by Bram Moolenaar @@ -7421,9 +7421,6 @@ matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* given sequence. limit Maximum number of matches in {list} to be returned. Zero means no limit. - camelcase Use enhanced camel case scoring making results - better suited for completion related to - programming languages. Defaults to v:true. If {list} is a list of dictionaries, then the optional {dict} argument supports the following additional items: diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt index 8da707f1b..02668a047 100644 --- a/runtime/doc/pattern.txt +++ b/runtime/doc/pattern.txt @@ -1,4 +1,4 @@ -*pattern.txt* For Vim version 9.1. Last change: 2025 Aug 06 +*pattern.txt* For Vim version 9.1. Last change: 2025 Aug 12 VIM REFERENCE MANUAL by Bram Moolenaar @@ -1509,6 +1509,9 @@ characters in the search string. If the search string has multiple words, then each word is matched separately. So the words in the search string can be present in any order in a string. +Vim uses the same improved algorithm as the fzy project: +https://github.com/jhawthorn/fzy + Fuzzy matching assigns a score for each matched string based on the following criteria: - The number of sequentially matching characters. diff --git a/runtime/doc/version9.txt b/runtime/doc/version9.txt index fb278f0ab..281ae2d51 100644 --- a/runtime/doc/version9.txt +++ b/runtime/doc/version9.txt @@ -41723,6 +41723,8 @@ Functions: ~ - Add the optional {opts} |Dict| argument to |getchar()| to control: cursor behaviour, return type and whether or not to simplify the returned key - |chdir()| allows to optionally specify a scope argument +- |matchfuzzy()| and |matchfuzzypos()| use an improved fuzzy matching + algorithm (same as fzy). Others: ~ - the regex engines match correctly case-insensitive multi-byte characters diff --git a/src/Make_ami.mak b/src/Make_ami.mak index 7c9646735..839c6faa4 100644 --- a/src/Make_ami.mak +++ b/src/Make_ami.mak @@ -113,6 +113,7 @@ SRC += \ findfile.c \ float.c \ fold.c \ + fuzzy.c \ getchar.c \ gc.c \ hardcopy.c \ diff --git a/src/Make_cyg_ming.mak b/src/Make_cyg_ming.mak index 6ae1c0a05..3ffa8600d 100644 --- a/src/Make_cyg_ming.mak +++ b/src/Make_cyg_ming.mak @@ -823,6 +823,7 @@ OBJ = \ $(OUTDIR)/findfile.o \ $(OUTDIR)/float.o \ $(OUTDIR)/fold.o \ + $(OUTDIR)/fuzzy.o \ $(OUTDIR)/getchar.o \ $(OUTDIR)/gc.o \ $(OUTDIR)/gui_xim.o \ diff --git a/src/Make_mvc.mak b/src/Make_mvc.mak index f4899d566..383344d46 100644 --- a/src/Make_mvc.mak +++ b/src/Make_mvc.mak @@ -732,6 +732,7 @@ OBJ = \ $(OUTDIR)indfile.obj \ $(OUTDIR)loat.obj \ $(OUTDIR)old.obj \ + $(OUTDIR)uzzy.obj \ $(OUTDIR)\getchar.obj \ $(OUTDIR)\gc.obj \ $(OUTDIR)\gui_xim.obj \ @@ -1616,6 +1617,8 @@ $(OUTDIR)/float.obj: $(OUTDIR) float.c $(INCL) $(OUTDIR)/fold.obj: $(OUTDIR) fold.c $(INCL) +$(OUTDIR)/fuzzy.obj: $(OUTDIR) fuzzy.c $(INCL) + $(OUTDIR)/getchar.obj: $(OUTDIR) getchar.c $(INCL) $(OUTDIR)/gc.obj: $(OUTDIR) gc.c $(INCL) @@ -1961,6 +1964,7 @@ proto.h: \ proto/filepath.pro \ proto/findfile.pro \ proto/float.pro \ + proto/fuzzy.pro \ proto/getchar.pro \ proto/gc.pro \ proto/gui_xim.pro \ diff --git a/src/Make_vms.mms b/src/Make_vms.mms index 8eb2455a8..d452291af 100644 --- a/src/Make_vms.mms +++ b/src/Make_vms.mms @@ -529,6 +529,7 @@ SRC = \ findfile.c \ float.c \ fold.c \ + fuzzy.c \ getchar.c \ gc.c \ gui_xim.c \ @@ -665,6 +666,7 @@ OBJ = \ [.$(DEST)]findfile.obj \ [.$(DEST)]float.obj \ [.$(DEST)]fold.obj \ + [.$(DEST)]fuzzy.obj \ [.$(DEST)]getchar.obj \ [.$(DEST)]gc.obj \ [.$(DEST)]gui_xim.obj \ @@ -1141,6 +1143,10 @@ lua_env : [.$(DEST)]fold.obj : fold.c vim.h [.$(DEST)]config.h feature.h os_unix.h \ ascii.h keymap.h termdefs.h macros.h structs.h regexp.h gui.h beval.h \ [.proto]gui_beval.pro option.h ex_cmds.h proto.h errors.h globals.h +[.$(DEST)]fuzzy.obj : fuzzy.c vim.h [.$(DEST)]config.h feature.h os_unix.h \ + ascii.h keymap.h termdefs.h macros.h structs.h regexp.h \ + gui.h beval.h [.proto]gui_beval.pro option.h ex_cmds.h proto.h \ + errors.h globals.h [.$(DEST)]getchar.obj : getchar.c vim.h [.$(DEST)]config.h feature.h os_unix.h \ ascii.h keymap.h termdefs.h macros.h structs.h regexp.h \ gui.h beval.h [.proto]gui_beval.pro option.h ex_cmds.h proto.h \ diff --git a/src/Makefile b/src/Makefile index e3a860042..32c0d97d1 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1523,6 +1523,7 @@ BASIC_SRC = \ findfile.c \ float.c \ fold.c \ + fuzzy.c \ getchar.c \ gc.c \ gui_xim.c \ @@ -1701,6 +1702,7 @@ OBJ_COMMON = \ objects/findfile.o \ objects/float.o \ objects/fold.o \ + objects/fuzzy.o \ objects/getchar.o \ objects/gc.o \ objects/gui_xim.o \ @@ -1886,6 +1888,7 @@ PRO_AUTO = \ findfile.pro \ float.pro \ fold.pro \ + fuzzy.pro \ getchar.pro \ gc.pro \ gui_xim.pro \ @@ -3309,6 +3312,9 @@ objects/float.o: float.c objects/fold.o: fold.c $(CCC) -o $@ fold.c +objects/fuzzy.o: fuzzy.c + $(CCC) -o $@ fuzzy.c + objects/getchar.o: getchar.c $(CCC) -o $@ getchar.c @@ -3988,6 +3994,11 @@ objects/fold.o: fold.c vim.h protodef.h auto/config.h feature.h os_unix.h \ proto/gui_beval.pro structs.h regexp.h gui.h libvterm/include/vterm.h \ libvterm/include/vterm_keycodes.h alloc.h ex_cmds.h spell.h proto.h \ globals.h errors.h +objects/fuzzy.o: fuzzy.c vim.h protodef.h auto/config.h feature.h os_unix.h \ + auto/osdef.h ascii.h keymap.h termdefs.h macros.h option.h beval.h \ + proto/gui_beval.pro structs.h regexp.h gui.h libvterm/include/vterm.h \ + libvterm/include/vterm_keycodes.h alloc.h ex_cmds.h spell.h proto.h \ + globals.h errors.h objects/getchar.o: getchar.c vim.h protodef.h auto/config.h feature.h os_unix.h \ auto/osdef.h ascii.h keymap.h termdefs.h macros.h option.h beval.h \ proto/gui_beval.pro structs.h regexp.h gui.h libvterm/include/vterm.h \ diff --git a/src/README.md b/src/README.md index a01f695d9..6c6719934 100644 --- a/src/README.md +++ b/src/README.md @@ -48,6 +48,7 @@ fileio.c | reading and writing files filepath.c | dealing with file names and paths findfile.c | search for files in 'path' fold.c | folding +fuzzy.c | fuzzy matching getchar.c | getting characters and key mapping gc.c | garbage collection help.c | vim help related functions diff --git a/src/buffer.c b/src/buffer.c index 5c8cd259b..1ca561fce 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -2945,12 +2945,14 @@ ExpandBufnames( { p = NULL; // first try matching with the short file name - if ((score = fuzzy_match_str(buf->b_sfname, pat)) != 0) + if ((score = fuzzy_match_str(buf->b_sfname, pat)) + != FUZZY_SCORE_NONE) p = buf->b_sfname; if (p == NULL) { // next try matching with the full path file name - if ((score = fuzzy_match_str(buf->b_ffname, pat)) != 0) + if ((score = fuzzy_match_str(buf->b_ffname, pat)) + != FUZZY_SCORE_NONE) p = buf->b_ffname; } } diff --git a/src/cmdexpand.c b/src/cmdexpand.c index 4ef187911..dc65ae8ee 100644 --- a/src/cmdexpand.c +++ b/src/cmdexpand.c @@ -3612,7 +3612,7 @@ ExpandGenericExt( else { score = fuzzy_match_str(str, pat); - match = (score != 0); + match = (score != FUZZY_SCORE_NONE); } } else @@ -4022,7 +4022,7 @@ ExpandUserDefined( else { score = fuzzy_match_str(s, pat); - match = (score != 0); + match = (score != FUZZY_SCORE_NONE); } } else diff --git a/src/fuzzy.c b/src/fuzzy.c new file mode 100644 index 000000000..6ebfa5b21 --- /dev/null +++ b/src/fuzzy.c @@ -0,0 +1,1141 @@ +/* vi:set ts=8 sts=4 sw=4 noet: + * + * VIM - Vi IMproved by Bram Moolenaar + * + * Do ":help uganda" in Vim to read copying and usage conditions. + * Do ":help credits" in Vim to see a list of people who contributed. + * See README.txt for an overview of the Vim source code. + * + * fuzzy.c: fuzzy matching algorithm and related functions + * + * Portions of this file are adapted from fzy (https://github.com/jhawthorn/fzy) + * Original code: + * Copyright (c) 2014 John Hawthorn + * Licensed under the MIT License. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "vim.h" + +#if defined(FEAT_EVAL) || defined(FEAT_PROTO) +static int fuzzy_match_item_compare(const void *s1, const void *s2); +static void fuzzy_match_in_list(list_T *l, char_u *str, int matchseq, char_u *key, callback_T *item_cb, int retmatchpos, list_T *fmatchlist, long max_matches); +static void do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos); +#endif +static int fuzzy_match_str_compare(const void *s1, const void *s2); +static void fuzzy_match_str_sort(fuzmatch_str_T *fm, int sz); +static int fuzzy_match_func_compare(const void *s1, const void *s2); +static void fuzzy_match_func_sort(fuzmatch_str_T *fm, int sz); + +typedef double score_t; +static score_t match_positions(char_u *needle, char_u *haystack, int_u *positions); +static int has_match(char_u *needle, char_u *haystack); + +#define SCORE_MAX INFINITY +#define SCORE_MIN -INFINITY +#define SCORE_SCALE 1000 + +typedef struct +{ + int idx; // used for stable sort + listitem_T *item; + int score; + list_T *lmatchpos; + char_u *pat; + char_u *itemstr; + int itemstr_allocated; + int startpos; +} fuzzyItem_T; + +/* + * fuzzy_match() + * + * Returns TRUE if "pat_arg" matches "str". Also returns the match score in + * "outScore" and the matching character positions in "matches". + */ + int +fuzzy_match( + char_u *str, + char_u *pat_arg, + int matchseq, + int *outScore, + int_u *matches, + int maxMatches) +{ + char_u *save_pat; + char_u *pat; + char_u *p; + int complete = FALSE; + int score = 0; + int numMatches = 0; + score_t fzy_score; + + *outScore = 0; + + save_pat = vim_strsave(pat_arg); + if (save_pat == NULL) + return FALSE; + pat = save_pat; + p = pat; + + // Try matching each word in 'pat_arg' in 'str' + while (TRUE) + { + if (matchseq) + complete = TRUE; + else + { + // Extract one word from the pattern (separated by space) + p = skipwhite(p); + if (*p == NUL) + break; + pat = p; + while (*p != NUL && !VIM_ISWHITE(PTR2CHAR(p))) + { + if (has_mbyte) + MB_PTR_ADV(p); + else + ++p; + } + if (*p == NUL) // processed all the words + complete = TRUE; + *p = NUL; + } + + score = FUZZY_SCORE_NONE; + if (has_match(pat, str)) + { + fzy_score = match_positions(pat, str, matches + numMatches); + score = (fzy_score == SCORE_MIN) ? INT_MIN + 1 + : (fzy_score == SCORE_MAX) ? INT_MAX + : (fzy_score < 0) ? (int)ceil(fzy_score * SCORE_SCALE - 0.5) + : (int)floor(fzy_score * SCORE_SCALE + 0.5); + } + + if (score == FUZZY_SCORE_NONE) + { + numMatches = 0; + *outScore = FUZZY_SCORE_NONE; + break; + } + + if (score > 0 && *outScore > INT_MAX - score) + *outScore = INT_MAX; + else if (score < 0 && *outScore < INT_MIN + 1 - score) + *outScore = INT_MIN + 1; + else + *outScore += score; + + numMatches += MB_CHARLEN(pat); + + if (complete || numMatches >= maxMatches) + break; + + // try matching the next word + ++p; + } + + vim_free(save_pat); + return numMatches != 0; +} + +#if defined(FEAT_EVAL) || defined(FEAT_PROTO) +/* + * Sort the fuzzy matches in the descending order of the match score. + * For items with same score, retain the order using the index (stable sort) + */ + static int +fuzzy_match_item_compare(const void *s1, const void *s2) +{ + int v1 = ((fuzzyItem_T *)s1)->score; + int v2 = ((fuzzyItem_T *)s2)->score; + + if (v1 == v2) + { + int exact_match1 = FALSE, exact_match2 = FALSE; + char_u *pat = ((fuzzyItem_T *)s1)->pat; + int patlen = (int)STRLEN(pat); + int startpos = ((fuzzyItem_T *)s1)->startpos; + exact_match1 = (startpos >= 0) && STRNCMP(pat, + ((fuzzyItem_T *)s1)->itemstr + startpos, patlen) == 0; + startpos = ((fuzzyItem_T *)s2)->startpos; + exact_match2 = (startpos >= 0) && STRNCMP(pat, + ((fuzzyItem_T *)s2)->itemstr + startpos, patlen) == 0; + + if (exact_match1 == exact_match2) + { + int idx1 = ((fuzzyItem_T *)s1)->idx; + int idx2 = ((fuzzyItem_T *)s2)->idx; + return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; + } + else if (exact_match2) + return 1; + return -1; + } + else + return v1 > v2 ? -1 : 1; +} + +/* + * Fuzzy search the string 'str' in a list of 'items' and return the matching + * strings in 'fmatchlist'. + * If 'matchseq' is TRUE, then for multi-word search strings, match all the + * words in sequence. + * If 'items' is a list of strings, then search for 'str' in the list. + * If 'items' is a list of dicts, then either use 'key' to lookup the string + * for each item or use 'item_cb' Funcref function to get the string. + * If 'retmatchpos' is TRUE, then return a list of positions where 'str' + * matches for each item. + */ + static void +fuzzy_match_in_list( + list_T *l, + char_u *str, + int matchseq, + char_u *key, + callback_T *item_cb, + int retmatchpos, + list_T *fmatchlist, + long max_matches) +{ + long len; + fuzzyItem_T *items; + listitem_T *li; + int i = 0; + long match_count = 0; + int_u matches[FUZZY_MATCH_MAX_LEN]; + + len = list_len(l); + if (len == 0) + return; + if (max_matches > 0 && len > max_matches) + len = max_matches; + + items = ALLOC_CLEAR_MULT(fuzzyItem_T, len); + if (items == NULL) + return; + + // For all the string items in items, get the fuzzy matching score + FOR_ALL_LIST_ITEMS(l, li) + { + int score; + char_u *itemstr; + typval_T rettv; + int itemstr_allocate = FALSE; + + if (max_matches > 0 && match_count >= max_matches) + break; + + itemstr = NULL; + rettv.v_type = VAR_UNKNOWN; + if (li->li_tv.v_type == VAR_STRING) // list of strings + itemstr = li->li_tv.vval.v_string; + else if (li->li_tv.v_type == VAR_DICT + && (key != NULL || item_cb->cb_name != NULL)) + { + // For a dict, either use the specified key to lookup the string or + // use the specified callback function to get the string. + if (key != NULL) + itemstr = dict_get_string(li->li_tv.vval.v_dict, + (char *)key, FALSE); + else + { + typval_T argv[2]; + + // Invoke the supplied callback (if any) to get the dict item + li->li_tv.vval.v_dict->dv_refcount++; + argv[0].v_type = VAR_DICT; + argv[0].vval.v_dict = li->li_tv.vval.v_dict; + argv[1].v_type = VAR_UNKNOWN; + if (call_callback(item_cb, -1, &rettv, 1, argv) != FAIL) + { + if (rettv.v_type == VAR_STRING) + { + itemstr = rettv.vval.v_string; + itemstr_allocate = TRUE; + } + } + dict_unref(li->li_tv.vval.v_dict); + } + } + + if (itemstr != NULL + && fuzzy_match(itemstr, str, matchseq, &score, matches, + FUZZY_MATCH_MAX_LEN)) + { + items[match_count].idx = match_count; + items[match_count].item = li; + items[match_count].score = score; + items[match_count].pat = str; + items[match_count].startpos = matches[0]; + if (itemstr_allocate) + items[match_count].itemstr = vim_strsave(itemstr); + else + items[match_count].itemstr = itemstr; + items[match_count].itemstr_allocated = itemstr_allocate; + + // Copy the list of matching positions in itemstr to a list + { + int j = 0; + char_u *p; + + items[match_count].lmatchpos = list_alloc(); + if (items[match_count].lmatchpos == NULL) + goto done; + + p = str; + while (*p != NUL && j < FUZZY_MATCH_MAX_LEN) + { + if (!VIM_ISWHITE(PTR2CHAR(p)) || matchseq) + { + if (list_append_number(items[match_count].lmatchpos, + matches[j]) == FAIL) + goto done; + j++; + } + if (has_mbyte) + MB_PTR_ADV(p); + else + ++p; + } + } + ++match_count; + } + clear_tv(&rettv); + } + + if (match_count > 0) + { + list_T *retlist; + + // Sort the list by the descending order of the match score + qsort((void *)items, (size_t)match_count, sizeof(fuzzyItem_T), + fuzzy_match_item_compare); + + // For matchfuzzy(), return a list of matched strings. + // ['str1', 'str2', 'str3'] + // For matchfuzzypos(), return a list with three items. + // The first item is a list of matched strings. The second item + // is a list of lists where each list item is a list of matched + // character positions. The third item is a list of matching scores. + // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]] + if (retmatchpos) + { + li = list_find(fmatchlist, 0); + if (li == NULL || li->li_tv.vval.v_list == NULL) + goto done; + retlist = li->li_tv.vval.v_list; + } + else + retlist = fmatchlist; + + // Copy the matching strings to the return list + for (i = 0; i < match_count; i++) + list_append_tv(retlist, &items[i].item->li_tv); + + // next copy the list of matching positions + if (retmatchpos) + { + li = list_find(fmatchlist, -2); + if (li == NULL || li->li_tv.vval.v_list == NULL) + goto done; + retlist = li->li_tv.vval.v_list; + + for (i = 0; i < match_count; i++) + if (items[i].lmatchpos != NULL + && list_append_list(retlist, items[i].lmatchpos) == FAIL) + goto done; + + // copy the matching scores + li = list_find(fmatchlist, -1); + if (li == NULL || li->li_tv.vval.v_list == NULL) + goto done; + retlist = li->li_tv.vval.v_list; + for (i = 0; i < match_count; i++) + if (list_append_number(retlist, items[i].score) == FAIL) + goto done; + } + } + +done: + for (i = 0; i < match_count; i++) + if (items[i].itemstr_allocated) + vim_free(items[i].itemstr); + vim_free(items); +} + +/* + * Do fuzzy matching. Returns the list of matched strings in 'rettv'. + * If 'retmatchpos' is TRUE, also returns the matching character positions. + */ + static void +do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos) +{ + callback_T cb; + char_u *key = NULL; + int ret; + int matchseq = FALSE; + long max_matches = 0; + + if (in_vim9script() + && (check_for_list_arg(argvars, 0) == FAIL + || check_for_string_arg(argvars, 1) == FAIL + || check_for_opt_dict_arg(argvars, 2) == FAIL)) + return; + + CLEAR_POINTER(&cb); + + // validate and get the arguments + if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) + { + semsg(_(e_argument_of_str_must_be_list), + retmatchpos ? "matchfuzzypos()" : "matchfuzzy()"); + return; + } + if (argvars[1].v_type != VAR_STRING + || argvars[1].vval.v_string == NULL) + { + semsg(_(e_invalid_argument_str), tv_get_string(&argvars[1])); + return; + } + + if (argvars[2].v_type != VAR_UNKNOWN) + { + dict_T *d; + dictitem_T *di; + + if (check_for_nonnull_dict_arg(argvars, 2) == FAIL) + return; + + // To search a dict, either a callback function or a key can be + // specified. + d = argvars[2].vval.v_dict; + if ((di = dict_find(d, (char_u *)"key", -1)) != NULL) + { + if (di->di_tv.v_type != VAR_STRING + || di->di_tv.vval.v_string == NULL + || *di->di_tv.vval.v_string == NUL) + { + semsg(_(e_invalid_value_for_argument_str_str), "key", + tv_get_string(&di->di_tv)); + return; + } + key = tv_get_string(&di->di_tv); + } + else if ((di = dict_find(d, (char_u *)"text_cb", -1)) != NULL) + { + cb = get_callback(&di->di_tv); + if (cb.cb_name == NULL) + { + semsg(_(e_invalid_value_for_argument_str), "text_cb"); + return; + } + } + + if ((di = dict_find(d, (char_u *)"limit", -1)) != NULL) + { + if (di->di_tv.v_type != VAR_NUMBER) + { + semsg(_(e_invalid_value_for_argument_str), "limit"); + return; + } + max_matches = (long)tv_get_number_chk(&di->di_tv, NULL); + } + + if (dict_has_key(d, "matchseq")) + matchseq = TRUE; + } + + // get the fuzzy matches + ret = rettv_list_alloc(rettv); + if (ret == FAIL) + goto done; + if (retmatchpos) + { + list_T *l; + + // For matchfuzzypos(), a list with three items are returned. First + // item is a list of matching strings, the second item is a list of + // lists with matching positions within each string and the third item + // is the list of scores of the matches. + l = list_alloc(); + if (l == NULL) + goto done; + if (list_append_list(rettv->vval.v_list, l) == FAIL) + { + vim_free(l); + goto done; + } + l = list_alloc(); + if (l == NULL) + goto done; + if (list_append_list(rettv->vval.v_list, l) == FAIL) + { + vim_free(l); + goto done; + } + l = list_alloc(); + if (l == NULL) + goto done; + if (list_append_list(rettv->vval.v_list, l) == FAIL) + { + vim_free(l); + goto done; + } + } + + fuzzy_match_in_list(argvars[0].vval.v_list, tv_get_string(&argvars[1]), + matchseq, key, &cb, retmatchpos, rettv->vval.v_list, max_matches); + +done: + free_callback(&cb); +} + +/* + * "matchfuzzy()" function + */ + void +f_matchfuzzy(typval_T *argvars, typval_T *rettv) +{ + do_fuzzymatch(argvars, rettv, FALSE); +} + +/* + * "matchfuzzypos()" function + */ + void +f_matchfuzzypos(typval_T *argvars, typval_T *rettv) +{ + do_fuzzymatch(argvars, rettv, TRUE); +} +#endif + +/* + * Same as fuzzy_match_item_compare() except for use with a string match + */ + static int +fuzzy_match_str_compare(const void *s1, const void *s2) +{ + int v1 = ((fuzmatch_str_T *)s1)->score; + int v2 = ((fuzmatch_str_T *)s2)->score; + int idx1 = ((fuzmatch_str_T *)s1)->idx; + int idx2 = ((fuzmatch_str_T *)s2)->idx; + + if (v1 == v2) + return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; + else + return v1 > v2 ? -1 : 1; +} + +/* + * Sort fuzzy matches by score + */ + static void +fuzzy_match_str_sort(fuzmatch_str_T *fm, int sz) +{ + // Sort the list by the descending order of the match score + qsort((void *)fm, (size_t)sz, sizeof(fuzmatch_str_T), + fuzzy_match_str_compare); +} + +/* + * Same as fuzzy_match_item_compare() except for use with a function name + * string match. <SNR> functions should be sorted to the end. + */ + static int +fuzzy_match_func_compare(const void *s1, const void *s2) +{ + int v1 = ((fuzmatch_str_T *)s1)->score; + int v2 = ((fuzmatch_str_T *)s2)->score; + int idx1 = ((fuzmatch_str_T *)s1)->idx; + int idx2 = ((fuzmatch_str_T *)s2)->idx; + char_u *str1 = ((fuzmatch_str_T *)s1)->str; + char_u *str2 = ((fuzmatch_str_T *)s2)->str; + + if (*str1 != '<' && *str2 == '<') + return -1; + if (*str1 == '<' && *str2 != '<') + return 1; + if (v1 == v2) + return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; + else + return v1 > v2 ? -1 : 1; +} + +/* + * Sort fuzzy matches of function names by score. + * <SNR> functions should be sorted to the end. + */ + static void +fuzzy_match_func_sort(fuzmatch_str_T *fm, int sz) +{ + // Sort the list by the descending order of the match score + qsort((void *)fm, (size_t)sz, sizeof(fuzmatch_str_T), + fuzzy_match_func_compare); +} + +/* + * Fuzzy match 'pat' in 'str'. Returns 0 if there is no match. Otherwise, + * returns the match score. + */ + int +fuzzy_match_str(char_u *str, char_u *pat) +{ + int score = FUZZY_SCORE_NONE; + int_u matchpos[FUZZY_MATCH_MAX_LEN]; + + if (str == NULL || pat == NULL) + return score; + + fuzzy_match(str, pat, TRUE, &score, matchpos, + sizeof(matchpos) / sizeof(matchpos[0])); + + return score; +} + +/* + * Fuzzy match the position of string 'pat' in string 'str'. + * Returns a dynamic array of matching positions. If there is no match, + * returns NULL. + */ + garray_T * +fuzzy_match_str_with_pos(char_u *str UNUSED, char_u *pat UNUSED) +{ +#ifdef FEAT_SEARCH_EXTRA + int score = FUZZY_SCORE_NONE; + garray_T *match_positions = NULL; + int_u matches[FUZZY_MATCH_MAX_LEN]; + int j = 0; + + if (str == NULL || pat == NULL) + return NULL; + + match_positions = ALLOC_ONE(garray_T); + if (match_positions == NULL) + return NULL; + ga_init2(match_positions, sizeof(int_u), 10); + + if (!fuzzy_match(str, pat, FALSE, &score, matches, FUZZY_MATCH_MAX_LEN) + || score == FUZZY_SCORE_NONE) + { + ga_clear(match_positions); + vim_free(match_positions); + return NULL; + } + + for (char_u *p = pat; *p != NUL; MB_PTR_ADV(p)) + { + if (!VIM_ISWHITE(PTR2CHAR(p))) + { + ga_grow(match_positions, 1); + ((int_u *)match_positions->ga_data)[match_positions->ga_len] = + matches[j]; + match_positions->ga_len++; + j++; + } + } + + return match_positions; +#else + return NULL; +#endif +} + +/* + * This function splits the line pointed to by `*ptr` into words and performs + * a fuzzy match for the pattern `pat` on each word. It iterates through the + * line, moving `*ptr` to the start of each word during the process. + * + * If a match is found: + * - `*ptr` points to the start of the matched word. + * - `*len` is set to the length of the matched word. + * - `*score` contains the match score. + * + * If no match is found, `*ptr` is updated to the end of the line. + */ + int +fuzzy_match_str_in_line( + char_u **ptr, + char_u *pat, + int *len, + pos_T *current_pos, + int *score) +{ + char_u *str = *ptr; + char_u *strBegin = str; + char_u *end = NULL; + char_u *start = NULL; + int found = FALSE; + char save_end; + char_u *line_end = NULL; + + if (str == NULL || pat == NULL) + return found; + line_end = find_line_end(str); + + while (str < line_end) + { + // Skip non-word characters + start = find_word_start(str); + if (*start == NUL) + break; + end = find_word_end(start); + + // Extract the word from start to end + save_end = *end; + *end = NUL; + + // Perform fuzzy match + *score = fuzzy_match_str(start, pat); + *end = save_end; + + if (*score != FUZZY_SCORE_NONE) + { + *len = (int)(end - start); + found = TRUE; + *ptr = start; + if (current_pos) + current_pos->col += (int)(end - strBegin); + break; + } + + // Move to the end of the current word for the next iteration + str = end; + // Ensure we continue searching after the current word + while (*str != NUL && !vim_iswordp(str)) + MB_PTR_ADV(str); + } + + if (!found) + *ptr = line_end; + + return found; +} + +/* + * Search for the next fuzzy match in the specified buffer. + * This function attempts to find the next occurrence of the given pattern + * in the buffer, starting from the current position. It handles line wrapping + * and direction of search. + * + * Return TRUE if a match is found, otherwise FALSE. + */ + int +search_for_fuzzy_match( + buf_T *buf, + pos_T *pos, + char_u *pattern, + int dir, + pos_T *start_pos, + int *len, + char_u **ptr, + int *score) +{ + pos_T current_pos = *pos; + pos_T circly_end; + int found_new_match = FALSE; + int looped_around = FALSE; + int whole_line = ctrl_x_mode_whole_line(); + + if (buf == curbuf) + circly_end = *start_pos; + else + { + circly_end.lnum = buf->b_ml.ml_line_count; + circly_end.col = 0; + circly_end.coladd = 0; + } + + if (whole_line && start_pos->lnum != pos->lnum) + current_pos.lnum += dir; + + do + { + + // Check if looped around and back to start position + if (looped_around && EQUAL_POS(current_pos, circly_end)) + break; + + // Ensure current_pos is valid + if (current_pos.lnum >= 1 && current_pos.lnum <= buf->b_ml.ml_line_count) + { + // Get the current line buffer + *ptr = ml_get_buf(buf, current_pos.lnum, FALSE); + if (!whole_line) + *ptr += current_pos.col; + + // If ptr is end of line is reached, move to next line + // or previous line based on direction + if (*ptr != NULL && **ptr != NUL) + { + if (!whole_line) + { + // Try to find a fuzzy match in the current line starting + // from current position + found_new_match = fuzzy_match_str_in_line(ptr, pattern, + len, ¤t_pos, score); + if (found_new_match) + { + *pos = current_pos; + break; + } + else if (looped_around && current_pos.lnum == circly_end.lnum) + break; + } + else + { + if (fuzzy_match_str(*ptr, pattern) != FUZZY_SCORE_NONE) + { + found_new_match = TRUE; + *pos = current_pos; + *len = (int)ml_get_buf_len(buf, current_pos.lnum); + break; + } + } + } + } + + // Move to the next line or previous line based on direction + if (dir == FORWARD) + { + if (++current_pos.lnum > buf->b_ml.ml_line_count) + { + if (p_ws) + { + current_pos.lnum = 1; + looped_around = TRUE; + } + else + break; + } + } + else + { + if (--current_pos.lnum < 1) + { + if (p_ws) + { + current_pos.lnum = buf->b_ml.ml_line_count; + looped_around = TRUE; + } + else + break; + + } + } + current_pos.col = 0; + } while (TRUE); + + return found_new_match; +} + +/* + * Free an array of fuzzy string matches "fuzmatch[count]". + */ + void +fuzmatch_str_free(fuzmatch_str_T *fuzmatch, int count) +{ + int i; + + if (fuzmatch == NULL) + return; + for (i = 0; i < count; ++i) + vim_free(fuzmatch[i].str); + vim_free(fuzmatch); +} + +/* + * Copy a list of fuzzy matches into a string list after sorting the matches by + * the fuzzy score. Frees the memory allocated for 'fuzmatch'. + * Returns OK on success and FAIL on memory allocation failure. + */ + int +fuzzymatches_to_strmatches( + fuzmatch_str_T *fuzmatch, + char_u ***matches, + int count, + int funcsort) +{ + int i; + + if (count <= 0) + return OK; + + *matches = ALLOC_MULT(char_u *, count); + if (*matches == NULL) + { + fuzmatch_str_free(fuzmatch, count); + return FAIL; + } + + // Sort the list by the descending order of the match score + if (funcsort) + fuzzy_match_func_sort((void *)fuzmatch, (size_t)count); + else + fuzzy_match_str_sort((void *)fuzmatch, (size_t)count); + + for (i = 0; i < count; i++) + (*matches)[i] = fuzmatch[i].str; + vim_free(fuzmatch); + + return OK; +} + +/* + * Fuzzy match algorithm ported from https://github.com/jhawthorn/fzy. + * This implementation extends the original by supporting multibyte characters. + */ + +#define MATCH_MAX_LEN FUZZY_MATCH_MAX_LEN + +#define SCORE_GAP_LEADING -0.005 +#define SCORE_GAP_TRAILING -0.005 +#define SCORE_GAP_INNER -0.01 +#define SCORE_MATCH_CONSECUTIVE 1.0 +#define SCORE_MATCH_SLASH 0.9 +#define SCORE_MATCH_WORD 0.8 +#define SCORE_MATCH_CAPITAL 0.7 +#define SCORE_MATCH_DOT 0.6 + + static int +has_match(char_u *needle, char_u *haystack) +{ + while (*needle != NUL) + { + int n_char = mb_ptr2char(needle); + char_u *p = haystack; + int h_char; + int matched = FALSE; + + while (*p != NUL) + { + h_char = mb_ptr2char(p); + + if (n_char == h_char + || MB_TOUPPER(n_char) == h_char) + { + matched = TRUE; + break; + } + p += mb_ptr2len(p); + } + + if (!matched) + return 0; + + needle += mb_ptr2len(needle); + haystack = p + mb_ptr2len(p); + } + return 1; +} + +typedef struct match_struct +{ + int needle_len; + int haystack_len; + int lower_needle[MATCH_MAX_LEN]; // stores codepoints + int lower_haystack[MATCH_MAX_LEN]; // stores codepoints + score_t match_bonus[MATCH_MAX_LEN]; +} match_struct; + +#define IS_WORD_SEP(c) ((c) == '-' || (c) == '_' || (c) == ' ') +#define IS_PATH_SEP(c) ((c) == '/') +#define IS_DOT(c) ((c) == '.') + + static score_t +compute_bonus_codepoint(int last_c, int c) +{ + if (ASCII_ISALNUM(c) || vim_iswordc(c)) + { + if (IS_PATH_SEP(last_c)) + return SCORE_MATCH_SLASH; + if (IS_WORD_SEP(last_c)) + return SCORE_MATCH_WORD; + if (IS_DOT(last_c)) + return SCORE_MATCH_DOT; + if (MB_ISUPPER(c) && MB_ISLOWER(last_c)) + return SCORE_MATCH_CAPITAL; + } + return 0; +} + + static void +setup_match_struct(match_struct *match, char_u *needle, + char_u *haystack) +{ + int i = 0; + char_u *p = needle; + while (*p != NUL && i < MATCH_MAX_LEN) + { + int c = mb_ptr2char(p); + match->lower_needle[i++] = MB_TOLOWER(c); + MB_PTR_ADV(p); + } + match->needle_len = i; + + i = 0; + p = haystack; + int prev_c = '/'; + while (*p != NUL && i < MATCH_MAX_LEN) + { + int c = mb_ptr2char(p); + match->lower_haystack[i] = MB_TOLOWER(c); + match->match_bonus[i] = compute_bonus_codepoint(prev_c, c); + prev_c = c; + MB_PTR_ADV(p); + i++; + } + match->haystack_len = i; +} + + static inline void +match_row(const match_struct *match, int row, score_t *curr_D, + score_t *curr_M, const score_t *last_D, const score_t *last_M) +{ + int n = match->needle_len; + int m = match->haystack_len; + int i = row; + + const int *lower_needle = match->lower_needle; + const int *lower_haystack = match->lower_haystack; + const score_t *match_bonus = match->match_bonus; + + score_t prev_score = SCORE_MIN; + score_t gap_score = i == n - 1 ? SCORE_GAP_TRAILING : SCORE_GAP_INNER; + + /* These will not be used with this value, but not all compilers see it */ + score_t prev_M = SCORE_MIN, prev_D = SCORE_MIN; + + for (int j = 0; j < m; j++) + { + if (lower_needle[i] == lower_haystack[j]) + { + score_t score = SCORE_MIN; + if (!i) + { + score = (j * SCORE_GAP_LEADING) + match_bonus[j]; + } + else if (j) + { /* i > 0 && j > 0*/ + score = MAX( + prev_M + match_bonus[j], + /* consecutive match, doesn't stack with match_bonus */ + prev_D + SCORE_MATCH_CONSECUTIVE); + } + prev_D = last_D[j]; + prev_M = last_M[j]; + curr_D[j] = score; + curr_M[j] = prev_score = MAX(score, prev_score + gap_score); + } + else + { + prev_D = last_D[j]; + prev_M = last_M[j]; + curr_D[j] = SCORE_MIN; + curr_M[j] = prev_score = prev_score + gap_score; + } + } +} + + static score_t +match_positions(char_u *needle, char_u *haystack, int_u *positions) +{ + if (!*needle) + return SCORE_MIN; + + match_struct match; + setup_match_struct(&match, needle, haystack); + + int n = match.needle_len; + int m = match.haystack_len; + + if (m > MATCH_MAX_LEN || n > m) + { + /* + * Unreasonably large candidate: return no score + * If it is a valid match it will still be returned, it will + * just be ranked below any reasonably sized candidates + */ + return SCORE_MIN; + } + else if (n == m) + { + /* Since this method can only be called with a haystack which + * matches needle. If the lengths of the strings are equal the + * strings themselves must also be equal (ignoring case). + */ + if (positions) + for (int i = 0; i < n; i++) + positions[i] = i; + return SCORE_MAX; + } + + /* + * D[][] Stores the best score for this position ending with a match. + * M[][] Stores the best possible score at this position. + */ + score_t (*D)[MATCH_MAX_LEN], (*M)[MATCH_MAX_LEN]; + M = malloc(sizeof(score_t) * MATCH_MAX_LEN * n); + D = malloc(sizeof(score_t) * MATCH_MAX_LEN * n); + + match_row(&match, 0, D[0], M[0], D[0], M[0]); + for (int i = 1; i < n; i++) + match_row(&match, i, D[i], M[i], D[i - 1], M[i - 1]); + + /* backtrace to find the positions of optimal matching */ + if (positions) + { + int match_required = 0; + for (int i = n - 1, j = m - 1; i >= 0; i--) + { + for (; j >= 0; j--) + { + /* + * There may be multiple paths which result in + * the optimal weight. + * + * For simplicity, we will pick the first one + * we encounter, the latest in the candidate + * string. + */ + if (D[i][j] != SCORE_MIN && + (match_required || D[i][j] == M[i][j])) + { + /* If this score was determined using + * SCORE_MATCH_CONSECUTIVE, the + * previous character MUST be a match + */ + match_required = + i && j && + M[i][j] == D[i - 1][j - 1] + SCORE_MATCH_CONSECUTIVE; + positions[i] = j--; + break; + } + } + } + } + + score_t result = M[n - 1][m - 1]; + + vim_free(M); + vim_free(D); + + return result; +} diff --git a/src/insexpand.c b/src/insexpand.c index bc77b2d02..454466b30 100644 --- a/src/insexpand.c +++ b/src/insexpand.c @@ -966,7 +966,7 @@ ins_compl_add( // current match in the list of matches . if (compl_first_match == NULL) match->cp_next = match->cp_prev = NULL; - else if (cfc_has_mode() && score > 0 && compl_get_longest) + else if (cfc_has_mode() && score != FUZZY_SCORE_NONE && compl_get_longest) { current = compl_first_match->cp_next; prev = compl_first_match; @@ -1193,7 +1193,8 @@ ins_compl_add_matches( for (int i = 0; i < num_matches && add_r != FAIL; i++) { add_r = ins_compl_add(matches[i], -1, NULL, NULL, NULL, dir, - CP_FAST | (icase ? CP_ICASE : 0), FALSE, NULL, 0); + CP_FAST | (icase ? CP_ICASE : 0), FALSE, NULL, + FUZZY_SCORE_NONE); if (add_r == OK) // if dir was BACKWARD then honor it just once dir = FORWARD; @@ -1430,7 +1431,7 @@ cp_compare_nearest(const void* a, const void* b) { int score_a = ((compl_T*)a)->cp_score; int score_b = ((compl_T*)b)->cp_score; - if (score_a == 0 || score_b == 0) + if (score_a == FUZZY_SCORE_NONE || score_b == FUZZY_SCORE_NONE) return 0; return (score_a > score_b) ? 1 : (score_a < score_b) ? -1 : 0; } @@ -1627,7 +1628,7 @@ ins_compl_build_pum(void) && (leader->string == NULL || ins_compl_equal(compl, leader->string, (int)leader->length) - || (fuzzy_filter && compl->cp_score > 0))) + || (fuzzy_filter && compl->cp_score != FUZZY_SCORE_NONE))) { // Limit number of items from each source if max_items is set. int match_limit_exceeded = FALSE; @@ -2001,7 +2002,7 @@ thesaurus_add_words_in_line( if (wstart != skip_word) { status = ins_compl_add_infercase(wstart, (int)(ptr - wstart), p_ic, - fname, dir, FALSE, 0); + fname, dir, FALSE, FUZZY_SCORE_NONE); if (status == FAIL) break; } @@ -2072,7 +2073,7 @@ ins_compl_files( : find_word_end(ptr); add_r = ins_compl_add_infercase(regmatch->startp[0], (int)(ptr - regmatch->startp[0]), - p_ic, files[i], *dir, FALSE, 0); + p_ic, files[i], *dir, FALSE, FUZZY_SCORE_NONE); if (thesaurus) { // For a thesaurus, add all the words in the line @@ -3662,7 +3663,7 @@ ins_compl_add_tv(typval_T *tv, int dir, int fast) return FAIL; } status = ins_compl_add(word, -1, NULL, cptext, - &user_data, dir, flags, dup, user_hl, 0); + &user_data, dir, flags, dup, user_hl, FUZZY_SCORE_NONE); if (status != OK) clear_tv(&user_data); return status; @@ -3757,7 +3758,7 @@ set_completion(colnr_T startcol, list_T *list) compl_orig_text.length = (size_t)compl_length; if (ins_compl_add(compl_orig_text.string, (int)compl_orig_text.length, NULL, NULL, NULL, 0, - flags | CP_FAST, FALSE, NULL, 0) != OK) + flags | CP_FAST, FALSE, NULL, FUZZY_SCORE_NONE) != OK) return; ctrl_x_mode = CTRL_X_EVAL; @@ -4747,7 +4748,7 @@ get_next_filename_completion(void) { ptr = matches[i]; score = fuzzy_match_str(ptr, leader); - if (score > 0) + if (score != FUZZY_SCORE_NONE) { if (ga_grow(&fuzzy_indices, 1) == OK) { @@ -4959,7 +4960,7 @@ get_next_default_completion(ins_compl_next_state_T *st, pos_T *start_pos) int in_fuzzy_collect = (cfc_has_mode() && compl_length > 0) || ((get_cot_flags() & COT_FUZZY) && compl_autocomplete); char_u *leader = ins_compl_leader(); - int score = 0; + int score = FUZZY_SCORE_NONE; int in_curbuf = st->ins_buf == curbuf; // If 'infercase' is set, don't use 'smartcase' here @@ -5053,7 +5054,6 @@ get_next_default_completion(ins_compl_next_state_T *st, pos_T *start_pos) score = st->cur_match_pos->lnum - curwin->w_cursor.lnum; if (score < 0) score = -score; - score++; } if (ins_compl_add_infercase(ptr, len, p_ic, @@ -5159,7 +5159,7 @@ get_register_completion(void) compl_orig_text.length) == 0)) { if (ins_compl_add_infercase(str, str_len, p_ic, NULL, - dir, FALSE, 0) == OK) + dir, FALSE, FUZZY_SCORE_NONE) == OK) dir = FORWARD; } } @@ -5198,7 +5198,7 @@ get_register_completion(void) compl_orig_text.length) == 0))) { if (ins_compl_add_infercase(p, len, p_ic, NULL, - dir, FALSE, 0) == OK) + dir, FALSE, FUZZY_SCORE_NONE) == OK) dir = FORWARD; } @@ -5568,7 +5568,8 @@ ins_compl_get_exp(pos_T *ini) // For `^P` completion, reset `compl_curr_match` to the head to avoid // mixing matches from different sources. if (!compl_dir_forward()) - while (compl_curr_match->cp_prev) + while (compl_curr_match->cp_prev + && !match_at_original_text(compl_curr_match->cp_prev)) compl_curr_match = compl_curr_match->cp_prev; } cpt_sources_index = -1; @@ -5966,7 +5967,8 @@ find_next_completion_match( && leader->string != NULL && !ins_compl_equal(compl_shown_match, leader->string, (int)leader->length) - && !(compl_fuzzy_match && compl_shown_match->cp_score > 0)) + && !(compl_fuzzy_match + && compl_shown_match->cp_score != FUZZY_SCORE_NONE)) ++todo; else // Remember a matching item. @@ -6947,7 +6949,7 @@ ins_compl_start(void) if (compl_orig_text.string == NULL || ins_compl_add(compl_orig_text.string, (int)compl_orig_text.length, - NULL, NULL, NULL, 0, flags, FALSE, NULL, 0) != OK) + NULL, NULL, NULL, 0, flags, FALSE, NULL, FUZZY_SCORE_NONE) != OK) { VIM_CLEAR_STRING(compl_pattern); VIM_CLEAR_STRING(compl_orig_text); diff --git a/src/map.c b/src/map.c index 2d378b55c..fbecf4ace 100644 --- a/src/map.c +++ b/src/map.c @@ -1433,7 +1433,7 @@ ExpandMappings( else { score = fuzzy_match_str(p, pat); - match = (score != 0); + match = (score != FUZZY_SCORE_NONE); } if (!match) @@ -1480,7 +1480,7 @@ ExpandMappings( else { score = fuzzy_match_str(p, pat); - match = (score != 0); + match = (score != FUZZY_SCORE_NONE); } if (!match) diff --git a/src/option.c b/src/option.c index 46a38d500..2ec959c72 100644 --- a/src/option.c +++ b/src/option.c @@ -7956,7 +7956,7 @@ match_str( int score; score = fuzzy_match_str(str, fuzzystr); - if (score != 0) + if (score != FUZZY_SCORE_NONE) { if (!test_only) { diff --git a/src/proto.h b/src/proto.h index 8c345b90f..e9e5efcbc 100644 --- a/src/proto.h +++ b/src/proto.h @@ -188,6 +188,7 @@ void mbyte_im_set_active(int active_arg); # if defined(FEAT_CRYPT) || defined(FEAT_PERSISTENT_UNDO) # include "sha256.pro" # endif +# include "fuzzy.pro" # include "search.pro" # ifdef FEAT_SIGNS # include "sign.pro" diff --git a/src/proto/fuzzy.pro b/src/proto/fuzzy.pro new file mode 100644 index 000000000..dc78af52d --- /dev/null +++ b/src/proto/fuzzy.pro @@ -0,0 +1,11 @@ +/* fuzzy.c */ +int fuzzy_match(char_u *str, char_u *pat_arg, int matchseq, int *outScore, int_u *matches, int maxMatches); +void f_matchfuzzy(typval_T *argvars, typval_T *rettv); +void f_matchfuzzypos(typval_T *argvars, typval_T *rettv); +int fuzzy_match_str(char_u *str, char_u *pat); +garray_T *fuzzy_match_str_with_pos(char_u *str, char_u *pat); +int fuzzy_match_str_in_line(char_u **ptr, char_u *pat, int *len, pos_T *current_pos, int *score); +int search_for_fuzzy_match(buf_T *buf, pos_T *pos, char_u *pattern, int dir, pos_T *start_pos, int *len, char_u **ptr, int *score); +void fuzmatch_str_free(fuzmatch_str_T *fuzmatch, int count); +int fuzzymatches_to_strmatches(fuzmatch_str_T *fuzmatch, char_u ***matches, int count, int funcsort); +/* vim: set ft=c : */ diff --git a/src/proto/search.pro b/src/proto/search.pro index 4fcce1bd3..445817762 100644 --- a/src/proto/search.pro +++ b/src/proto/search.pro @@ -37,13 +37,4 @@ void find_pattern_in_path(char_u *ptr, int dir, int len, int whole, int skip_com spat_T *get_spat(int idx); int get_spat_last_idx(void); void f_searchcount(typval_T *argvars, typval_T *rettv); -int fuzzy_match(char_u *str, char_u *pat_arg, int matchseq, int *outScore, int_u *matches, int maxMatches, int camelcase); -void f_matchfuzzy(typval_T *argvars, typval_T *rettv); -void f_matchfuzzypos(typval_T *argvars, typval_T *rettv); -int fuzzy_match_str(char_u *str, char_u *pat); -garray_T *fuzzy_match_str_with_pos(char_u *str, char_u *pat); -int fuzzy_match_str_in_line(char_u **ptr, char_u *pat, int *len, pos_T *current_pos, int *score); -int search_for_fuzzy_match(buf_T *buf, pos_T *pos, char_u *pattern, int dir, pos_T *start_pos, int *len, char_u **ptr, int *score); -void fuzmatch_str_free(fuzmatch_str_T *fuzmatch, int count); -int fuzzymatches_to_strmatches(fuzmatch_str_T *fuzmatch, char_u ***matches, int count, int funcsort); /* vim: set ft=c : */ diff --git a/src/quickfix.c b/src/quickfix.c index ab595d7bb..9cd2fe245 100644 --- a/src/quickfix.c +++ b/src/quickfix.c @@ -6429,8 +6429,8 @@ vgr_match_buflines( long lnum; colnr_T col; int pat_len = (int)STRLEN(spat); - if (pat_len > MAX_FUZZY_MATCHES) - pat_len = MAX_FUZZY_MATCHES; + if (pat_len > FUZZY_MATCH_MAX_LEN) + pat_len = FUZZY_MATCH_MAX_LEN; for (lnum = 1; lnum <= buf->b_ml.ml_line_count && *tomatch > 0; ++lnum) { @@ -6483,13 +6483,13 @@ vgr_match_buflines( char_u *str = ml_get_buf(buf, lnum, FALSE); colnr_T linelen = ml_get_buf_len(buf, lnum); int score; - int_u matches[MAX_FUZZY_MATCHES]; + int_u matches[FUZZY_MATCH_MAX_LEN]; int_u sz = ARRAY_LENGTH(matches); // Fuzzy string match CLEAR_FIELD(matches); while (fuzzy_match(str + col, spat, FALSE, &score, - matches, sz, TRUE) > 0) + matches, sz) > 0) { // Pass the buffer number so that it gets used even for a // dummy buffer, unless duplicate_name is set, then the diff --git a/src/search.c b/src/search.c index 68f17e25f..9a6bb3fbd 100644 --- a/src/search.c +++ b/src/search.c @@ -42,17 +42,6 @@ static void find_mps_values(int *initc, int *findc, int *backwards, int switchit static int is_zero_width(char_u *pattern, size_t patternlen, int move, pos_T *cur, int direction); static void cmdline_search_stat(int dirc, pos_T *pos, pos_T *cursor_pos, int show_top_bot_msg, char_u *msgbuf, size_t msgbuflen, int recompute, int maxcount, long timeout); static void update_search_stat(int dirc, pos_T *pos, pos_T *cursor_pos, searchstat_T *stat, int recompute, int maxcount, long timeout); -static int fuzzy_match_compute_score(char_u *fuzpat, char_u *str, int strSz, int_u *matches, int numMatches, int camelcase); -static int fuzzy_match_recursive(char_u *fuzpat, char_u *str, int_u strIdx, int *outScore, char_u *strBegin, int strLen, int_u *srcMatches, int_u *matches, int maxMatches, int nextMatch, int *recursionCount, int camelcase); -#if defined(FEAT_EVAL) || defined(FEAT_PROTO) -static int fuzzy_match_item_compare(const void *s1, const void *s2); -static void fuzzy_match_in_list(list_T *l, char_u *str, int matchseq, char_u *key, callback_T *item_cb, int retmatchpos, list_T *fmatchlist, long max_matches, int camelcase); -static void do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos); -#endif -static int fuzzy_match_str_compare(const void *s1, const void *s2); -static void fuzzy_match_str_sort(fuzmatch_str_T *fm, int sz); -static int fuzzy_match_func_compare(const void *s1, const void *s2); -static void fuzzy_match_func_sort(fuzmatch_str_T *fm, int sz); #define SEARCH_STAT_DEF_TIMEOUT 40L // 'W ': 2 + @@ -4292,1190 +4281,3 @@ the_end: #endif } #endif - -/* - * Fuzzy string matching - * - * Ported from the lib_fts library authored by Forrest Smith. - * https://github.com/forrestthewoods/lib_fts/tree/master/code - * - * The following blog describes the fuzzy matching algorithm: - * https://www.forrestthewoods.com/blog/reverse_engineering_sublime_texts_fuzzy_match/ - * - * Each matching string is assigned a score. The following factors are checked: - * - Matched letter - * - Unmatched letter - * - Consecutively matched letters - * - Proximity to start - * - Letter following a separator (space, underscore) - * - Uppercase letter following lowercase (aka CamelCase) - * - * Matched letters are good. Unmatched letters are bad. Matching near the start - * is good. Matching the first letter in the middle of a phrase is good. - * Matching the uppercase letters in camel case entries is good. - * - * The score assigned for each factor is explained below. - * File paths are different from file names. File extensions may be ignorable. - * Single words care about consecutive matches but not separators or camel - * case. - * Score starts at 100 - * Matched letter: +0 points - * Unmatched letter: -1 point - * Consecutive match bonus: +15 points - * First letter bonus: +15 points - * Separator bonus: +30 points - * Camel case bonus: +30 points - * Unmatched leading letter: -5 points (max: -15) - * - * There is some nuance to this. Scores don’t have an intrinsic meaning. The - * score range isn’t 0 to 100. It’s roughly [50, 150]. Longer words have a - * lower minimum score due to unmatched letter penalty. Longer search patterns - * have a higher maximum score due to match bonuses. - * - * Separator and camel case bonus is worth a LOT. Consecutive matches are worth - * quite a bit. - * - * There is a penalty if you DON’T match the first three letters. Which - * effectively rewards matching near the start. However there’s no difference - * in matching between the middle and end. - * - * There is not an explicit bonus for an exact match. Unmatched letters receive - * a penalty. So shorter strings and closer matches are worth more. - */ -typedef struct -{ - int idx; // used for stable sort - listitem_T *item; - int score; - list_T *lmatchpos; -} fuzzyItem_T; - -// bonus for adjacent matches; this is higher than SEPARATOR_BONUS so that -// matching a whole word is preferred. -#define SEQUENTIAL_BONUS 40 -// bonus if match occurs after a path separator -#define PATH_SEPARATOR_BONUS 30 -// bonus if match occurs after a word separator -#define WORD_SEPARATOR_BONUS 25 -// bonus if match is uppercase and prev is lower -#define CAMEL_BONUS 30 -// bonus if the first letter is matched -#define FIRST_LETTER_BONUS 15 -// bonus if exact match -#define EXACT_MATCH_BONUS 100 -// bonus if case match when no ignorecase -#define CASE_MATCH_BONUS 25 -// penalty applied for every letter in str before the first match -#define LEADING_LETTER_PENALTY (-5) -// maximum penalty for leading letters -#define MAX_LEADING_LETTER_PENALTY (-15) -// penalty for every letter that doesn't match -#define UNMATCHED_LETTER_PENALTY (-1) -// penalty for gap in matching positions (-2 * k) -#define GAP_PENALTY (-2) -// Score for a string that doesn't fuzzy match the pattern -#define SCORE_NONE (-9999) - -#define FUZZY_MATCH_RECURSION_LIMIT 10 - -/* - * Compute a score for a fuzzy matched string. The matching character locations - * are in 'matches'. - */ - static int -fuzzy_match_compute_score( - char_u *fuzpat, - char_u *str, - int strSz, - int_u *matches, - int numMatches, - int camelcase) -{ - int score; - int penalty; - int unmatched; - int i; - char_u *p = str; - int_u sidx = 0; - int is_exact_match = TRUE; - char_u *orig_fuzpat = fuzpat - numMatches; - char_u *curpat = orig_fuzpat; - int pat_idx = 0; - // Track consecutive camel case matches - int consecutive_camel = 0; - - // Initialize score - score = 100; - - // Apply leading letter penalty - penalty = LEADING_LETTER_PENALTY * matches[0]; - if (penalty < MAX_LEADING_LETTER_PENALTY) - penalty = MAX_LEADING_LETTER_PENALTY; - score += penalty; - - // Apply unmatched penalty - unmatched = strSz - numMatches; - score += UNMATCHED_LETTER_PENALTY * unmatched; - // In a long string, not all matches may be found due to the recursion limit. - // If at least one match is found, reset the score to a non-negative value. - if (score < 0 && numMatches > 0) - score = 0; - - // Apply ordering bonuses - for (i = 0; i < numMatches; ++i) - { - int_u currIdx = matches[i]; - int curr; - int is_camel = FALSE; - - if (i > 0) - { - int_u prevIdx = matches[i - 1]; - - // Sequential - if (currIdx == (prevIdx + 1)) - score += SEQUENTIAL_BONUS; - else - { - score += GAP_PENALTY * (currIdx - prevIdx); - // Reset consecutive camel count on gap - consecutive_camel = 0; - } - } - - // Check for bonuses based on neighbor character value - if (currIdx > 0) - { - // Camel case - int neighbor = ' '; - - if (has_mbyte) - { - while (sidx < currIdx) - { - neighbor = (*mb_ptr2char)(p); - MB_PTR_ADV(p); - sidx++; - } - curr = (*mb_ptr2char)(p); - } - else - { - neighbor = str[currIdx - 1]; - curr = str[currIdx]; - } - - // Enhanced camel case scoring - if (camelcase && vim_islower(neighbor) && vim_isupper(curr)) - { - score += CAMEL_BONUS * 2; // Double the camel case bonus - is_camel = TRUE; - consecutive_camel++; - // Additional bonus for consecutive camel - if (consecutive_camel > 1) - score += CAMEL_BONUS; - } - else - consecutive_camel = 0; - - // Bonus if the match follows a separator character - if (neighbor == '/' || neighbor == '\') - score += PATH_SEPARATOR_BONUS; - else if (neighbor == ' ' || neighbor == '_') - score += WORD_SEPARATOR_BONUS; - } - else - { - // First letter - score += FIRST_LETTER_BONUS; - curr = has_mbyte ? (*mb_ptr2char)(p) : str[currIdx]; - } - - // Case matching bonus - if (vim_isalpha(curr)) - { - while (pat_idx < i && *curpat) - { - if (has_mbyte) - MB_PTR_ADV(curpat); - else - curpat++; - pat_idx++; - } - - if (has_mbyte) - { - if (curr == (*mb_ptr2char)(curpat)) - { - score += CASE_MATCH_BONUS; - // Extra bonus for exact case match in camel - if (is_camel) - score += CASE_MATCH_BONUS / 2; - } - } - else if (curr == *curpat) - { - score += CASE_MATCH_BONUS; - if (is_camel) - score += CASE_MATCH_BONUS / 2; - } - } - - // Check exact match condition - if (currIdx != (int_u)i) - is_exact_match = FALSE; - } - - // Boost score for exact matches - if (is_exact_match && numMatches == strSz) - score += EXACT_MATCH_BONUS; - - return score; -} - -/* - * Perform a recursive search for fuzzy matching 'fuzpat' in 'str'. - * Return the number of matching characters. - */ - static int -fuzzy_match_recursive( - char_u *fuzpat, - char_u *str, - int_u strIdx, - int *outScore, - char_u *strBegin, - int strLen, - int_u *srcMatches, - int_u *matches, - int maxMatches, - int nextMatch, - int *recursionCount, - int camelcase) -{ - // Recursion params - int recursiveMatch = FALSE; - int_u bestRecursiveMatches[MAX_FUZZY_MATCHES]; - int bestRecursiveScore = 0; - int first_match; - int matched; - - // Count recursions - ++*recursionCount; - if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) - return 0; - - // Detect end of strings - if (*fuzpat == NUL || *str == NUL) - return 0; - - // Loop through fuzpat and str looking for a match - first_match = TRUE; - while (*fuzpat != NUL && *str != NUL) - { - int c1; - int c2; - - c1 = PTR2CHAR(fuzpat); - c2 = PTR2CHAR(str); - - // Found match - if (vim_tolower(c1) == vim_tolower(c2)) - { - // Supplied matches buffer was too short - if (nextMatch >= maxMatches) - return 0; - - int recursiveScore = 0; - int_u recursiveMatches[MAX_FUZZY_MATCHES]; - CLEAR_FIELD(recursiveMatches); - - // "Copy-on-Write" srcMatches into matches - if (first_match && srcMatches) - { - memcpy(matches, srcMatches, nextMatch * sizeof(srcMatches[0])); - first_match = FALSE; - } - - // Recursive call that "skips" this match - char_u *next_char = str + (has_mbyte ? (*mb_ptr2len)(str) : 1); - if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1, - &recursiveScore, strBegin, strLen, matches, - recursiveMatches, - ARRAY_LENGTH(recursiveMatches), - nextMatch, recursionCount, camelcase)) - { - // Pick best recursive score - if (!recursiveMatch || recursiveScore > bestRecursiveScore) - { - memcpy(bestRecursiveMatches, recursiveMatches, - MAX_FUZZY_MATCHES * sizeof(recursiveMatches[0])); - bestRecursiveScore = recursiveScore; - } - recursiveMatch = TRUE; - } - - // Advance - matches[nextMatch++] = strIdx; - if (has_mbyte) - MB_PTR_ADV(fuzpat); - else - ++fuzpat; - } - if (has_mbyte) - MB_PTR_ADV(str); - else - ++str; - strIdx++; - } - - // Determine if full fuzpat was matched - matched = *fuzpat == NUL ? TRUE : FALSE; - - // Calculate score - if (matched) - *outScore = fuzzy_match_compute_score(fuzpat, strBegin, strLen, matches, - nextMatch, camelcase); - - // Return best result - if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) - { - // Recursive score is better than "this" - memcpy(matches, bestRecursiveMatches, maxMatches * sizeof(matches[0])); - *outScore = bestRecursiveScore; - return nextMatch; - } - else if (matched) - return nextMatch; // "this" score is better than recursive - - return 0; // no match -} - -/* - * fuzzy_match() - * - * Performs exhaustive search via recursion to find all possible matches and - * match with highest score. - * Scores values have no intrinsic meaning. Possible score range is not - * normalized and varies with pattern. - * Recursion is limited internally (default=10) to prevent degenerate cases - * (pat_arg="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). - * Uses char_u for match indices. Therefore patterns are limited to - * MAX_FUZZY_MATCHES characters. - * - * Returns TRUE if "pat_arg" matches "str". Also returns the match score in - * "outScore" and the matching character positions in "matches". - */ - int -fuzzy_match( - char_u *str, - char_u *pat_arg, - int matchseq, - int *outScore, - int_u *matches, - int maxMatches, - int camelcase) -{ - int recursionCount = 0; - int len = MB_CHARLEN(str); - char_u *save_pat; - char_u *pat; - char_u *p; - int complete = FALSE; - int score = 0; - int numMatches = 0; - int matchCount; - - *outScore = 0; - - save_pat = vim_strsave(pat_arg); - if (save_pat == NULL) - return FALSE; - pat = save_pat; - p = pat; - - // Try matching each word in 'pat_arg' in 'str' - while (TRUE) - { - if (matchseq) - complete = TRUE; - else - { - // Extract one word from the pattern (separated by space) - p = skipwhite(p); - if (*p == NUL) - break; - pat = p; - while (*p != NUL && !VIM_ISWHITE(PTR2CHAR(p))) - { - if (has_mbyte) - MB_PTR_ADV(p); - else - ++p; - } - if (*p == NUL) // processed all the words - complete = TRUE; - *p = NUL; - } - - score = 0; - recursionCount = 0; - matchCount = fuzzy_match_recursive(pat, str, 0, &score, str, len, NULL, - matches + numMatches, maxMatches - numMatches, - 0, &recursionCount, camelcase); - if (matchCount == 0) - { - numMatches = 0; - break; - } - - // Accumulate the match score and the number of matches - *outScore += score; - numMatches += matchCount; - - if (complete) - break; - - // try matching the next word - ++p; - } - - vim_free(save_pat); - return numMatches != 0; -} - -#if defined(FEAT_EVAL) || defined(FEAT_PROTO) -/* - * Sort the fuzzy matches in the descending order of the match score. - * For items with same score, retain the order using the index (stable sort) - */ - static int -fuzzy_match_item_compare(const void *s1, const void *s2) -{ - int v1 = ((fuzzyItem_T *)s1)->score; - int v2 = ((fuzzyItem_T *)s2)->score; - int idx1 = ((fuzzyItem_T *)s1)->idx; - int idx2 = ((fuzzyItem_T *)s2)->idx; - - if (v1 == v2) - return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; - else - return v1 > v2 ? -1 : 1; -} - -/* - * Fuzzy search the string 'str' in a list of 'items' and return the matching - * strings in 'fmatchlist'. - * If 'matchseq' is TRUE, then for multi-word search strings, match all the - * words in sequence. - * If 'items' is a list of strings, then search for 'str' in the list. - * If 'items' is a list of dicts, then either use 'key' to lookup the string - * for each item or use 'item_cb' Funcref function to get the string. - * If 'retmatchpos' is TRUE, then return a list of positions where 'str' - * matches for each item. - */ - static void -fuzzy_match_in_list( - list_T *l, - char_u *str, - int matchseq, - char_u *key, - callback_T *item_cb, - int retmatchpos, - list_T *fmatchlist, - long max_matches, - int camelcase) -{ - long len; - fuzzyItem_T *items; - listitem_T *li; - long i = 0; - long match_count = 0; - int_u matches[MAX_FUZZY_MATCHES]; - - len = list_len(l); - if (len == 0) - return; - if (max_matches > 0 && len > max_matches) - len = max_matches; - - items = ALLOC_CLEAR_MULT(fuzzyItem_T, len); - if (items == NULL) - return; - - // For all the string items in items, get the fuzzy matching score - FOR_ALL_LIST_ITEMS(l, li) - { - int score; - char_u *itemstr; - typval_T rettv; - - if (max_matches > 0 && match_count >= max_matches) - break; - - itemstr = NULL; - rettv.v_type = VAR_UNKNOWN; - if (li->li_tv.v_type == VAR_STRING) // list of strings - itemstr = li->li_tv.vval.v_string; - else if (li->li_tv.v_type == VAR_DICT - && (key != NULL || item_cb->cb_name != NULL)) - { - // For a dict, either use the specified key to lookup the string or - // use the specified callback function to get the string. - if (key != NULL) - itemstr = dict_get_string(li->li_tv.vval.v_dict, - (char *)key, FALSE); - else - { - typval_T argv[2]; - - // Invoke the supplied callback (if any) to get the dict item - li->li_tv.vval.v_dict->dv_refcount++; - argv[0].v_type = VAR_DICT; - argv[0].vval.v_dict = li->li_tv.vval.v_dict; - argv[1].v_type = VAR_UNKNOWN; - if (call_callback(item_cb, -1, &rettv, 1, argv) != FAIL) - { - if (rettv.v_type == VAR_STRING) - itemstr = rettv.vval.v_string; - } - dict_unref(li->li_tv.vval.v_dict); - } - } - - if (itemstr != NULL - && fuzzy_match(itemstr, str, matchseq, &score, matches, - MAX_FUZZY_MATCHES, camelcase)) - { - items[match_count].idx = match_count; - items[match_count].item = li; - items[match_count].score = score; - - // Copy the list of matching positions in itemstr to a list, if - // 'retmatchpos' is set. - if (retmatchpos) - { - int j = 0; - char_u *p; - - items[match_count].lmatchpos = list_alloc(); - if (items[match_count].lmatchpos == NULL) - goto done; - - p = str; - while (*p != NUL) - { - if (!VIM_ISWHITE(PTR2CHAR(p)) || matchseq) - { - if (list_append_number(items[match_count].lmatchpos, - matches[j]) == FAIL) - goto done; - j++; - } - if (has_mbyte) - MB_PTR_ADV(p); - else - ++p; - } - } - ++match_count; - } - clear_tv(&rettv); - } - - if (match_count > 0) - { - list_T *retlist; - - // Sort the list by the descending order of the match score - qsort((void *)items, (size_t)match_count, sizeof(fuzzyItem_T), - fuzzy_match_item_compare); - - // For matchfuzzy(), return a list of matched strings. - // ['str1', 'str2', 'str3'] - // For matchfuzzypos(), return a list with three items. - // The first item is a list of matched strings. The second item - // is a list of lists where each list item is a list of matched - // character positions. The third item is a list of matching scores. - // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]] - if (retmatchpos) - { - li = list_find(fmatchlist, 0); - if (li == NULL || li->li_tv.vval.v_list == NULL) - goto done; - retlist = li->li_tv.vval.v_list; - } - else - retlist = fmatchlist; - - // Copy the matching strings with a valid score to the return list - for (i = 0; i < match_count; i++) - { - if (items[i].score == SCORE_NONE) - break; - list_append_tv(retlist, &items[i].item->li_tv); - } - - // next copy the list of matching positions - if (retmatchpos) - { - li = list_find(fmatchlist, -2); - if (li == NULL || li->li_tv.vval.v_list == NULL) - goto done; - retlist = li->li_tv.vval.v_list; - - for (i = 0; i < match_count; i++) - { - if (items[i].score == SCORE_NONE) - break; - if (items[i].lmatchpos != NULL - && list_append_list(retlist, items[i].lmatchpos) == FAIL) - goto done; - } - - // copy the matching scores - li = list_find(fmatchlist, -1); - if (li == NULL || li->li_tv.vval.v_list == NULL) - goto done; - retlist = li->li_tv.vval.v_list; - for (i = 0; i < match_count; i++) - { - if (items[i].score == SCORE_NONE) - break; - if (list_append_number(retlist, items[i].score) == FAIL) - goto done; - } - } - } - -done: - vim_free(items); -} - -/* - * Do fuzzy matching. Returns the list of matched strings in 'rettv'. - * If 'retmatchpos' is TRUE, also returns the matching character positions. - */ - static void -do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos) -{ - callback_T cb; - char_u *key = NULL; - int ret; - int matchseq = FALSE; - long max_matches = 0; - int camelcase = TRUE; - - if (in_vim9script() - && (check_for_list_arg(argvars, 0) == FAIL - || check_for_string_arg(argvars, 1) == FAIL - || check_for_opt_dict_arg(argvars, 2) == FAIL)) - return; - - CLEAR_POINTER(&cb); - - // validate and get the arguments - if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) - { - semsg(_(e_argument_of_str_must_be_list), - retmatchpos ? "matchfuzzypos()" : "matchfuzzy()"); - return; - } - if (argvars[1].v_type != VAR_STRING - || argvars[1].vval.v_string == NULL) - { - semsg(_(e_invalid_argument_str), tv_get_string(&argvars[1])); - return; - } - - if (argvars[2].v_type != VAR_UNKNOWN) - { - dict_T *d; - dictitem_T *di; - - if (check_for_nonnull_dict_arg(argvars, 2) == FAIL) - return; - - // To search a dict, either a callback function or a key can be - // specified. - d = argvars[2].vval.v_dict; - if ((di = dict_find(d, (char_u *)"key", -1)) != NULL) - { - if (di->di_tv.v_type != VAR_STRING - || di->di_tv.vval.v_string == NULL - || *di->di_tv.vval.v_string == NUL) - { - semsg(_(e_invalid_value_for_argument_str_str), "key", - tv_get_string(&di->di_tv)); - return; - } - key = tv_get_string(&di->di_tv); - } - else if ((di = dict_find(d, (char_u *)"text_cb", -1)) != NULL) - { - cb = get_callback(&di->di_tv); - if (cb.cb_name == NULL) - { - semsg(_(e_invalid_value_for_argument_str), "text_cb"); - return; - } - } - - if ((di = dict_find(d, (char_u *)"limit", -1)) != NULL) - { - if (di->di_tv.v_type != VAR_NUMBER) - { - semsg(_(e_invalid_value_for_argument_str), "limit"); - return; - } - max_matches = (long)tv_get_number_chk(&di->di_tv, NULL); - } - - if ((di = dict_find(d, (char_u *)"camelcase", -1)) != NULL) - { - if (di->di_tv.v_type != VAR_BOOL) - { - semsg(_(e_invalid_value_for_argument_str), "camelcase"); - return; - } - camelcase = tv_get_bool_chk(&di->di_tv, NULL); - } - - if (dict_has_key(d, "matchseq")) - matchseq = TRUE; - } - - // get the fuzzy matches - ret = rettv_list_alloc(rettv); - if (ret == FAIL) - goto done; - if (retmatchpos) - { - list_T *l; - - // For matchfuzzypos(), a list with three items are returned. First - // item is a list of matching strings, the second item is a list of - // lists with matching positions within each string and the third item - // is the list of scores of the matches. - l = list_alloc(); - if (l == NULL) - goto done; - if (list_append_list(rettv->vval.v_list, l) == FAIL) - { - vim_free(l); - goto done; - } - l = list_alloc(); - if (l == NULL) - goto done; - if (list_append_list(rettv->vval.v_list, l) == FAIL) - { - vim_free(l); - goto done; - } - l = list_alloc(); - if (l == NULL) - goto done; - if (list_append_list(rettv->vval.v_list, l) == FAIL) - { - vim_free(l); - goto done; - } - } - - fuzzy_match_in_list(argvars[0].vval.v_list, tv_get_string(&argvars[1]), - matchseq, key, &cb, retmatchpos, rettv->vval.v_list, max_matches, - camelcase); - -done: - free_callback(&cb); -} - -/* - * "matchfuzzy()" function - */ - void -f_matchfuzzy(typval_T *argvars, typval_T *rettv) -{ - do_fuzzymatch(argvars, rettv, FALSE); -} - -/* - * "matchfuzzypos()" function - */ - void -f_matchfuzzypos(typval_T *argvars, typval_T *rettv) -{ - do_fuzzymatch(argvars, rettv, TRUE); -} -#endif - -/* - * Same as fuzzy_match_item_compare() except for use with a string match - */ - static int -fuzzy_match_str_compare(const void *s1, const void *s2) -{ - int v1 = ((fuzmatch_str_T *)s1)->score; - int v2 = ((fuzmatch_str_T *)s2)->score; - int idx1 = ((fuzmatch_str_T *)s1)->idx; - int idx2 = ((fuzmatch_str_T *)s2)->idx; - - if (v1 == v2) - return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; - else - return v1 > v2 ? -1 : 1; -} - -/* - * Sort fuzzy matches by score - */ - static void -fuzzy_match_str_sort(fuzmatch_str_T *fm, int sz) -{ - // Sort the list by the descending order of the match score - qsort((void *)fm, (size_t)sz, sizeof(fuzmatch_str_T), - fuzzy_match_str_compare); -} - -/* - * Same as fuzzy_match_item_compare() except for use with a function name - * string match. <SNR> functions should be sorted to the end. - */ - static int -fuzzy_match_func_compare(const void *s1, const void *s2) -{ - int v1 = ((fuzmatch_str_T *)s1)->score; - int v2 = ((fuzmatch_str_T *)s2)->score; - int idx1 = ((fuzmatch_str_T *)s1)->idx; - int idx2 = ((fuzmatch_str_T *)s2)->idx; - char_u *str1 = ((fuzmatch_str_T *)s1)->str; - char_u *str2 = ((fuzmatch_str_T *)s2)->str; - - if (*str1 != '<' && *str2 == '<') - return -1; - if (*str1 == '<' && *str2 != '<') - return 1; - if (v1 == v2) - return idx1 == idx2 ? 0 : idx1 > idx2 ? 1 : -1; - else - return v1 > v2 ? -1 : 1; -} - -/* - * Sort fuzzy matches of function names by score. - * <SNR> functions should be sorted to the end. - */ - static void -fuzzy_match_func_sort(fuzmatch_str_T *fm, int sz) -{ - // Sort the list by the descending order of the match score - qsort((void *)fm, (size_t)sz, sizeof(fuzmatch_str_T), - fuzzy_match_func_compare); -} - -/* - * Fuzzy match 'pat' in 'str'. Returns 0 if there is no match. Otherwise, - * returns the match score. - */ - int -fuzzy_match_str(char_u *str, char_u *pat) -{ - int score = 0; - int_u matchpos[MAX_FUZZY_MATCHES]; - - if (str == NULL || pat == NULL) - return 0; - - fuzzy_match(str, pat, TRUE, &score, matchpos, - sizeof(matchpos) / sizeof(matchpos[0]), TRUE); - - return score; -} - -/* - * Fuzzy match the position of string 'pat' in string 'str'. - * Returns a dynamic array of matching positions. If there is no match, - * returns NULL. - */ - garray_T * -fuzzy_match_str_with_pos(char_u *str UNUSED, char_u *pat UNUSED) -{ -#ifdef FEAT_SEARCH_EXTRA - int score = 0; - garray_T *match_positions = NULL; - int_u matches[MAX_FUZZY_MATCHES]; - int j = 0; - - if (str == NULL || pat == NULL) - return NULL; - - match_positions = ALLOC_ONE(garray_T); - if (match_positions == NULL) - return NULL; - ga_init2(match_positions, sizeof(int_u), 10); - - if (!fuzzy_match(str, pat, FALSE, &score, matches, MAX_FUZZY_MATCHES, TRUE) - || score == 0) - { - ga_clear(match_positions); - vim_free(match_positions); - return NULL; - } - - for (char_u *p = pat; *p != NUL; MB_PTR_ADV(p)) - { - if (!VIM_ISWHITE(PTR2CHAR(p))) - { - ga_grow(match_positions, 1); - ((int_u *)match_positions->ga_data)[match_positions->ga_len] = - matches[j]; - match_positions->ga_len++; - j++; - } - } - - return match_positions; -#else - return NULL; -#endif -} - -/* - * This function splits the line pointed to by `*ptr` into words and performs - * a fuzzy match for the pattern `pat` on each word. It iterates through the - * line, moving `*ptr` to the start of each word during the process. - * - * If a match is found: - * - `*ptr` points to the start of the matched word. - * - `*len` is set to the length of the matched word. - * - `*score` contains the match score. - * - * If no match is found, `*ptr` is updated to the end of the line. - */ - int -fuzzy_match_str_in_line( - char_u **ptr, - char_u *pat, - int *len, - pos_T *current_pos, - int *score) -{ - char_u *str = *ptr; - char_u *strBegin = str; - char_u *end = NULL; - char_u *start = NULL; - int found = FALSE; - char save_end; - char_u *line_end = NULL; - - if (str == NULL || pat == NULL) - return found; - line_end = find_line_end(str); - - while (str < line_end) - { - // Skip non-word characters - start = find_word_start(str); - if (*start == NUL) - break; - end = find_word_end(start); - - // Extract the word from start to end - save_end = *end; - *end = NUL; - - // Perform fuzzy match - *score = fuzzy_match_str(start, pat); - *end = save_end; - - if (*score > 0) - { - *len = (int)(end - start); - found = TRUE; - *ptr = start; - if (current_pos) - current_pos->col += (int)(end - strBegin); - break; - } - - // Move to the end of the current word for the next iteration - str = end; - // Ensure we continue searching after the current word - while (*str != NUL && !vim_iswordp(str)) - MB_PTR_ADV(str); - } - - if (!found) - *ptr = line_end; - - return found; -} - -/* - * Search for the next fuzzy match in the specified buffer. - * This function attempts to find the next occurrence of the given pattern - * in the buffer, starting from the current position. It handles line wrapping - * and direction of search. - * - * Return TRUE if a match is found, otherwise FALSE. - */ - int -search_for_fuzzy_match( - buf_T *buf, - pos_T *pos, - char_u *pattern, - int dir, - pos_T *start_pos, - int *len, - char_u **ptr, - int *score) -{ - pos_T current_pos = *pos; - pos_T circly_end; - int found_new_match = FALSE; - int looped_around = FALSE; - int whole_line = ctrl_x_mode_whole_line(); - - if (buf == curbuf) - circly_end = *start_pos; - else - { - circly_end.lnum = buf->b_ml.ml_line_count; - circly_end.col = 0; - circly_end.coladd = 0; - } - - if (whole_line && start_pos->lnum != pos->lnum) - current_pos.lnum += dir; - - do - { - - // Check if looped around and back to start position - if (looped_around && EQUAL_POS(current_pos, circly_end)) - break; - - // Ensure current_pos is valid - if (current_pos.lnum >= 1 && current_pos.lnum <= buf->b_ml.ml_line_count) - { - // Get the current line buffer - *ptr = ml_get_buf(buf, current_pos.lnum, FALSE); - if (!whole_line) - *ptr += current_pos.col; - - // If ptr is end of line is reached, move to next line - // or previous line based on direction - if (*ptr != NULL && **ptr != NUL) - { - if (!whole_line) - { - // Try to find a fuzzy match in the current line starting - // from current position - found_new_match = fuzzy_match_str_in_line(ptr, pattern, - len, ¤t_pos, score); - if (found_new_match) - { - *pos = current_pos; - break; - } - else if (looped_around && current_pos.lnum == circly_end.lnum) - break; - } - else - { - if (fuzzy_match_str(*ptr, pattern) > 0) - { - found_new_match = TRUE; - *pos = current_pos; - *len = (int)ml_get_buf_len(buf, current_pos.lnum); - break; - } - } - } - } - - // Move to the next line or previous line based on direction - if (dir == FORWARD) - { - if (++current_pos.lnum > buf->b_ml.ml_line_count) - { - if (p_ws) - { - current_pos.lnum = 1; - looped_around = TRUE; - } - else - break; - } - } - else - { - if (--current_pos.lnum < 1) - { - if (p_ws) - { - current_pos.lnum = buf->b_ml.ml_line_count; - looped_around = TRUE; - } - else - break; - - } - } - current_pos.col = 0; - } while (TRUE); - - return found_new_match; -} - -/* - * Free an array of fuzzy string matches "fuzmatch[count]". - */ - void -fuzmatch_str_free(fuzmatch_str_T *fuzmatch, int count) -{ - int i; - - if (fuzmatch == NULL) - return; - for (i = 0; i < count; ++i) - vim_free(fuzmatch[i].str); - vim_free(fuzmatch); -} - -/* - * Copy a list of fuzzy matches into a string list after sorting the matches by - * the fuzzy score. Frees the memory allocated for 'fuzmatch'. - * Returns OK on success and FAIL on memory allocation failure. - */ - int -fuzzymatches_to_strmatches( - fuzmatch_str_T *fuzmatch, - char_u ***matches, - int count, - int funcsort) -{ - int i; - - if (count <= 0) - return OK; - - *matches = ALLOC_MULT(char_u *, count); - if (*matches == NULL) - { - fuzmatch_str_free(fuzmatch, count); - return FAIL; - } - - // Sort the list by the descending order of the match score - if (funcsort) - fuzzy_match_func_sort((void *)fuzmatch, (size_t)count); - else - fuzzy_match_str_sort((void *)fuzmatch, (size_t)count); - - for (i = 0; i < count; i++) - (*matches)[i] = fuzmatch[i].str; - vim_free(fuzmatch); - - return OK; -} diff --git a/src/testdir/test_cmdline.vim b/src/testdir/test_cmdline.vim index e6dcfd399..ef311ed73 100644 --- a/src/testdir/test_cmdline.vim +++ b/src/testdir/test_cmdline.vim @@ -3224,7 +3224,7 @@ func Test_fuzzy_completion_bufname_fullpath() call assert_equal('"b CmdStateFile', @:) set wildoptions=fuzzy call feedkeys(":b CmdStateFile\<Tab>\<C-B>\"\<CR>", 'tx') - call assert_match('Xcmd/Xstate/Xfile.js$', @:) + call assert_equal('"b CmdStateFile', @:) cd - set wildoptions& endfunc @@ -3502,7 +3502,7 @@ func Test_fuzzy_completion_mapname() nmap <Plug>state : nmap <Plug>FendingOff : call feedkeys(":nmap <Plug>fo\<C-A>\<C-B>\"\<CR>", 'tx') - call assert_equal("\"nmap <Plug>format <Plug>TestFOrmat <Plug>FendingOff <Plug>goformat <Plug>fendoff", @:) + call assert_equal("\"nmap <Plug>format <Plug>TestFOrmat <Plug>FendingOff <Plug>fendoff <Plug>goformat", @:) nunmap <Plug>format nunmap <Plug>goformat nunmap <Plug>TestFOrmat @@ -3674,7 +3674,7 @@ func Test_fuzzy_completion_cmd_sort_results() command T123FendingOff : set wildoptions=fuzzy call feedkeys(":T123fo\<C-A>\<C-B>\"\<CR>", 'tx') - call assert_equal('"T123format T123TestFOrmat T123FendingOff T123goformat T123fendoff', @:) + call assert_equal('"T123format T123TestFOrmat T123FendingOff T123fendoff T123goformat', @:) delcommand T123format delcommand T123goformat delcommand T123TestFOrmat diff --git a/src/testdir/test_matchfuzzy.vim b/src/testdir/test_matchfuzzy.vim index 8aa7f3218..0ee1b7ba6 100644 --- a/src/testdir/test_matchfuzzy.vim +++ b/src/testdir/test_matchfuzzy.vim @@ -14,11 +14,11 @@ func Test_matchfuzzy() call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra')) call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa')) call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one')) - call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) - call assert_equal(['onetwo', 'one_two'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) + call assert_equal(['oneTwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) + call assert_equal([], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa')) call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len()) - call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257))) + call assert_equal([repeat('a', 300)], matchfuzzy([repeat('a', 300)], repeat('a', 257))) " full match has highest score call assert_equal(['Cursor', 'lCursor'], matchfuzzy(["hello", "lCursor", "Cursor"], "Cursor")) " matches with same score should not be reordered @@ -26,8 +26,7 @@ func Test_matchfuzzy() call assert_equal(l, l->matchfuzzy('abc')) " Tests for match preferences - " preference for camel case match - call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo')) + call assert_equal(['onetwo', 'oneTwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo')) " preference for match after a separator (_ or space) call assert_equal(['onetwo', 'one_two', 'one two'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo')) " preference for leading letter match @@ -43,7 +42,7 @@ func Test_matchfuzzy() " gap penalty call assert_equal(['xxayybxxxx', 'xxayyybxxx', 'xxayyyybxx'], ['xxayyyybxx', 'xxayyybxxx', 'xxayybxxxx']->matchfuzzy('ab')) " path separator vs word separator - call assert_equal(['color/setup.vim', 'color\setup.vim', 'color setup.vim', 'color_setup.vim', 'colorsetup.vim'], matchfuzzy(['colorsetup.vim', 'color setup.vim', 'color/setup.vim', 'color_setup.vim', 'color\setup.vim'], 'setup.vim')) + call assert_equal(['color/setup.vim', 'color setup.vim', 'color_setup.vim', 'colorsetup.vim', 'color\setup.vim'], matchfuzzy(['colorsetup.vim', 'color setup.vim', 'color/setup.vim', 'color_setup.vim', 'color\setup.vim'], 'setup.vim')) " match multiple words (separated by space) call assert_equal(['foo bar baz'], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzy('baz foo')) @@ -85,15 +84,16 @@ func Test_matchfuzzy() call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : 'name'})", 'E730:') " camelcase - call assert_equal(['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], - \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur')) - call assert_equal(['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], - \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:true})) call assert_equal(['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'], - \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:false})) + \ matchfuzzy(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur')) call assert_equal(['things', 'sThings', 'thisThings'], - \ matchfuzzy(['things','sThings', 'thisThings'], 'thin', {'camelcase': v:false})) - call assert_fails("let x = matchfuzzy([], 'foo', {'camelcase': []})", 'E475: Invalid value for argument camelcase') + \ matchfuzzy(['things','sThings', 'thisThings'], 'thin')) + call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['mytestcase', 'MyTestCase'], 'mtc')) + call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['MyTestCase', 'mytestcase'], 'mtc')) + call assert_equal(['MyTest'], matchfuzzy(['Mytest', 'mytest', 'MyTest'], 'MyT')) + call assert_equal(['CamelCaseMatchIngAlg'], + \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'], 'CamelCase')) + call assert_equal(['SomeWord', 'PrefixSomeWord'], matchfuzzy(['PrefixSomeWord', 'SomeWord'], 'somewor')) " Test in latin1 encoding let save_enc = &encoding @@ -104,50 +104,57 @@ endfunc " Test for the matchfuzzypos() function func Test_matchfuzzypos() - call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'curl'], 'rl')) - call assert_equal([['curl', 'world'], [[2,3], [2,3]], [178, 177]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) + call assert_equal([['curl', 'world'], [[2,3], [2,3]], [990, 985]], matchfuzzypos(['world', 'curl'], 'rl')) + call assert_equal([['curl', 'world'], [[2,3], [2,3]], [990, 985]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) call assert_equal([['hello', 'hello world hello world'], - \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [500, 382]], + \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]], [2147483647, 4810]], \ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello')) - call assert_equal([['aaaaaaa'], [[0, 1, 2]], [266]], matchfuzzypos(['aaaaaaa'], 'aaa')) - call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b')) - call assert_equal([['a b'], [[0, 3]], [269]], matchfuzzypos(['a b'], 'a b')) - call assert_equal([['a b'], [[0]], [137]], matchfuzzypos(['a b'], ' a ')) + call assert_equal([['aaaaaaa'], [[0, 1, 2]], [2880]], matchfuzzypos(['aaaaaaa'], 'aaa')) + call assert_equal([['a b'], [[0, 3]], [1670]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0, 3]], [1670]], matchfuzzypos(['a b'], 'a b')) + call assert_equal([['a b'], [[0]], [885]], matchfuzzypos(['a b'], ' a ')) call assert_equal([[], [], []], matchfuzzypos(['a b'], ' ')) call assert_equal([[], [], []], matchfuzzypos(['world', 'curl'], 'ab')) let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256)) call assert_equal(range(256), x[1][0]) - call assert_equal([[], [], []], matchfuzzypos([repeat('a', 300)], repeat('a', 257))) + + " fuzzy matches limited to 1024 chars + let matches = matchfuzzypos([repeat('a', 1030)], repeat('a', 1025)) + call assert_equal([repeat('a', 1030)], matches[0]) + call assert_equal(1024, len(matches[1][0])) + call assert_equal(1023, matches[1][0][1023]) + call assert_equal([[], [], []], matchfuzzypos([], 'abc')) + call assert_equal([[], [], []], matchfuzzypos(['abc'], '')) " match in a long string - call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [155]], + call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]], [500]], \ matchfuzzypos([repeat('x', 300) .. 'abc'], 'abc')) " preference for camel case match - call assert_equal([['xabcxxaBc'], [[6, 7, 8]], [269]], matchfuzzypos(['xabcxxaBc'], 'abc')) + call assert_equal([['xabcxxaBc'], [[7, 8]], [1665]], matchfuzzypos(['xabcxxaBc'], 'bc')) " preference for match after a separator (_ or space) - call assert_equal([['xabx_ab'], [[5, 6]], [195]], matchfuzzypos(['xabx_ab'], 'ab')) + call assert_equal([['xabx_ab'], [[5, 6]], [1775]], matchfuzzypos(['xabx_ab'], 'ab')) " preference for leading letter match - call assert_equal([['abcxabc'], [[0, 1]], [200]], matchfuzzypos(['abcxabc'], 'ab')) + call assert_equal([['abcxabc'], [[0, 1]], [1875]], matchfuzzypos(['abcxabc'], 'ab')) " preference for sequential match - call assert_equal([['aobncedone'], [[7, 8, 9]], [233]], matchfuzzypos(['aobncedone'], 'one')) + call assert_equal([['aobncedone'], [[7, 8, 9]], [1965]], matchfuzzypos(['aobncedone'], 'one')) " best recursive match - call assert_equal([['xoone'], [[2, 3, 4]], [243]], matchfuzzypos(['xoone'], 'one')) + call assert_equal([['xoone'], [[2, 3, 4]], [1990]], matchfuzzypos(['xoone'], 'one')) " match multiple words (separated by space) - call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo')) + call assert_equal([['foo bar baz'], [[8, 9, 10, 0, 1, 2]], [5620]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo')) call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('baz foo', {'matchseq': 1})) - call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [519]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz')) - call assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 5, 10]], [476]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1})) + call assert_equal([['foo bar baz'], [[0, 1, 2, 8, 9, 10]], [5620]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz')) + call assert_equal([['foo bar baz'], [[0, 1, 2, 3, 4, 9, 10]], [6660]], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('foo baz', {'matchseq': 1})) call assert_equal([[], [], []], ['foo bar baz', 'foo', 'foo bar', 'baz bar']->matchfuzzypos('one two')) call assert_equal([[], [], []], ['foo bar']->matchfuzzypos(" ")) - call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [1057]], ['grace']->matchfuzzypos('race ace grace')) + call assert_equal([['grace'], [[1, 2, 3, 4, 2, 3, 4, 0, 1, 2, 3, 4]], [2147483647]], ['grace']->matchfuzzypos('race ace grace')) let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] - call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]], + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [2885]], \ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}})) - call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [267]], + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]], [2885]], \ matchfuzzypos(l, 'cam', {'key' : 'val'})) call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}})) call assert_equal([[], [], []], matchfuzzypos(l, 'day', {'key' : 'val'})) @@ -164,30 +171,17 @@ func Test_matchfuzzypos() call assert_fails("let x = matchfuzzypos(l, 'foo', {'text_cb' : test_null_function()})", 'E475:') " case match - call assert_equal([['Match', 'match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['match', 'Match'], 'Ma')) - call assert_equal([['match', 'Match'], [[0, 1], [0, 1]], [202, 177]], matchfuzzypos(['Match', 'match'], 'ma')) - " CamelCase has high weight even case match - call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['mytestcase', 'MyTestCase'], 'mtc')) - call assert_equal(['MyTestCase', 'mytestcase'], matchfuzzy(['MyTestCase', 'mytestcase'], 'mtc')) - call assert_equal(['MyTest', 'Mytest', 'mytest', ],matchfuzzy(['Mytest', 'mytest', 'MyTest'], 'MyT')) - call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'], - \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelCase')) - call assert_equal(['CamelCaseMatchIngAlg', 'camelCaseMatchingAlg', 'camelcasematchingalg'], - \ matchfuzzy(['CamelCaseMatchIngAlg', 'camelcasematchingalg', 'camelCaseMatchingAlg'], 'CamelcaseM')) + call assert_equal([['Match'], [[0, 1]], [1885]], matchfuzzypos(['match', 'Match'], 'Ma')) + call assert_equal([['match', 'Match'], [[0, 1], [0, 1]], [1885, 1885]], matchfuzzypos(['Match', 'match'], 'ma')) let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}] call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:') " camelcase - call assert_equal([['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], [[1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8], [0, 1, 2], [0, 1, 2], [0, 1, 2]], [318, 311, 308, 303, 267, 264, 263]], + call assert_equal([['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8]], [2885, 2870, 2865, 2680, 2670, 2655, 2655]], \ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur')) - call assert_equal([['lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'Cursor', 'CurSearch', 'CursorLine'], [[1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8], [0, 1, 2], [0, 1, 2], [0, 1, 2]], [318, 311, 308, 303, 267, 264, 263]], - \ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:true})) - call assert_equal([['Cursor', 'CurSearch', 'CursorLine', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor'], [[0, 1, 2], [0, 1, 2], [0, 1, 2], [1, 2, 3], [2, 3, 4], [2, 3, 4], [6, 7, 8]], [267, 264, 263, 246, 239, 236, 231]], - \ matchfuzzypos(['Cursor', 'lCursor', 'shCurlyIn', 'shCurlyError', 'TracesCursor', 'CurSearch', 'CursorLine'], 'Cur', {"camelcase": v:false})) - call assert_equal([['things', 'sThings', 'thisThings'], [[0, 1, 2, 3], [1, 2, 3, 4], [0, 1, 2, 7]], [333, 287, 279]], - \ matchfuzzypos(['things','sThings', 'thisThings'], 'thin', {'camelcase': v:false})) - call assert_fails("let x = matchfuzzypos([], 'foo', {'camelcase': []})", 'E475: Invalid value for argument camelcase') + call assert_equal([['things', 'sThings', 'thisThings'], [[0, 1, 2, 3], [1, 2, 3, 4], [0, 1, 6, 7]], [3890, 3685, 3670]], + \ matchfuzzypos(['things','sThings', 'thisThings'], 'thin')) endfunc " Test for matchfuzzy() with multibyte characters @@ -205,9 +199,14 @@ func Test_matchfuzzy_mbyte() call assert_equal(['세 마리의 작은 돼지'], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('돼지 마리의')) call assert_equal([], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzy('파란 하늘')) - " preference for camel case match + " camel case match + call assert_equal(['oneąwo', 'oneĄwo'], + \ ['oneĄwo', 'oneąwo']->matchfuzzy('oneąwo')) + call assert_equal(['oneĄwo'], + \ ['oneĄwo', 'oneąwo']->matchfuzzy('Ąwo')) + " prefer camelcase when searched first character matches upper case call assert_equal(['oneĄwo', 'oneąwo'], - \ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo')) + \ ['oneĄwo', 'oneąwo']->matchfuzzy('ąw')) " preference for complete match then match after separator (_ or space) call assert_equal(['ⅠⅡabㄟㄠ'] + sort(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']), \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡa_bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ')) @@ -231,41 +230,39 @@ endfunc " Test for matchfuzzypos() with multibyte characters func Test_matchfuzzypos_mbyte() CheckFeature multi_lang - call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]], [273]], + call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]], [4890]], \ matchfuzzypos(['こんにちは世界'], 'こんにちは')) - call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [88]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ')) + call assert_equal([['ンヹㄇヺヴ'], [[1, 3]], [-20]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ')) " reverse the order of characters call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ')) - call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [252, 143]], + call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]], [2885, 670]], \ matchfuzzypos(['αβΩxxx', 'xαxβxΩx'], 'αβΩ')) call assert_equal([['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], - \ [[0, 1], [0, 1], [0, 1], [0, 2]], [176, 173, 170, 135]], + \ [[0, 1], [0, 1], [0, 1], [0, 2]], [1880, 1865, 1850, 890]], \ matchfuzzypos(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) - call assert_equal([['ααααααα'], [[0, 1, 2]], [216]], + call assert_equal([['ααααααα'], [[0, 1, 2]], [2880]], \ matchfuzzypos(['ααααααα'], 'ααα')) call assert_equal([[], [], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl')) let x = matchfuzzypos([repeat('Ψ', 256)], repeat('Ψ', 256)) call assert_equal(range(256), x[1][0]) - call assert_equal([[], [], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))) + call assert_equal([repeat('✓', 300)], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))[0]) " match multiple words (separated by space) - call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]], [328]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의')) + call assert_equal([['세 마리의 작은 돼지'], [[9, 10, 2, 3, 4]], [4515]], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('돼지 마리의')) call assert_equal([[], [], []], ['세 마리의 작은 돼지', '마리의', '마리의 작은', '작은 돼지']->matchfuzzypos('파란 하늘')) " match in a long string - call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [105]], + call assert_equal([[repeat('ぶ', 300) .. 'ẼẼẼ'], [[300, 301, 302]], [500]], \ matchfuzzypos([repeat('ぶ', 300) .. 'ẼẼẼ'], 'ẼẼẼ')) - " preference for camel case match - call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]], [219]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ')) " preference for match after a separator (_ or space) - call assert_equal([['xちだx_ちだ'], [[5, 6]], [145]], matchfuzzypos(['xちだx_ちだ'], 'ちだ')) + call assert_equal([['xちだx_ちだ'], [[5, 6]], [1775]], matchfuzzypos(['xちだx_ちだ'], 'ちだ')) " preference for leading letter match - call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]], [150]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ')) + call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]], [1875]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ')) " preference for sequential match - call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]], [158]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ')) + call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]], [1965]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ')) " best recursive match - call assert_equal([['xффйд'], [[2, 3, 4]], [168]], matchfuzzypos(['xффйд'], 'фйд')) + call assert_equal([['xффйд'], [[2, 3, 4]], [1990]], matchfuzzypos(['xффйд'], 'фйд')) endfunc " Test for matchfuzzy() with limit diff --git a/src/version.c b/src/version.c index 071eca3f0..87550542e 100644 --- a/src/version.c +++ b/src/version.c @@ -719,6 +719,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 1627, /**/ 1626, /**/ diff --git a/src/vim.h b/src/vim.h index 2c42c2cdd..79d3add8a 100644 --- a/src/vim.h +++ b/src/vim.h @@ -3036,8 +3036,9 @@ long elapsed(DWORD start_tick); #define EVAL_VAR_IMPORT 4 // may return special variable for import #define EVAL_VAR_NO_FUNC 8 // do not look for a function -// Maximum number of characters that can be fuzzy matched -#define MAX_FUZZY_MATCHES 256 +// Fuzzy matching +#define FUZZY_MATCH_MAX_LEN 1024 // max characters that can be matched +#define FUZZY_SCORE_NONE INT_MIN // invalid fuzzy score // flags for equal_type() #define ETYPE_ARG_UNKNOWN 1 -- -- You received this message from the "vim_dev" maillist. Do not top-post! Type your reply below the text you are replying to. For more information, visit http://www.vim.org/maillist.php --- You received this message because you are subscribed to the Google Groups "vim_dev" group. To unsubscribe from this group and stop receiving emails from it, send an email to vim_dev+unsubscr...@googlegroups.com. To view this discussion visit https://groups.google.com/d/msgid/vim_dev/E1ulvdF-00B6N9-1n%40256bit.org.