patch 9.1.0297: Patch 9.1.0296 causes too many issues

Commit: https://github.com/vim/vim/commit/c97f4d61cde24030f2f7d2318e1b409a0ccc3e43
Author: Christian Brabandt <c...@256bit.org>
Date:   Wed Apr 10 16:18:15 2024 +0200
patch 9.1.0297: Patch 9.1.0296 causes too many issues

Problem:  Patch 9.1.0296 causes too many issues
          (Tony Mechelynck, @chdiza, CI)
Solution: Back out the change for now

Revert "patch 9.1.0296: regexp: engines do not handle case-folding well"

This reverts commit 7a27c108e0509f3255ebdcb6558e896c223e4d23 because it
causes issues with syntax highlighting and breaks the FreeBSD and macOS CI.
It needs more work.

fixes: #14487

Signed-off-by: Christian Brabandt <c...@256bit.org>

diff --git a/src/mbyte.c b/src/mbyte.c index 3be75099f..d6fb7ecc7 100644 --- a/src/mbyte.c +++ b/src/mbyte.c @@ -3800,15 +3800,6 @@ utf_strnicmp( * Returns zero if s1 and s2 are equal (ignoring case), the difference between * two characters otherwise. */ - int -mb_strnicmp2(char_u *s1, char_u *s2, int n1, int n2) -{ - if (n1 == n2 || !enc_utf8) - return mb_strnicmp(s1, s2, n1); - else - return utf_strnicmp(s1, s2, n1, n2); -} - int mb_strnicmp(char_u *s1, char_u *s2, size_t nn) { diff --git a/src/proto/mbyte.pro b/src/proto/mbyte.pro index c49f7e707..7883b3b4c 100644 --- a/src/proto/mbyte.pro +++ b/src/proto/mbyte.pro @@ -48,7 +48,6 @@ int utf_islower(int a); int utf_tolower(int a); int utf_isupper(int a); int mb_strnicmp(char_u *s1, char_u *s2, size_t nn); -int mb_strnicmp2(char_u *s1, char_u *s2, int n1, int n2); void show_utf8(void); int latin_head_off(char_u *base, char_u *p); int dbcs_screen_head_off(char_u *base, char_u *p); diff --git a/src/regexp.c b/src/regexp.c index 4e85ebc29..4373ae0cf 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1606,9 +1606,7 @@ mb_decompose(int c, int *c1, int *c2, int *c3) /* * Compare two strings, ignore case if rex.reg_ic set. * Return 0 if strings match, non-zero otherwise. - * Correct the length "*n" when composing characters are ignored - * or for utf8 when both utf codepoints are considered equal because of - * case-folding but have different length (e.g. 's' and 'ſ') + * Correct the length "*n" when composing characters are ignored.
*/ static int cstrncmp(char_u *s1, char_u *s2, int *n) @@ -1617,13 +1615,6 @@ cstrncmp(char_u *s1, char_u *s2, int *n) if (!rex.reg_ic) result = STRNCMP(s1, s2, *n); - else if (enc_utf8) - { - int l2 = mb_ptr2len(s2); - result = MB_STRNICMP2(s1, s2, *n, l2); - if (result == 0 && l2 < *n) - *n = l2; - } else result = MB_STRNICMP(s1, s2, *n); diff --git a/src/regexp_bt.c b/src/regexp_bt.c index 2a03fec57..5d9450d87 100644 --- a/src/regexp_bt.c +++ b/src/regexp_bt.c @@ -3816,14 +3816,6 @@ regmatch( } } } - else if (enc_utf8) - { - if (cstrncmp(opnd, rex.input, &len) != 0) - { - status = RA_NOMATCH; - break; - } - } else for (i = 0; i < len; ++i) if (opnd[i] != rex.input[i]) diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c index 451720a09..5e4fadd02 100644 --- a/src/regexp_nfa.c +++ b/src/regexp_nfa.c @@ -5666,12 +5666,7 @@ find_match_text(colnr_T *startcol, int regstart, char_u *match_text) for (;;) { match = TRUE; - // skip regstart - len2 = MB_CHAR2LEN(regstart); - if (enc_utf8 && len2 > 1 && MB_CHAR2LEN(PTR2CHAR(rex.line + col)) != len2) - // because of case-folding of the previously matched text, we may need - // to skip fewer bytes than mb_char2len(regstart) - len2 = mb_char2len(utf_fold(regstart)); + len2 = MB_CHAR2LEN(regstart); // skip regstart for (len1 = 0; match_text[len1] != NUL; len1 += MB_CHAR2LEN(c1)) { c1 = PTR2CHAR(match_text + len1); @@ -7508,7 +7503,7 @@ nfa_regexec_both( // If match_text is set it contains the full text that must match. // Nothing else to try. Doesn't handle combining chars well. 
- if (prog->match_text != NULL && *prog->match_text != NUL && !rex.reg_icombine) + if (prog->match_text != NULL && !rex.reg_icombine) { retval = find_match_text(&col, prog->regstart, prog->match_text); if (REG_MULTI) diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim index 9980e5b7f..6669dee57 100644 --- a/src/testdir/test_regexp_utf8.vim +++ b/src/testdir/test_regexp_utf8.vim @@ -587,32 +587,4 @@ func Test_combining_chars_in_collection() bw! endfunc -func Test_search_multibyte_match_ascii() - new - " Match single 'ſ' and 's' - call setline(1, 'das abc heraus abc ſich abc ſind') - for i in range(0, 2) - exe "set re="..i - let ic_match = matchbufline('%', ' -- -- You received this message from the "vim_dev" maillist. Do not top-post! Type your reply below the text you are replying to. For more information, visit http://www.vim.org/maillist.php --- You received this message because you are subscribed to the Google Groups "vim_dev" group. To unsubscribe from this group and stop receiving emails from it, send an email to vim_dev+unsubscr...@googlegroups.com. To view this discussion on the web visit https://groups.google.com/d/msgid/vim_dev/E1ruYxk-002IiY-7p%40256bit.org.