Commit: patch 9.0.1777: patch 9.0.1771 causes problems

Christian Brabandt Sun, 20 Aug 2023 13:30:09 -0700

patch 9.0.1777: patch 9.0.1771 causes problems

Commit: https://github.com/vim/vim/commit/be07caa071ea93c07b1b2204a17237133f38b2bd
Author: Christian Brabandt <[email protected]>
Date:   Sun Aug 20 22:26:15 2023 +0200


    patch 9.0.1777: patch 9.0.1771 causes problems
    
    Problem:  patch 9.0.1771 causes problems
    Solution: revert it
    
    Revert "patch 9.0.1771: regex: combining chars in collections not handled"
    This reverts commit ca22fc36a4e8a315f199893ee8ff6253573f5fbe.
    
    Signed-off-by: Christian Brabandt <[email protected]>

diff --git a/src/regexp_bt.c b/src/regexp_bt.c
index 198946e0d..522cf37e2 100644
--- a/src/regexp_bt.c
+++ b/src/regexp_bt.c
@@ -3743,38 +3743,13 @@ regmatch(
 
          case ANYOF:
          case ANYBUT:
-           {
-               char_u  *q = OPERAND(scan);
-
-               if (c == NUL)
-                   status = RA_NOMATCH;
-               else if ((cstrchr(q, c) == NULL) == (op == ANYOF))
-                   status = RA_NOMATCH;
-               else
-               {
-                   // Check following combining characters
-                   int len = 0;
-                   int i;
-
-                   if (enc_utf8)
-                       len = utfc_ptr2len(q) - utf_ptr2len(q);
-
-                   MB_CPTR_ADV(rex.input);
-                   MB_CPTR_ADV(q);
-
-                   if (!enc_utf8 || len == 0)
-                       break;
-
-                   for (i = 0; i < len; ++i)
-                       if (q[i] != rex.input[i])
-                       {
-                           status = RA_NOMATCH;
-                           break;
-                       }
-                   rex.input += len;
-               }
-               break;
-           }
+           if (c == NUL)
+               status = RA_NOMATCH;
+           else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
+               status = RA_NOMATCH;
+           else
+               ADVANCE_REGINPUT();
+           break;
 
          case MULTIBYTECODE:
            if (has_mbyte)
diff --git a/src/regexp_nfa.c b/src/regexp_nfa.c
index 60cd29cf5..d724d527b 100644
--- a/src/regexp_nfa.c
+++ b/src/regexp_nfa.c
@@ -1764,7 +1764,6 @@ collection:
            endp = skip_anyof(p);
            if (*endp == ']')
            {
-               int plen;
                /*
                 * Try to reverse engineer character classes. For example,
                 * recognize that [0-9] stands for \d and [A-Za-z_] for \h,
@@ -2036,34 +2035,11 @@ collection:
                            if (got_coll_char == TRUE && startc == 0)
                                EMIT(0x0a);
                            else
-                           {
                                EMIT(startc);
-                               if (!(enc_utf8 && (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse)))))
-                               {
-                                   EMIT(NFA_CONCAT);
-                               }
-                           }
-                       }
-                   }
-
-                   if (enc_utf8 && (utf_ptr2len(regparse) != (plen = utfc_ptr2len(regparse))))
-                   {
-                       int i = utf_ptr2len(regparse);
-
-                       c = utf_ptr2char(regparse + i);
-
-                       // Add composing characters
-                       for (;;)
-                       {
-                           EMIT(c);
                            EMIT(NFA_CONCAT);
-                           if ((i += utf_char2len(c)) >= plen)
-                               break;
-                           c = utf_ptr2char(regparse + i);
                        }
-                       EMIT(NFA_COMPOSING);
-                       EMIT(NFA_CONCAT);
                    }
+
                    MB_PTR_ADV(regparse);
                } // while (p < endp)
 
@@ -6442,84 +6418,6 @@ nfa_regmatch(
                result_if_matched = (t->state->c == NFA_START_COLL);
                for (;;)
                {
-                   if (state->c == NFA_COMPOSING)
-                   {
-                       int         mc = curc;
-                       int         len = 0;
-                       nfa_state_T *end;
-                       nfa_state_T *sta;
-                       int         cchars[MAX_MCO];
-                       int         ccount = 0;
-                       int         j;
-
-                       sta = t->state->out->out;
-                       len = 0;
-                       if (utf_iscomposing(sta->c))
-                       {
-                           // Only match composing character(s), ignore base
-                           // character.  Used for ".{composing}" and "{composing}"
-                           // (no preceding character).
-                           len += mb_char2len(mc);
-                       }
-                       if (rex.reg_icombine && len == 0)
-                       {
-                           // If \Z was present, then ignore composing characters.
-                           // When ignoring the base character this always matches.
-                           if (sta->c != curc)
-                               result = FAIL;
-                           else
-                               result = OK;
-                           while (sta->c != NFA_END_COMPOSING)
-                               sta = sta->out;
-                       }
-                       // Check base character matches first, unless ignored.
-                       else if (len > 0 || mc == sta->c)
-//                     if (len > 0 || mc == sta->c)
-                       {
-                           if (len == 0)
-                           {
-                               len += mb_char2len(mc);
-                               sta = sta->out;
-                           }
-
-                           // We don't care about the order of composing characters.
-                           // Get them into cchars[] first.
-                           while (len < clen)
-                           {
-                               mc = mb_ptr2char(rex.input + len);
-                               cchars[ccount++] = mc;
-                               len += mb_char2len(mc);
-                               if (ccount == MAX_MCO)
-                                   break;
-                           }
-
-                           // Check that each composing char in the pattern matches a
-                           // composing char in the text.  We do not check if all
-                           // composing chars are matched.
-                           result = OK;
-                           while (sta->c != NFA_END_COMPOSING)
-                           {
-                               for (j = 0; j < ccount; ++j)
-                                   if (cchars[j] == sta->c)
-                                       break;
-                               if (j == ccount)
-                               {
-                                   result = FAIL;
-                                   break;
-                               }
-                               sta = sta->out;
-                           }
-                       }
-                       else
-                           result = FAIL;
-
-                       if (t->state->out->out1->c == NFA_END_COMPOSING)
-                       {
-                           end = t->state->out->out1;
-                           ADD_STATE_IF_MATCH(end);
-                       }
-                       break;
-                   }
                    if (state->c == NFA_END_COLL)
                    {
                        result = !result_if_matched;
diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim
index 6669dee57..b591aedbb 100644
--- a/src/testdir/test_regexp_utf8.vim
+++ b/src/testdir/test_regexp_utf8.vim
@@ -575,16 +575,5 @@ func Test_match_too_complicated()
   set regexpengine=0
 endfunc
 
-func Test_combining_chars_in_collection()
-  new
-  for i in range(0,2)
-    exe "set re=".i
-    put =['ɔ̃', 'ɔ',  '̃  ã', 'abcd']
-    :%s/[ɔ̃]//
-    call assert_equal(['', '', 'ɔ', '̃  ã', 'abcd'], getline(1,'$'))
-    %d
-  endfor
-  bw!
-endfunc
 
 " vim: shiftwidth=2 sts=2 expandtab
diff --git a/src/version.c b/src/version.c
index 9a6f1622a..43626ab65 100644
--- a/src/version.c
+++ b/src/version.c
@@ -699,6 +699,8 @@ static char *(features[]) =
 
 static int included_patches[] =
 {   /* Add new patch number below this line */
+/**/
+    1777,
 /**/
     1776,
 /**/

-- 
-- 
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php

--- 
You received this message because you are subscribed to the Google Groups 
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/vim_dev/E1qXp3j-00AzbT-Gm%40256bit.org.

Commit: patch 9.0.1777: patch 9.0.1771 causes problems

Raspunde prin e-mail lui