From a8649c2724f6fc2bd921836803b63e5b0c3fc77b Mon Sep 17 00:00:00 2001
From: AndreyDobrovolskyOdessa <[email protected]>
Date: Mon, 28 Jun 2021 02:43:02 +0300
Subject: [PATCH 2/2] vi: allow regular expressions in ':s' commands

BusyBox vi has never supported the use of regular expressions in
search/replace (':s') commands.  Implement this using GNU regex
when VI_REGEX_SEARCH is enabled.

The implementation:

- uses basic regular expressions, to match those used in the search
  command;

- only supports substitution of back references ('\0' - '\9') in the
  replacement string.  Any other character following a backslash is
  treated as that literal character.

VI_REGEX_SEARCH isn't enabled in the default build.  In that case:

function                                             old     new   delta
colon                                               4024    4021      -3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3)               Total: -3 bytes

When VI_REGEX_SEARCH is enabled:

function                                             old     new   delta
colon                                               4024    4306    +282
do_substitution                                        -     122    +122
.rodata                                           108207  108229     +22
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 2/0 up/down: 426/0)             Total: 426 bytes

Signed-off-by: Andrey Dobrovolsky <[email protected]>
Signed-off-by: Ron Yorston <[email protected]>
---
 editors/vi.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 97 insertions(+), 8 deletions(-)

diff --git a/editors/vi.c b/editors/vi.c
index 959362b25..eed51fbad 100644
--- a/editors/vi.c
+++ b/editors/vi.c
@@ -2675,6 +2675,49 @@ static char *expand_args(char *args)
 # endif
 #endif /* FEATURE_VI_COLON */
 
+#if ENABLE_FEATURE_VI_REGEX_SEARCH
+# define MAX_SUBPATTERN 10     // subpatterns \0 .. \9
+
+// Expand replace pattern 's' for one regex match, return the result length
+// orig   - input string the regm offsets refer to (unused if result is NULL)
+// regm   - array of subpatterns bounds (MAX_SUBPATTERN entries)
+// s      - replace pattern, \0 .. \9 stand for the matched subpatterns
+// result - buffer for the NUL-terminated result, NULL for a dry run
+static size_t do_substitution(const char *orig, regmatch_t *regm,
+                                       const char *s, char *result)
+{
+       const char *from;       // memcpy source pointer
+       size_t len, total_len = 0;
+
+       while (*s) {
+               len = 1;        // default is to copy one char from replace pattern
+               from = s;
+               if (*s == '\\' && s[1] != '\0') {       // lone trailing backslash is literal
+                       from = ++s;     // skip backslash
+                       if (*s >= '0' && *s < '0' + MAX_SUBPATTERN) {
+                               regmatch_t *cur_match = regm + (*s - '0');
+                               if (cur_match->rm_so >= 0) {    // subpattern did match
+                                       len = cur_match->rm_eo - cur_match->rm_so;
+                                       from = result ? orig + cur_match->rm_so : s;
+                               }
+                       }
+               }
+               total_len += len;
+               if (result) {
+                       memcpy(result, from, len);
+                       result += len;
+               }
+               s++;
+       }
+       if (result)     // done after the loop so an empty 's' is terminated too
+               *result = '\0';
+       return total_len;
+}
+
+// do_substitution dry run (x = regm, y = s): only computes the result length
+# define get_substituted_size(x, y) do_substitution(NULL, x, y, NULL)
+#endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
 // buf must be no longer than MAX_INPUT_LEN!
 static void colon(char *buf)
 {
@@ -3082,6 +3125,12 @@ static void colon(char *buf)
 #  if ENABLE_FEATURE_VI_VERBOSE_STATUS
                int last_line = 0, lines = 0;
 #  endif
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+               regex_t preg;
+               int cflags;
+               regmatch_t regmatch[MAX_SUBPATTERN];
+               char *Rorig;
+#  endif
 
                // F points to the "find" pattern
                // R points to the "replace" pattern
@@ -3098,7 +3147,6 @@ static void colon(char *buf)
                        *flags++ = '\0';        // terminate "replace"
                        gflag = *flags;
                }
-               len_R = strlen(R);
 
                if (len_F) {    // save "find" as last search pattern
                        free(last_search_pattern);
@@ -3120,18 +3168,61 @@ static void colon(char *buf)
                        b = e;
                }
 
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+               Rorig = R;
+               cflags = 0;
+               if (ignorecase)
+                       cflags = REG_ICASE;
+               memset(&preg, 0, sizeof(preg));
+               if (regcomp(&preg, F, cflags) != 0) {
+                       status_line(":s bad search pattern");
+                       goto regex_search_end;
+               }
+#  else
+               len_R = strlen(R);
+#  endif
+
                for (i = b; i <= e; i++) {      // so, :20,23 s \0 find \0 
replace \0
                        char *ls = q;           // orig line start
                        char *found;
  vc4:
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+                       found = NULL;
+                       regmatch[0].rm_so = 0;
+                       regmatch[0].rm_eo = end_line(q) - q;
+                       if (!regexec(&preg, q, MAX_SUBPATTERN, regmatch, 
REG_STARTEND)) {
+                               found = q + regmatch[0].rm_so;
+                               len_F = regmatch[0].rm_eo - regmatch[0].rm_so;
+                               len_R = get_substituted_size(regmatch, Rorig);
+                               R = xmalloc(len_R + 1);
+                               do_substitution(q, regmatch, Rorig, R);
+                       }
+#  else
                        found = char_search(q, F, (FORWARD << 1) | LIMITED);    
// search cur line only for "find"
+#  endif
                        if (found) {
                                uintptr_t bias;
                                // we found the "find" pattern - delete it
                                // For undo support, the first item should not 
be chained
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+                               if (len_F) {    // match can be empty, no 
delete needed
+                                       text_hole_delete(found, found + len_F - 
1,
+                                                       subs ? 
ALLOW_UNDO_CHAIN: ALLOW_UNDO);
+                               }
+                               // insert the "replace" patern
+                               bias = string_insert(found, R,
+                                                       subs || len_F ? 
ALLOW_UNDO_CHAIN: ALLOW_UNDO);
+                               free(R);
+#  else
                                text_hole_delete(found, found + len_F - 1,
                                                        subs ? 
ALLOW_UNDO_CHAIN: ALLOW_UNDO);
-                               // can't do this above, no undo => no third 
argument
+                               // insert the "replace" patern
+                               bias = string_insert(found, R, 
ALLOW_UNDO_CHAIN);
+#  endif
+                               found += bias;
+                               ls += bias;
+                               dot = ls;
+                               //q += bias; - recalculated anyway
                                subs++;
 #  if ENABLE_FEATURE_VI_VERBOSE_STATUS
                                if (last_line != i) {
@@ -3139,12 +3230,6 @@ static void colon(char *buf)
                                        ++lines;
                                }
 #  endif
-                               // insert the "replace" patern
-                               bias = string_insert(found, R, 
ALLOW_UNDO_CHAIN);
-                               found += bias;
-                               ls += bias;
-                               dot = ls;
-                               //q += bias; - recalculated anyway
                                // check for "global"  :s/foo/bar/g
                                if (gflag == 'g') {
                                        if ((found + len_R) < end_line(ls)) {
@@ -3164,6 +3249,10 @@ static void colon(char *buf)
                                status_line("%d substitutions on %d lines", 
subs, lines);
 #  endif
                }
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+ regex_search_end:
+               regfree(&preg);
+#  endif
 # endif /* FEATURE_VI_SEARCH */
        } else if (strncmp(cmd, "version", i) == 0) {  // show software version
                status_line(BB_VER);
-- 
2.31.1

_______________________________________________
busybox mailing list
[email protected]
http://lists.busybox.net/mailman/listinfo/busybox

Reply via email to