Hi Bram,
when working with CSV files, I have often wished for more than 10
capturing groups. So here is a patch that allows up to 99 capturing groups
in the replacement part. It uses the Perl-like syntax ${1} through ${99}
to refer to the capturing groups in the replacement part. If the referenced
capturing group does not exist, it resolves to the empty string.
I have tested it locally and it works for me[1]. This obviously needs to
be tested very thoroughly before it is included.
[1] running make test as well as trying several different :s commands.
regards,
Christian
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
diff --git a/runtime/doc/pattern.txt b/runtime/doc/pattern.txt
--- a/runtime/doc/pattern.txt
+++ b/runtime/doc/pattern.txt
@@ -960,6 +960,16 @@
in the pattern (going left to right), NOT based on what is matched
first.
+${1} In the replacement part, replace with the first sub- */${}*
+ expression, that was matched with \( and \).
+ ...
+${99} In the replacement part, replace with the 99th sub-expression.
+ Note: If there are fewer than 99 sub-expressions, ${99} will be
+ replaced with an empty string.
+ Note also, that the numbering of groups is done based on which
+ "\(" comes first in the pattern (going left to right), NOT based
+ on what is matched first.
+
\%(\) A pattern enclosed by escaped parentheses. */\%(\)* */\%(* *E53*
Just like \(\), but without counting it as a sub-expression. This
allows using more groups and it's a little bit faster.
diff --git a/src/regexp.c b/src/regexp.c
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -1116,8 +1116,10 @@
else if ((OP(scan) == BOW
|| OP(scan) == EOW
|| OP(scan) == NOTHING
- || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
- || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
+ || (OP(scan) == MOPEN && OPERAND_MIN(scan) == 0)
+ || OP(scan) == NOPEN
+ || (OP(scan) == MCLOSE && OPERAND_MIN(scan) == 0)
+ || OP(scan) == NCLOSE)
&& OP(regnext(scan)) == EXACTLY)
{
#ifdef FEAT_MBYTE
@@ -1245,7 +1247,11 @@
EMSG_M_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
parno = regnpar;
++regnpar;
- ret = regnode(MOPEN + parno);
+ ret = regnode(MOPEN);
+ if (ret == JUST_CALC_SIZE)
+ regsize += 4;
+ else
+ regcode = re_put_long(regcode,parno);
}
else if (paren == REG_NPAREN)
{
@@ -1286,8 +1292,12 @@
#ifdef FEAT_SYN_HL
paren == REG_ZPAREN ? ZCLOSE + parno :
#endif
- paren == REG_PAREN ? MCLOSE + parno :
+ paren == REG_PAREN ? MCLOSE :
paren == REG_NPAREN ? NCLOSE : END);
+ if (ender == JUST_CALC_SIZE && paren == REG_PAREN)
+ regsize += 4;
+ else if (paren == REG_PAREN)
+ regcode = re_put_long(regcode, parno);
regtail(ret, ender);
/* Hook the tails of the branches to the closing node. */
@@ -1794,9 +1804,19 @@
case Magic('8'):
case Magic('9'):
{
- int refnum;
+ int refnum;
+ int refnum2 = -1;
+ int d = getchr();
+ if ( '0' <= d && d <= '9')
+ {
+ refnum2 = d - Magic('0');
+ }
+ else
+ ungetchr();
refnum = c - Magic('0');
+ if (refnum2 >= 0 && refnum2 <= 9)
+ refnum = refnum * 10 + refnum2;
/*
* Check if the back reference is legal. We must have seen the
* close brace.
@@ -1815,7 +1835,11 @@
if (*p == NUL)
EMSG_RET_NULL(_("E65: Illegal back reference"));
}
- ret = regnode(BACKREF + refnum);
+ ret = regnode(BACKREF);
+ if (ret == JUST_CALC_SIZE)
+ regsize += 4;
+ else
+ regcode = re_put_long(regcode, refnum);
}
break;
@@ -1851,10 +1875,18 @@
break;
#endif
- case 's': ret = regnode(MOPEN + 0);
+ case 's': ret = regnode(MOPEN);
+ if (ret == JUST_CALC_SIZE)
+ regsize += 4;
+ else
+ regcode = re_put_long(regcode,0);
break;
- case 'e': ret = regnode(MCLOSE + 0);
+ case 'e': ret = regnode(MCLOSE);
+ if (ret == JUST_CALC_SIZE)
+ regsize += 4;
+ else
+ regcode = re_put_long(regcode,0);
break;
default: EMSG_RET_NULL(_("E68: Invalid character after \\z"));
@@ -4456,18 +4488,9 @@
}
break;
- case MOPEN + 0: /* Match start: \zs */
- case MOPEN + 1: /* \( */
- case MOPEN + 2:
- case MOPEN + 3:
- case MOPEN + 4:
- case MOPEN + 5:
- case MOPEN + 6:
- case MOPEN + 7:
- case MOPEN + 8:
- case MOPEN + 9:
+ case MOPEN: /* Match start: \zs, \( */
{
- no = op - MOPEN;
+ no = OPERAND_MIN(scan);
cleanup_subexpr();
rp = regstack_push(RS_MOPEN, scan);
if (rp == NULL)
@@ -4516,18 +4539,9 @@
break;
#endif
- case MCLOSE + 0: /* Match end: \ze */
- case MCLOSE + 1: /* \) */
- case MCLOSE + 2:
- case MCLOSE + 3:
- case MCLOSE + 4:
- case MCLOSE + 5:
- case MCLOSE + 6:
- case MCLOSE + 7:
- case MCLOSE + 8:
- case MCLOSE + 9:
+ case MCLOSE: /* Match end: \ze \) */
{
- no = op - MCLOSE;
+ no = OPERAND_MIN(scan);
cleanup_subexpr();
rp = regstack_push(RS_MCLOSE, scan);
if (rp == NULL)
@@ -4568,22 +4582,14 @@
break;
#endif
- case BACKREF + 1:
- case BACKREF + 2:
- case BACKREF + 3:
- case BACKREF + 4:
- case BACKREF + 5:
- case BACKREF + 6:
- case BACKREF + 7:
- case BACKREF + 8:
- case BACKREF + 9:
+ case BACKREF:
{
int len;
linenr_T clnum;
colnr_T ccol;
char_u *p;
- no = op - BACKREF;
+ no = OPERAND_MIN(scan);
cleanup_subexpr();
if (!REG_MULTI) /* Single-line regexp */
{
@@ -7055,6 +7061,19 @@
{
if (c == '&' && magic)
no = 0;
+ else if (c == '$' && *src != NUL && *src == '{')
+ {
+ int t=0;
+ src++;
+ while ( '0' <= *src && *src <= '9' && *src != NUL)
+ {
+ t = 10*t + *src - '0';
+ ++src;
+ }
+ if (*src == '}')
+ ++src;
+ no=t;
+ }
else if (c == '\\' && *src != NUL)
{
if (*src == '&' && !magic)
diff --git a/src/regexp.h b/src/regexp.h
--- a/src/regexp.h
+++ b/src/regexp.h
@@ -19,7 +19,7 @@
* The second one (index 1) is the first sub-match, referenced with "\1".
* This goes up to the tenth (index 9), referenced with "\9".
*/
-#define NSUBEXP 10
+#define NSUBEXP 100
/*
* Structure returned by vim_regcomp() to pass on to vim_regexec().