Hi
1) Vim is trying to allocate a negative number of bytes
when doing:
$ vim -u NONE \
-c 'call search("\\%[123456789012345678901234567]\\{1,30000}")'
It reports:
E342: Out of memory! (allocating 18446744071740188400 bytes)
(18446744071740188400 in hex is 0xFFFFFFFF8A9DE6F0)
It's admittedly an improbable regexp, but vim should not
allocate a negative number of bytes. It happens because
allocated size for states in nfa_regmatch() uses 'int'
instead of 'size_t' so beyond 2Gb, vim tries to allocate
a negative number of bytes.
Fixed in attached patch: "int-overflow-regexp_nfa.c-7.4.591.patch"
Perhaps new regexp engine should not be used when it creates
so many states because it's slow and uses lots of memory,
but I did not try to change that.
2) Since some regexp can allocate many nfa_thread_T states,
I looked whether we can save memory and noticed that
sizeof(nfa_thread_T) is quite big: 1360 bytes on x86_64.
Patch "memory-opt-regexp_nfa.c-7.4.591.patch" avoids
wasting padding bytes in:
struct multipos
{
lpos_T start;
lpos_T end;
} multi[NSUBEXP];
start.lnum is 64 bits (long) whereas start.col is 32 bits (int).
so 2x32 bits of padding are wasted inside the multipos struct.
We can avoid wasting 2x32 bits of padding by grouping start.lnum
and end.lnum next to each other using:
struct multipos
{
linenr_T start_lnum;
linenr_T end_lnum;
colnr_T start_col;
colnr_T end_col;
} multi[NSUBEXP];
This makes sizeof(nfa_thread_T)=1040 bytes instead of 1360 bytes.
Using the same case as earlier...
$ vim -u NONE \
-c 'call search("\\%[123456789012345678901234567]\\{1,30000}")'
Prior to patch, nfa_regmatch() was allocating:
1,710,003 states * 1360 bytes = 2,325,604,080 bytes.
After patch, it allocates:
1,710,003 states * 1040 bytes = 1,778,403,120 bytes
That saves 23.5% of memory for NFA states.
Regards
Dominique
--
--
You received this message from the "vim_dev" maillist.
Do not top-post! Type your reply below the text you are replying to.
For more information, visit http://www.vim.org/maillist.php
---
You received this message because you are subscribed to the Google Groups
"vim_dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.
diff -r 3d2db5a7403f src/regexp_nfa.c
--- a/src/regexp_nfa.c Thu Jan 22 22:41:56 2015 +0100
+++ b/src/regexp_nfa.c Sun Jan 25 00:19:56 2015 +0100
@@ -5399,7 +5399,7 @@
regsubs_T *m;
{
int result;
- int size = 0;
+ size_t size = 0;
int flag = 0;
int go_to_nextline = FALSE;
nfa_thread_T *t;
diff -r 3d2db5a7403f src/regexp_nfa.c
--- a/src/regexp_nfa.c Thu Jan 22 22:41:56 2015 +0100
+++ b/src/regexp_nfa.c Sun Jan 25 00:42:39 2015 +0100
@@ -1453,7 +1453,7 @@
* matched an unlimited number of times. NFA_NOPEN is
* added only once at a position, while NFA_SPLIT is
* added multiple times. This is more efficient than
- * not allowsing NFA_SPLIT multiple times, it is used
+ * not allowing NFA_SPLIT multiple times, it is used
* a lot. */
EMIT(NFA_NOPEN);
break;
@@ -3717,8 +3717,10 @@
{
struct multipos
{
- lpos_T start;
- lpos_T end;
+ linenr_T start_lnum;
+ linenr_T end_lnum;
+ colnr_T start_col;
+ colnr_T end_col;
} multi[NSUBEXP];
struct linepos
{
@@ -3803,10 +3805,10 @@
if (REG_MULTI)
fprintf(log_fd, "*** group %d, start: c=%d, l=%d, end: c=%d, l=%d\n",
j,
- sub->list.multi[j].start.col,
- (int)sub->list.multi[j].start.lnum,
- sub->list.multi[j].end.col,
- (int)sub->list.multi[j].end.lnum);
+ sub->list.multi[j].start_col,
+ (int)sub->list.multi[j].start_lnum,
+ sub->list.multi[j].end_col,
+ (int)sub->list.multi[j].end_lnum);
else
{
char *s = (char *)sub->list.line[j].start;
@@ -3943,8 +3945,11 @@
{
if (REG_MULTI)
{
- if (from->list.multi[0].end.lnum >= 0)
- to->list.multi[0].end = from->list.multi[0].end;
+ if (from->list.multi[0].end_lnum >= 0)
+ {
+ to->list.multi[0].end_lnum = from->list.multi[0].end_lnum;
+ to->list.multi[0].end_col = from->list.multi[0].end_col;
+ }
}
else
{
@@ -3976,33 +3981,33 @@
for (i = 0; i < todo; ++i)
{
if (i < sub1->in_use)
- s1 = sub1->list.multi[i].start.lnum;
+ s1 = sub1->list.multi[i].start_lnum;
else
s1 = -1;
if (i < sub2->in_use)
- s2 = sub2->list.multi[i].start.lnum;
+ s2 = sub2->list.multi[i].start_lnum;
else
s2 = -1;
if (s1 != s2)
return FALSE;
- if (s1 != -1 && sub1->list.multi[i].start.col
- != sub2->list.multi[i].start.col)
+ if (s1 != -1 && sub1->list.multi[i].start_col
+ != sub2->list.multi[i].start_col)
return FALSE;
if (nfa_has_backref)
{
if (i < sub1->in_use)
- s1 = sub1->list.multi[i].end.lnum;
+ s1 = sub1->list.multi[i].end_lnum;
else
s1 = -1;
if (i < sub2->in_use)
- s2 = sub2->list.multi[i].end.lnum;
+ s2 = sub2->list.multi[i].end_lnum;
else
s2 = -1;
if (s1 != s2)
return FALSE;
- if (s1 != -1 && sub1->list.multi[i].end.col
- != sub2->list.multi[i].end.col)
+ if (s1 != -1 && sub1->list.multi[i].end_col
+ != sub2->list.multi[i].end_col)
return FALSE;
}
}
@@ -4053,7 +4058,7 @@
if (sub->in_use <= 0)
col = -1;
else if (REG_MULTI)
- col = sub->list.multi[0].start.col;
+ col = sub->list.multi[0].start_col;
else
col = (int)(sub->list.line[0].start - regline);
nfa_set_code(state->c);
@@ -4473,7 +4478,8 @@
{
if (subidx < sub->in_use)
{
- save_lpos = sub->list.multi[subidx].start;
+ save_lpos.lnum = sub->list.multi[subidx].start_lnum;
+ save_lpos.col = sub->list.multi[subidx].start_col;
save_in_use = -1;
}
else
@@ -4481,20 +4487,20 @@
save_in_use = sub->in_use;
for (i = sub->in_use; i < subidx; ++i)
{
- sub->list.multi[i].start.lnum = -1;
- sub->list.multi[i].end.lnum = -1;
+ sub->list.multi[i].start_lnum = -1;
+ sub->list.multi[i].end_lnum = -1;
}
sub->in_use = subidx + 1;
}
if (off == -1)
{
- sub->list.multi[subidx].start.lnum = reglnum + 1;
- sub->list.multi[subidx].start.col = 0;
+ sub->list.multi[subidx].start_lnum = reglnum + 1;
+ sub->list.multi[subidx].start_col = 0;
}
else
{
- sub->list.multi[subidx].start.lnum = reglnum;
- sub->list.multi[subidx].start.col =
+ sub->list.multi[subidx].start_lnum = reglnum;
+ sub->list.multi[subidx].start_col =
(colnr_T)(reginput - regline + off);
}
}
@@ -4530,7 +4536,10 @@
if (save_in_use == -1)
{
if (REG_MULTI)
- sub->list.multi[subidx].start = save_lpos;
+ {
+ sub->list.multi[subidx].start_lnum = save_lpos.lnum;
+ sub->list.multi[subidx].start_col = save_lpos.col;
+ }
else
sub->list.line[subidx].start = save_ptr;
}
@@ -4540,7 +4549,7 @@
case NFA_MCLOSE:
if (nfa_has_zend && (REG_MULTI
- ? subs->norm.list.multi[0].end.lnum >= 0
+ ? subs->norm.list.multi[0].end_lnum >= 0
: subs->norm.list.line[0].end != NULL))
{
/* Do not overwrite the position set by \ze. */
@@ -4594,16 +4603,17 @@
sub->in_use = subidx + 1;
if (REG_MULTI)
{
- save_lpos = sub->list.multi[subidx].end;
+ save_lpos.lnum = sub->list.multi[subidx].end_lnum;
+ save_lpos.col = sub->list.multi[subidx].end_col;
if (off == -1)
{
- sub->list.multi[subidx].end.lnum = reglnum + 1;
- sub->list.multi[subidx].end.col = 0;
+ sub->list.multi[subidx].end_lnum = reglnum + 1;
+ sub->list.multi[subidx].end_col = 0;
}
else
{
- sub->list.multi[subidx].end.lnum = reglnum;
- sub->list.multi[subidx].end.col =
+ sub->list.multi[subidx].end_lnum = reglnum;
+ sub->list.multi[subidx].end_col =
(colnr_T)(reginput - regline + off);
}
/* avoid compiler warnings */
@@ -4628,7 +4638,10 @@
sub = &subs->norm;
if (REG_MULTI)
- sub->list.multi[subidx].end = save_lpos;
+ {
+ sub->list.multi[subidx].end_lnum = save_lpos.lnum;
+ sub->list.multi[subidx].end_col = save_lpos.col;
+ }
else
sub->list.line[subidx].end = save_ptr;
sub->in_use = save_in_use;
@@ -4816,15 +4829,15 @@
if (REG_MULTI)
{
- if (sub->list.multi[subidx].start.lnum < 0
- || sub->list.multi[subidx].end.lnum < 0)
+ if (sub->list.multi[subidx].start_lnum < 0
+ || sub->list.multi[subidx].end_lnum < 0)
goto retempty;
- if (sub->list.multi[subidx].start.lnum == reglnum
- && sub->list.multi[subidx].end.lnum == reglnum)
+ if (sub->list.multi[subidx].start_lnum == reglnum
+ && sub->list.multi[subidx].end_lnum == reglnum)
{
- len = sub->list.multi[subidx].end.col
- - sub->list.multi[subidx].start.col;
- if (cstrncmp(regline + sub->list.multi[subidx].start.col,
+ len = sub->list.multi[subidx].end_col
+ - sub->list.multi[subidx].start_col;
+ if (cstrncmp(regline + sub->list.multi[subidx].start_col,
reginput, &len) == 0)
{
*bytelen = len;
@@ -4834,10 +4847,10 @@
else
{
if (match_with_backref(
- sub->list.multi[subidx].start.lnum,
- sub->list.multi[subidx].start.col,
- sub->list.multi[subidx].end.lnum,
- sub->list.multi[subidx].end.col,
+ sub->list.multi[subidx].start_lnum,
+ sub->list.multi[subidx].start_col,
+ sub->list.multi[subidx].end_lnum,
+ sub->list.multi[subidx].end_col,
bytelen) == RA_MATCH)
return TRUE;
}
@@ -5399,7 +5412,7 @@
regsubs_T *m;
{
int result;
- int size = 0;
+ size_t size = 0;
int flag = 0;
int go_to_nextline = FALSE;
nfa_thread_T *t;
@@ -5432,6 +5445,7 @@
/* Allocate memory for the lists of nodes. */
size = (nstate + 1) * sizeof(nfa_thread_T);
+
list[0].t = (nfa_thread_T *)lalloc(size, TRUE);
list[0].len = nstate + 1;
list[1].t = (nfa_thread_T *)lalloc(size, TRUE);
@@ -5473,8 +5487,8 @@
{
if (REG_MULTI)
{
- m->norm.list.multi[0].start.lnum = reglnum;
- m->norm.list.multi[0].start.col = (colnr_T)(reginput - regline);
+ m->norm.list.multi[0].start_lnum = reglnum;
+ m->norm.list.multi[0].start_col = (colnr_T)(reginput - regline);
}
else
m->norm.list.line[0].start = reginput;
@@ -5571,7 +5585,7 @@
if (t->subs.norm.in_use <= 0)
col = -1;
else if (REG_MULTI)
- col = t->subs.norm.list.multi[0].start.col;
+ col = t->subs.norm.list.multi[0].start_col;
else
col = (int)(t->subs.norm.list.line[0].start - regline);
nfa_set_code(t->state->c);
@@ -5852,7 +5866,7 @@
* continue with what follows. */
if (REG_MULTI)
/* TODO: multi-line match */
- bytelen = m->norm.list.multi[0].end.col
+ bytelen = m->norm.list.multi[0].end_col
- (int)(reginput - regline);
else
bytelen = (int)(m->norm.list.line[0].end - reginput);
@@ -6732,7 +6746,7 @@
if (add)
{
if (REG_MULTI)
- m->norm.list.multi[0].start.col =
+ m->norm.list.multi[0].start_col =
(colnr_T)(reginput - regline) + clen;
else
m->norm.list.line[0].start = reginput + clen;
@@ -6845,8 +6859,11 @@
{
for (i = 0; i < subs.norm.in_use; i++)
{
- reg_startpos[i] = subs.norm.list.multi[i].start;
- reg_endpos[i] = subs.norm.list.multi[i].end;
+ reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum;
+ reg_startpos[i].col = subs.norm.list.multi[i].start_col;
+
+ reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum;
+ reg_endpos[i].col = subs.norm.list.multi[i].end_col;
}
if (reg_startpos[0].lnum < 0)
@@ -6894,13 +6911,13 @@
struct multipos *mpos = &subs.synt.list.multi[i];
/* Only accept single line matches that are valid. */
- if (mpos->start.lnum >= 0
- && mpos->start.lnum == mpos->end.lnum
- && mpos->end.col >= mpos->start.col)
+ if (mpos->start_lnum >= 0
+ && mpos->start_lnum == mpos->end_lnum
+ && mpos->end_col >= mpos->start_col)
re_extmatch_out->matches[i] =
- vim_strnsave(reg_getline(mpos->start.lnum)
- + mpos->start.col,
- mpos->end.col - mpos->start.col);
+ vim_strnsave(reg_getline(mpos->start_lnum)
+ + mpos->start_col,
+ mpos->end_col - mpos->start_col);
}
else
{