it appears that sed won't handle an input line longer than 8k characters,
yet no diagnostic is printed, and this limit is not mentioned in the
man page.
example
for(i in `{seq 3000 3849}) x = ($x IMG_$i.JPG)
echo $x | sed 's:IMG_([0-9]+)\.JPG:\1:g'
no error is printed, yet the output is too short and the tail
is corrupted (... 3565 IM)
i added a diagnostic for input line too long.
in trying to test this with
cat fileof8192is | sed 's/i/j/g'
i found that a single long line can result in at least
1 "sed: Output line too long." per character of the input
line. i also addressed this problem: now just one "Output line
too long" message is printed per line.
the diff would be shorter if i weren't paranoid
that the output buffer could lose its null terminator.
i couldn't provoke that situation so perhaps i just haven't
read the source carefully enough.
- erik
/n/dump/2010/0315/sys/src/cmd/sed.c:161,166 - sed.c:161,167
Rune bad; /* Dummy err ptr reference */
Rune *badp = &bad;
+ int tlwarn; /* during sub: have warned too long */
char CGMES[] = "%S command garbled: %S";
char TMMES[] = "Too much text: %S";
/n/dump/2010/0315/sys/src/cmd/sed.c:197,202 - sed.c:198,204
char *text(char *);
Rune *stext(Rune *, Rune *);
int ycomp(SedCom *);
+ void toolong(void);
char * trans(int c);
void putline(Biobuf *bp, Rune *buf, int n);
void ebputc(Biobufhdr*, int);
/n/dump/2010/0315/sys/src/cmd/sed.c:697,705 - sed.c:699,708
int
rline(Rune *buf, Rune *end)
{
- long c;
+ long c, w;
Rune r;
+ w = 0;
while ((c = getrune()) >= 0) {
r = c;
if (r == '\\') {
/n/dump/2010/0315/sys/src/cmd/sed.c:714,719 - sed.c:717,726
}
if (buf <= end)
*buf++ = r;
+ else if(w == 0){
+ fprint(2, "sed: Input line too long.\n");
+ w = 1;
+ }
}
*buf = '\0';
return -1;
/n/dump/2010/0315/sys/src/cmd/sed.c:1022,1027 - sed.c:1029,1035
* bump to the character after a 0-length match to keep from looping.
*/
sflag = 1;
+ tlwarn = 0;
if(ipc->gfl == 0) /* single substitution */
dosub(ipc->rhs);
else
/n/dump/2010/0315/sys/src/cmd/sed.c:1065,1083 - sed.c:1073,1096
errexit();
}
}
- *sp++ = c;
- if (sp >= &genbuf[LBSIZE])
- fprint(2, "sed: Output line too long.\n");
+ if(sp < &genbuf[LBSIZE]){
+ *sp++ = c;
+ if (sp >= &genbuf[LBSIZE])
+ toolong();
+ }
}
lp = loc2;
loc2 = sp - genbuf + linebuf;
while (*sp++ = *lp++)
- if (sp >= &genbuf[LBSIZE])
- fprint(2, "sed: Output line too long.\n");
+ if (sp >= &genbuf[LBSIZE]){
+ toolong();
+ break;
+ }
lp = linebuf;
sp = genbuf;
while (*lp++ = *sp++)
- ;
+ if (sp >= &genbuf[LBSIZE])
+ break;
spend = lp - 1;
}
/n/dump/2010/0315/sys/src/cmd/sed.c:1086,1097 - sed.c:1099,1120
{
while (l1 < l2) {
*sp++ = *l1++;
- if (sp >= &genbuf[LBSIZE])
- fprint(2, "sed: Output line too long.\n");
+ if (sp >= &genbuf[LBSIZE]){
+ toolong();
+ break;
+ }
}
return sp;
}
+ void
+ toolong(void)
+ {
+ if(tlwarn == 0)
+ fprint(2, "sed: Output line too long.\n");
+ tlwarn = 1;
+ }
+
char *
trans(int c)
{
/n/dump/2010/0315/sys/src/cmd/sed.c:1408,1414 - sed.c:1431,1437
Rune *
gline(Rune *addr)
{
- long c;
+ long c, w;
Rune *p;
static long peekc = 0;
/n/dump/2010/0315/sys/src/cmd/sed.c:1417,1422 - sed.c:1440,1446
sflag = 0;
lnum++;
/* Bflush(&fout);********* dumped 4/30/92 - bobf****/
+ w = 0;
do {
p = addr;
for (c = (peekc? peekc: Bgetrune(f)); c >= 0; c = Bgetrune(f)) {
/n/dump/2010/0315/sys/src/cmd/sed.c:1426,1433 - sed.c:1450,1463
*p = '\0';
return p;
}
- if (c && p < lbend)
- *p++ = c;
+ if (c) {
+ if (p < lbend)
+ *p++ = c;
+ else if(w == 0) {
+ w = 1;
+ fprint(2, "sed: Input line too long.\n");
+ }
+ }
}
/* return partial final line, adding implicit newline */
if(p != addr) {