not that good :'( # echo -n 'aba' | sed -E 's/a|$/x/g' xbx# #
2011/6/18 sven falempin <[email protected]> > Good for me: > > Index: process.c > =================================================================== > RCS file: /cvs/src/usr.bin/sed/process.c,v > retrieving revision 1.15 > diff -u -p -r1.15 process.c > --- process.c 27 Oct 2009 23:59:43 -0000 1.15 > +++ process.c 18 Jun 2011 21:04:39 -0000 > @@ -336,31 +336,18 @@ substitute(struct s_command *cp) > switch (n) { > case 0: /* Global */ > do { > - if (lastempty || match[0].rm_so != match[0].rm_eo) > { > - /* Locate start of replaced string. */ > - re_off = match[0].rm_so; > - /* Copy leading retained string. */ > - cspace(&SS, s, re_off, APPEND); > - /* Add in regular expression. */ > + if (match[0].rm_so) > + cspace(&SS, s, match[0].rm_so, APPEND); > + if (match[0].rm_so == match[0].rm_eo) { > regsub(&SS, s, cp->u.s->new); > - } > - > - /* Move past this match. */ > - if (match[0].rm_so != match[0].rm_eo) { > - s += match[0].rm_eo; > - slen -= match[0].rm_eo; > - lastempty = 0; > + cspace(&SS, s, match[0].rm_eo++, APPEND); > } else { > - if (match[0].rm_so == 0) > - cspace(&SS, s, match[0].rm_so + 1, > - APPEND); > - else > - cspace(&SS, s + match[0].rm_so, 1, > - APPEND); > - s += match[0].rm_so + 1; > - slen -= match[0].rm_so + 1; > - lastempty = 1; > + if (match[0].rm_so || match[0].rm_so != > match[0].rm_eo) > + regsub(&SS, s, cp->u.s->new); > } > + /* Move past this match. */ > + s += match[0].rm_eo; > + slen -= match[0].rm_eo; > } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, > slen)); > /* Copy trailing retained string. */ > if (slen > 0) > > > 2011/6/18 sven falempin <[email protected]> > >> Hello, >> >> Well, i failed to apply your second patch :s (patch ./process.c >> ./yourdiff) >> doing it manually (twice), it pass test but does not correct the initial >> problem. >> # echo 'aba' | sed -E 's/a|$/x/g' >> xbx >> >> Maybe i'm wrong ? >> >> Nevertheless, my attempt is not that much better, it destroy the leading >> \n, i ran the regression test this time. >> >> # echo 'aba' | sed -E 's/a|$/x/g' >> xbxx# >> # >> >> At least, it explain the purpose of lastempty ? >> >> Regards. >> >> Index: process.c >> =================================================================== >> RCS file: /cvs/src/usr.bin/sed/process.c,v >> retrieving revision 1.15 >> diff -u -p -r1.15 process.c >> --- process.c 27 Oct 2009 23:59:43 -0000 1.15 >> +++ process.c 18 Jun 2011 20:50:28 -0000 >> @@ -336,31 +336,15 @@ substitute(struct s_command *cp) >> switch (n) { >> case 0: /* Global */ >> do { >> - if (lastempty || match[0].rm_so != match[0].rm_eo) >> { >> - /* Locate start of replaced string. */ >> - re_off = match[0].rm_so; >> - /* Copy leading retained string. */ >> - cspace(&SS, s, re_off, APPEND); >> - /* Add in regular expression. */ >> + if (match[0].rm_so) >> + cspace(&SS, s, match[0].rm_so, APPEND); >> + if (match[0].rm_so == match[0].rm_eo) >> + cspace(&SS, s, match[0].rm_eo++, APPEND); >> + if (match[0].rm_so || match[0].rm_so != >> match[0].rm_eo) >> regsub(&SS, s, cp->u.s->new); >> - } >> - >> /* Move past this match. */ >> - if (match[0].rm_so != match[0].rm_eo) { >> - s += match[0].rm_eo; >> - slen -= match[0].rm_eo; >> - lastempty = 0; >> - } else { >> - if (match[0].rm_so == 0) >> - cspace(&SS, s, match[0].rm_so + 1, >> - APPEND); >> - else >> - cspace(&SS, s + match[0].rm_so, 1, >> - APPEND); >> - s += match[0].rm_so + 1; >> - slen -= match[0].rm_so + 1; >> - lastempty = 1; >> - } >> + s += match[0].rm_eo; >> + slen -= match[0].rm_eo; >> } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, >> slen)); >> /* Copy trailing retained string. */ >> if (slen > 0) >> >> >> >> 2011/6/18 Ingo Schwarze <[email protected]> >> >>> Hi Otto, >>> >>> from careful code inspection, your patch looks correct. >>> >>> Replacing the ">" by ">=" is wrong, it would breaks this test case: >>> >>> echo abc | ./obj/sed -E 's/(b|())/x/g' >>> >>> However, the code is hard to follow, and i suggest the following >>> refactoring such that people doing a review can more easily check >>> the correctness of the code after application of the patch: >>> >>> 1) X || Y || (!Y && Z) <=> X || Y || Z >>> so leave out the middle + line of your patch. >>> >>> 2) In case of match[0].rm_so == 0, >>> the cspace in the if block you touch has no effect >>> because copying zero bytes would be pointless. >>> Thus, we can pull the cspace before the main if, >>> optionally with an if (match[0].rm_so). >>> That's also much easier to understand: >>> *Always* copy the leading retained string, if any. >>> >>> 3) In the else clause of "Move past this match", >>> the if is completely pointless. >>> For vanishing rm_so, both clauses do the same thing. >>> >>> 4) Pull out the update of s and slen at the end. >>> In the if checking whether the current match is empty, >>> increment rm_eo exactly at the same time as copying the >>> additional character. >>> >>> The resulting code is much shorter and simpler, >>> and hence much easier to audit for correctness. >>> >>> However, i'm a bit scared about touching this code. >>> Breaking this would be really bad. >>> I'm currently running a make build with it. >>> >>> Yours, >>> Ingo >>> >>> >>> > Index: process.c >>> > =================================================================== >>> > RCS file: /cvs/src/usr.bin/sed/process.c,v >>> > retrieving revision 1.15 >>> > diff -u -p -r1.15 process.c >>> > --- process.c 27 Oct 2009 23:59:43 -0000 1.15 >>> > +++ process.c 15 Jun 2011 06:31:08 -0000 >>> > @@ -336,7 +336,9 @@ substitute(struct s_command *cp) >>> > switch (n) { >>> > case 0: /* Global */ >>> > do { >>> > - if (lastempty || match[0].rm_so != >>> match[0].rm_eo) { >>> > + if (lastempty || match[0].rm_so != match[0].rm_eo >>> || >>> > + (match[0].rm_so == match[0].rm_eo && >>> > + match[0].rm_so > 0)) { >>> > /* Locate start of replaced string. */ >>> > re_off = match[0].rm_so; >>> > /* Copy leading retained string. */ >>> >>> >>> Index: process.c >>> =================================================================== >>> RCS file: /cvs/src/usr.bin/sed/process.c,v >>> retrieving revision 1.15 >>> diff -u -p -r1.15 process.c >>> --- process.c 27 Oct 2009 23:59:43 -0000 1.15 >>> +++ process.c 18 Jun 2011 18:23:00 -0000 >>> @@ -336,31 +336,25 @@ substitute(struct s_command *cp) >>> switch (n) { >>> case 0: /* Global */ >>> do { >>> - if (lastempty || match[0].rm_so != >>> match[0].rm_eo) { >>> - /* Locate start of replaced string. */ >>> - re_off = match[0].rm_so; >>> - /* Copy leading retained string. */ >>> - cspace(&SS, s, re_off, APPEND); >>> - /* Add in regular expression. */ >>> + /* Copy leading retained string. */ >>> + if (match[0].rm_so) >>> + cspace(&SS, s, match[0].rm_so, APPEND); >>> + >>> + /* Append replacement. */ >>> + if (lastempty || match[0].rm_so || >>> + match[0].rm_so != match[0].rm_eo) >>> regsub(&SS, s, cp->u.s->new); >>> - } >>> >>> - /* Move past this match. */ >>> - if (match[0].rm_so != match[0].rm_eo) { >>> - s += match[0].rm_eo; >>> - slen -= match[0].rm_eo; >>> - lastempty = 0; >>> - } else { >>> - if (match[0].rm_so == 0) >>> - cspace(&SS, s, match[0].rm_so + >>> 1, >>> - APPEND); >>> - else >>> - cspace(&SS, s + match[0].rm_so, >>> 1, >>> - APPEND); >>> - s += match[0].rm_so + 1; >>> - slen -= match[0].rm_so + 1; >>> + if (match[0].rm_so == match[0].rm_eo) { >>> + cspace(&SS, s + match[0].rm_eo++, 1, >>> APPEND); >>> lastempty = 1; >>> - } >>> + } else >>> + lastempty = 0; >>> + >>> + /* Move past this match. */ >>> + s += match[0].rm_eo; >>> + slen -= match[0].rm_eo; >>> + >>> } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, >>> slen)); >>> /* Copy trailing retained string. */ >>> if (slen > 0) >>> >>> >> >> >> -- >> >> --------------------------------------------------------------------------------------------------------------------- >> () ascii ribbon campaign - against html e-mail >> /\ >> >> > > > -- > > --------------------------------------------------------------------------------------------------------------------- > () ascii ribbon campaign - against html e-mail > /\ > > -- --------------------------------------------------------------------------------------------------------------------- () ascii ribbon campaign - against html e-mail /\

