Hello,
Well, i failed to apply your second patch :s (patch ./process.c ./yourdiff)
doing it manually (twice), it pass test but does not correct the initial
problem.
# echo 'aba' | sed -E 's/a|$/x/g'
xbx
Maybe i'm wrong ?
Nevertheless, my attempt is not that much better, it destroy the leading \n,
i ran the regression test this time.
# echo 'aba' | sed -E 's/a|$/x/g'
xbxx#
#
At least, it explain the purpose of lastempty ?
Regards.
Index: process.c
===================================================================
RCS file: /cvs/src/usr.bin/sed/process.c,v
retrieving revision 1.15
diff -u -p -r1.15 process.c
--- process.c 27 Oct 2009 23:59:43 -0000 1.15
+++ process.c 18 Jun 2011 20:50:28 -0000
@@ -336,31 +336,15 @@ substitute(struct s_command *cp)
switch (n) {
case 0: /* Global */
do {
- if (lastempty || match[0].rm_so != match[0].rm_eo) {
- /* Locate start of replaced string. */
- re_off = match[0].rm_so;
- /* Copy leading retained string. */
- cspace(&SS, s, re_off, APPEND);
- /* Add in regular expression. */
+ if (match[0].rm_so)
+ cspace(&SS, s, match[0].rm_so, APPEND);
+ if (match[0].rm_so == match[0].rm_eo)
+ cspace(&SS, s, match[0].rm_eo++, APPEND);
+ if (match[0].rm_so || match[0].rm_so !=
match[0].rm_eo)
regsub(&SS, s, cp->u.s->new);
- }
-
/* Move past this match. */
- if (match[0].rm_so != match[0].rm_eo) {
- s += match[0].rm_eo;
- slen -= match[0].rm_eo;
- lastempty = 0;
- } else {
- if (match[0].rm_so == 0)
- cspace(&SS, s, match[0].rm_so + 1,
- APPEND);
- else
- cspace(&SS, s + match[0].rm_so, 1,
- APPEND);
- s += match[0].rm_so + 1;
- slen -= match[0].rm_so + 1;
- lastempty = 1;
- }
+ s += match[0].rm_eo;
+ slen -= match[0].rm_eo;
} while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
/* Copy trailing retained string. */
if (slen > 0)
2011/6/18 Ingo Schwarze <[email protected]>
> Hi Otto,
>
> from careful code inspection, your patch looks correct.
>
> Replacing the ">" by ">=" is wrong, it would breaks this test case:
>
> echo abc | ./obj/sed -E 's/(b|())/x/g'
>
> However, the code is hard to follow, and i suggest the following
> refactoring such that people doing a review can more easily check
> the correctness of the code after application of the patch:
>
> 1) X || Y || (!Y && Z) <=> X || Y || Z
> so leave out the middle + line of your patch.
>
> 2) In case of match[0].rm_so == 0,
> the cspace in the if block you touch has no effect
> because copying zero bytes would be pointless.
> Thus, we can pull the cspace before the main if,
> optionally with an if (match[0].rm_so).
> That's also much easier to understand:
> *Always* copy the leading retained string, if any.
>
> 3) In the else clause of "Move past this match",
> the if is completely pointless.
> For vanishing rm_so, both clauses do the same thing.
>
> 4) Pull out the update of s and slen at the end.
> In the if checking whether the current match is empty,
> increment rm_eo exactly at the same time as copying the
> additional character.
>
> The resulting code is much shorter and simpler,
> and hence much easier to audit for correctness.
>
> However, i'm a bit scared about touching this code.
> Breaking this would be really bad.
> I'm currently running a make build with it.
>
> Yours,
> Ingo
>
>
> > Index: process.c
> > ===================================================================
> > RCS file: /cvs/src/usr.bin/sed/process.c,v
> > retrieving revision 1.15
> > diff -u -p -r1.15 process.c
> > --- process.c 27 Oct 2009 23:59:43 -0000 1.15
> > +++ process.c 15 Jun 2011 06:31:08 -0000
> > @@ -336,7 +336,9 @@ substitute(struct s_command *cp)
> > switch (n) {
> > case 0: /* Global */
> > do {
> > - if (lastempty || match[0].rm_so != match[0].rm_eo)
> {
> > + if (lastempty || match[0].rm_so != match[0].rm_eo
> ||
> > + (match[0].rm_so == match[0].rm_eo &&
> > + match[0].rm_so > 0)) {
> > /* Locate start of replaced string. */
> > re_off = match[0].rm_so;
> > /* Copy leading retained string. */
>
>
> Index: process.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/sed/process.c,v
> retrieving revision 1.15
> diff -u -p -r1.15 process.c
> --- process.c 27 Oct 2009 23:59:43 -0000 1.15
> +++ process.c 18 Jun 2011 18:23:00 -0000
> @@ -336,31 +336,25 @@ substitute(struct s_command *cp)
> switch (n) {
> case 0: /* Global */
> do {
> - if (lastempty || match[0].rm_so != match[0].rm_eo)
> {
> - /* Locate start of replaced string. */
> - re_off = match[0].rm_so;
> - /* Copy leading retained string. */
> - cspace(&SS, s, re_off, APPEND);
> - /* Add in regular expression. */
> + /* Copy leading retained string. */
> + if (match[0].rm_so)
> + cspace(&SS, s, match[0].rm_so, APPEND);
> +
> + /* Append replacement. */
> + if (lastempty || match[0].rm_so ||
> + match[0].rm_so != match[0].rm_eo)
> regsub(&SS, s, cp->u.s->new);
> - }
>
> - /* Move past this match. */
> - if (match[0].rm_so != match[0].rm_eo) {
> - s += match[0].rm_eo;
> - slen -= match[0].rm_eo;
> - lastempty = 0;
> - } else {
> - if (match[0].rm_so == 0)
> - cspace(&SS, s, match[0].rm_so + 1,
> - APPEND);
> - else
> - cspace(&SS, s + match[0].rm_so, 1,
> - APPEND);
> - s += match[0].rm_so + 1;
> - slen -= match[0].rm_so + 1;
> + if (match[0].rm_so == match[0].rm_eo) {
> + cspace(&SS, s + match[0].rm_eo++, 1,
> APPEND);
> lastempty = 1;
> - }
> + } else
> + lastempty = 0;
> +
> + /* Move past this match. */
> + s += match[0].rm_eo;
> + slen -= match[0].rm_eo;
> +
> } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
> /* Copy trailing retained string. */
> if (slen > 0)
>
>
--
---------------------------------------------------------------------------------------------------------------------
() ascii ribbon campaign - against html e-mail
/\