not that good :'(
# echo -n 'aba' | sed -E 's/a|$/x/g'
xbx#
#

2011/6/18 sven falempin <[email protected]>

> Good for me:
>
> Index: process.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/sed/process.c,v
> retrieving revision 1.15
> diff -u -p -r1.15 process.c
> --- process.c   27 Oct 2009 23:59:43 -0000      1.15
> +++ process.c   18 Jun 2011 21:04:39 -0000
> @@ -336,31 +336,18 @@ substitute(struct s_command *cp)
>          switch (n) {
>         case 0:                                 /* Global */
>                 do {
> -                       if (lastempty || match[0].rm_so != match[0].rm_eo)
> {
> -                               /* Locate start of replaced string. */
> -                               re_off = match[0].rm_so;
> -                               /* Copy leading retained string. */
> -                               cspace(&SS, s, re_off, APPEND);
> -                               /* Add in regular expression. */
> +                       if (match[0].rm_so)
> +                               cspace(&SS, s, match[0].rm_so, APPEND);
> +                       if (match[0].rm_so == match[0].rm_eo) {
>                                 regsub(&SS, s, cp->u.s->new);
> -                       }
> -
> -                       /* Move past this match. */
> -                       if (match[0].rm_so != match[0].rm_eo) {
> -                               s += match[0].rm_eo;
> -                               slen -= match[0].rm_eo;
> -                               lastempty = 0;
> +                               cspace(&SS, s, match[0].rm_eo++, APPEND);
>                         } else {
> -                               if (match[0].rm_so == 0)
> -                                       cspace(&SS, s, match[0].rm_so + 1,
> -                                           APPEND);
> -                               else
> -                                       cspace(&SS, s + match[0].rm_so, 1,
> -                                           APPEND);
> -                               s += match[0].rm_so + 1;
> -                               slen -= match[0].rm_so + 1;
> -                               lastempty = 1;
> +                               if (match[0].rm_so || match[0].rm_so !=
> match[0].rm_eo)
> +                                       regsub(&SS, s, cp->u.s->new);
>                         }
> +                       /* Move past this match. */
> +                       s += match[0].rm_eo;
> +                       slen -= match[0].rm_eo;
>                 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0,
> slen));
>                 /* Copy trailing retained string. */
>                 if (slen > 0)
>
>
> 2011/6/18 sven falempin <[email protected]>
>
>> Hello,
>>
>> Well, i failed to apply your second patch :s (patch ./process.c
>> ./yourdiff)
>> doing it manually (twice), it pass test but does not correct the initial
>> problem.
>> # echo 'aba' | sed -E 's/a|$/x/g'
>> xbx
>>
>> Maybe i'm wrong ?
>>
>> Nevertheless, my attempt is not that much better, it destroy the leading
>> \n, i ran the regression test this time.
>>
>>  # echo 'aba' | sed -E 's/a|$/x/g'
>> xbxx#
>> #
>>
>> At least, it explain the purpose of lastempty ?
>>
>> Regards.
>>
>>  Index: process.c
>> ===================================================================
>> RCS file: /cvs/src/usr.bin/sed/process.c,v
>> retrieving revision 1.15
>> diff -u -p -r1.15 process.c
>> --- process.c   27 Oct 2009 23:59:43 -0000      1.15
>> +++ process.c   18 Jun 2011 20:50:28 -0000
>> @@ -336,31 +336,15 @@ substitute(struct s_command *cp)
>>         switch (n) {
>>         case 0:                                 /* Global */
>>                 do {
>> -                       if (lastempty || match[0].rm_so != match[0].rm_eo)
>> {
>> -                               /* Locate start of replaced string. */
>> -                               re_off = match[0].rm_so;
>> -                               /* Copy leading retained string. */
>> -                               cspace(&SS, s, re_off, APPEND);
>> -                               /* Add in regular expression. */
>> +                       if (match[0].rm_so)
>> +                               cspace(&SS, s, match[0].rm_so, APPEND);
>> +                       if (match[0].rm_so == match[0].rm_eo)
>> +                               cspace(&SS, s, match[0].rm_eo++, APPEND);
>> +                       if (match[0].rm_so || match[0].rm_so !=
>> match[0].rm_eo)
>>                                 regsub(&SS, s, cp->u.s->new);
>> -                       }
>> -
>>                         /* Move past this match. */
>> -                       if (match[0].rm_so != match[0].rm_eo) {
>> -                               s += match[0].rm_eo;
>> -                               slen -= match[0].rm_eo;
>> -                               lastempty = 0;
>> -                       } else {
>> -                               if (match[0].rm_so == 0)
>> -                                       cspace(&SS, s, match[0].rm_so + 1,
>> -                                           APPEND);
>> -                               else
>> -                                       cspace(&SS, s + match[0].rm_so, 1,
>> -                                           APPEND);
>> -                               s += match[0].rm_so + 1;
>> -                               slen -= match[0].rm_so + 1;
>> -                               lastempty = 1;
>> -                       }
>> +                       s += match[0].rm_eo;
>> +                       slen -= match[0].rm_eo;
>>                 } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0,
>> slen));
>>                 /* Copy trailing retained string. */
>>                 if (slen > 0)
>>
>>
>>
>> 2011/6/18 Ingo Schwarze <[email protected]>
>>
>>> Hi Otto,
>>>
>>> from careful code inspection, your patch looks correct.
>>>
>>> Replacing the ">" by ">=" is wrong, it would breaks this test case:
>>>
>>>  echo abc | ./obj/sed -E 's/(b|())/x/g'
>>>
>>> However, the code is hard to follow, and i suggest the following
>>> refactoring such that people doing a review can more easily check
>>> the correctness of the code after application of the patch:
>>>
>>>  1) X || Y || (!Y && Z) <=> X || Y || Z
>>>    so leave out the middle + line of your patch.
>>>
>>>  2) In case of match[0].rm_so == 0,
>>>    the cspace in the if block you touch has no effect
>>>    because copying zero bytes would be pointless.
>>>    Thus, we can pull the cspace before the main if,
>>>    optionally with an if (match[0].rm_so).
>>>    That's also much easier to understand:
>>>    *Always* copy the leading retained string, if any.
>>>
>>>  3) In the else clause of "Move past this match",
>>>    the if is completely pointless.
>>>    For vanishing rm_so, both clauses do the same thing.
>>>
>>>  4) Pull out the update of s and slen at the end.
>>>    In the if checking whether the current match is empty,
>>>    increment rm_eo exactly at the same time as copying the
>>>    additional character.
>>>
>>> The resulting code is much shorter and simpler,
>>> and hence much easier to audit for correctness.
>>>
>>> However, i'm a bit scared about touching this code.
>>> Breaking this would be really bad.
>>> I'm currently running a make build with it.
>>>
>>> Yours,
>>>  Ingo
>>>
>>>
>>> > Index: process.c
>>> > ===================================================================
>>> > RCS file: /cvs/src/usr.bin/sed/process.c,v
>>> > retrieving revision 1.15
>>> > diff -u -p -r1.15 process.c
>>> > --- process.c 27 Oct 2009 23:59:43 -0000      1.15
>>> > +++ process.c 15 Jun 2011 06:31:08 -0000
>>> > @@ -336,7 +336,9 @@ substitute(struct s_command *cp)
>>> >       switch (n) {
>>> >       case 0:                                 /* Global */
>>> >               do {
>>> > -                     if (lastempty || match[0].rm_so !=
>>> match[0].rm_eo) {
>>> > +                     if (lastempty || match[0].rm_so != match[0].rm_eo
>>> ||
>>> > +                         (match[0].rm_so == match[0].rm_eo &&
>>> > +                         match[0].rm_so > 0)) {
>>> >                               /* Locate start of replaced string. */
>>> >                               re_off = match[0].rm_so;
>>> >                               /* Copy leading retained string. */
>>>
>>>
>>> Index: process.c
>>> ===================================================================
>>> RCS file: /cvs/src/usr.bin/sed/process.c,v
>>> retrieving revision 1.15
>>> diff -u -p -r1.15 process.c
>>> --- process.c   27 Oct 2009 23:59:43 -0000      1.15
>>> +++ process.c   18 Jun 2011 18:23:00 -0000
>>> @@ -336,31 +336,25 @@ substitute(struct s_command *cp)
>>>         switch (n) {
>>>        case 0:                                 /* Global */
>>>                do {
>>> -                       if (lastempty || match[0].rm_so !=
>>> match[0].rm_eo) {
>>> -                               /* Locate start of replaced string. */
>>> -                               re_off = match[0].rm_so;
>>> -                               /* Copy leading retained string. */
>>> -                               cspace(&SS, s, re_off, APPEND);
>>> -                               /* Add in regular expression. */
>>> +                       /* Copy leading retained string. */
>>> +                       if (match[0].rm_so)
>>> +                               cspace(&SS, s, match[0].rm_so, APPEND);
>>> +
>>> +                       /* Append replacement. */
>>> +                       if (lastempty || match[0].rm_so ||
>>> +                           match[0].rm_so != match[0].rm_eo)
>>>                                regsub(&SS, s, cp->u.s->new);
>>> -                       }
>>>
>>> -                       /* Move past this match. */
>>> -                       if (match[0].rm_so != match[0].rm_eo) {
>>> -                               s += match[0].rm_eo;
>>> -                               slen -= match[0].rm_eo;
>>> -                               lastempty = 0;
>>> -                       } else {
>>> -                               if (match[0].rm_so == 0)
>>> -                                       cspace(&SS, s, match[0].rm_so +
>>> 1,
>>> -                                           APPEND);
>>> -                               else
>>> -                                       cspace(&SS, s + match[0].rm_so,
>>> 1,
>>> -                                           APPEND);
>>> -                               s += match[0].rm_so + 1;
>>> -                               slen -= match[0].rm_so + 1;
>>> +                       if (match[0].rm_so == match[0].rm_eo) {
>>> +                               cspace(&SS, s + match[0].rm_eo++, 1,
>>> APPEND);
>>>                                lastempty = 1;
>>> -                       }
>>> +                       } else
>>> +                               lastempty = 0;
>>> +
>>> +                       /* Move past this match. */
>>> +                       s += match[0].rm_eo;
>>> +                       slen -= match[0].rm_eo;
>>> +
>>>                } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0,
>>> slen));
>>>                /* Copy trailing retained string. */
>>>                if (slen > 0)
>>>
>>>
>>
>>
>> --
>>
>> ---------------------------------------------------------------------------------------------------------------------
>> () ascii ribbon campaign - against html e-mail
>> /\
>>
>>
>
>
> --
>
> ---------------------------------------------------------------------------------------------------------------------
> () ascii ribbon campaign - against html e-mail
> /\
>
>


-- 
---------------------------------------------------------------------------------------------------------------------
() ascii ribbon campaign - against html e-mail
/\

Reply via email to