the diff, sorry for spaming, last send diff was bad:
<<
Index: process.c
===================================================================
RCS file: /cvs/src/usr.bin/sed/process.c,v
retrieving revision 1.15
diff -u -p -r1.15 process.c
--- process.c 27 Oct 2009 23:59:43 -0000 1.15
+++ process.c 18 Jun 2011 21:33:51 -0000
@@ -336,31 +336,19 @@ substitute(struct s_command *cp)
switch (n) {
case 0: /* Global */
do {
- if (lastempty || match[0].rm_so != match[0].rm_eo) {
- /* Locate start of replaced string. */
- re_off = match[0].rm_so;
- /* Copy leading retained string. */
- cspace(&SS, s, re_off, APPEND);
- /* Add in regular expression. */
+ if (match[0].rm_so)
+ cspace(&SS, s, match[0].rm_so, APPEND);
+ if (match[0].rm_so == match[0].rm_eo) {
+ match[0].rm_eo++;
regsub(&SS, s, cp->u.s->new);
- }
-
- /* Move past this match. */
- if (match[0].rm_so != match[0].rm_eo) {
- s += match[0].rm_eo;
- slen -= match[0].rm_eo;
- lastempty = 0;
+ cspace(&SS, s, match[0].rm_eo, APPEND);
} else {
- if (match[0].rm_so == 0)
- cspace(&SS, s, match[0].rm_so + 1,
- APPEND);
- else
- cspace(&SS, s + match[0].rm_so, 1,
- APPEND);
- s += match[0].rm_so + 1;
- slen -= match[0].rm_so + 1;
- lastempty = 1;
+ if (match[0].rm_so || match[0].rm_so !=
match[0].rm_eo)
+ regsub(&SS, s, cp->u.s->new);
}
+ /* Move past this match. */
+ s += match[0].rm_eo;
+ slen -= match[0].rm_eo;
} while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
/* Copy trailing retained string. */
if (slen > 0)
>>
2011/6/18 sven falempin <[email protected]>
> not that good :'(
> # echo -n 'aba' | sed -E 's/a|$/x/g'
> xbx#
> #
>
>
> 2011/6/18 sven falempin <[email protected]>
>
>> Good for me:
>>
>> Index: process.c
>> ===================================================================
>> RCS file: /cvs/src/usr.bin/sed/process.c,v
>> retrieving revision 1.15
>> diff -u -p -r1.15 process.c
>> --- process.c 27 Oct 2009 23:59:43 -0000 1.15
>> +++ process.c 18 Jun 2011 21:04:39 -0000
>> @@ -336,31 +336,18 @@ substitute(struct s_command *cp)
>> switch (n) {
>> case 0: /* Global */
>> do {
>> - if (lastempty || match[0].rm_so != match[0].rm_eo)
>> {
>> - /* Locate start of replaced string. */
>> - re_off = match[0].rm_so;
>> - /* Copy leading retained string. */
>> - cspace(&SS, s, re_off, APPEND);
>> - /* Add in regular expression. */
>> + if (match[0].rm_so)
>> + cspace(&SS, s, match[0].rm_so, APPEND);
>> + if (match[0].rm_so == match[0].rm_eo) {
>> regsub(&SS, s, cp->u.s->new);
>> - }
>> -
>> - /* Move past this match. */
>> - if (match[0].rm_so != match[0].rm_eo) {
>> - s += match[0].rm_eo;
>> - slen -= match[0].rm_eo;
>> - lastempty = 0;
>> + cspace(&SS, s, match[0].rm_eo++, APPEND);
>> } else {
>> - if (match[0].rm_so == 0)
>> - cspace(&SS, s, match[0].rm_so + 1,
>> - APPEND);
>> - else
>> - cspace(&SS, s + match[0].rm_so, 1,
>> - APPEND);
>> - s += match[0].rm_so + 1;
>> - slen -= match[0].rm_so + 1;
>> - lastempty = 1;
>> + if (match[0].rm_so || match[0].rm_so !=
>> match[0].rm_eo)
>> + regsub(&SS, s, cp->u.s->new);
>> }
>> + /* Move past this match. */
>> + s += match[0].rm_eo;
>> + slen -= match[0].rm_eo;
>> } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0,
>> slen));
>> /* Copy trailing retained string. */
>> if (slen > 0)
>>
>>
>> 2011/6/18 sven falempin <[email protected]>
>>
>>> Hello,
>>>
>>> Well, i failed to apply your second patch :s (patch ./process.c
>>> ./yourdiff)
>>> doing it manually (twice), it pass test but does not correct the initial
>>> problem.
>>> # echo 'aba' | sed -E 's/a|$/x/g'
>>> xbx
>>>
>>> Maybe i'm wrong ?
>>>
>>> Nevertheless, my attempt is not that much better, it destroy the leading
>>> \n, i ran the regression test this time.
>>>
>>> # echo 'aba' | sed -E 's/a|$/x/g'
>>> xbxx#
>>> #
>>>
>>> At least, it explain the purpose of lastempty ?
>>>
>>> Regards.
>>>
>>> Index: process.c
>>> ===================================================================
>>> RCS file: /cvs/src/usr.bin/sed/process.c,v
>>> retrieving revision 1.15
>>> diff -u -p -r1.15 process.c
>>> --- process.c 27 Oct 2009 23:59:43 -0000 1.15
>>> +++ process.c 18 Jun 2011 20:50:28 -0000
>>> @@ -336,31 +336,15 @@ substitute(struct s_command *cp)
>>> switch (n) {
>>> case 0: /* Global */
>>> do {
>>> - if (lastempty || match[0].rm_so !=
>>> match[0].rm_eo) {
>>> - /* Locate start of replaced string. */
>>> - re_off = match[0].rm_so;
>>> - /* Copy leading retained string. */
>>> - cspace(&SS, s, re_off, APPEND);
>>> - /* Add in regular expression. */
>>> + if (match[0].rm_so)
>>> + cspace(&SS, s, match[0].rm_so, APPEND);
>>> + if (match[0].rm_so == match[0].rm_eo)
>>> + cspace(&SS, s, match[0].rm_eo++, APPEND);
>>> + if (match[0].rm_so || match[0].rm_so !=
>>> match[0].rm_eo)
>>> regsub(&SS, s, cp->u.s->new);
>>> - }
>>> -
>>> /* Move past this match. */
>>> - if (match[0].rm_so != match[0].rm_eo) {
>>> - s += match[0].rm_eo;
>>> - slen -= match[0].rm_eo;
>>> - lastempty = 0;
>>> - } else {
>>> - if (match[0].rm_so == 0)
>>> - cspace(&SS, s, match[0].rm_so +
>>> 1,
>>> - APPEND);
>>> - else
>>> - cspace(&SS, s + match[0].rm_so,
>>> 1,
>>> - APPEND);
>>> - s += match[0].rm_so + 1;
>>> - slen -= match[0].rm_so + 1;
>>> - lastempty = 1;
>>> - }
>>> + s += match[0].rm_eo;
>>> + slen -= match[0].rm_eo;
>>> } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0,
>>> slen));
>>> /* Copy trailing retained string. */
>>> if (slen > 0)
>>>
>>>
>>>
>>> 2011/6/18 Ingo Schwarze <[email protected]>
>>>
>>>> Hi Otto,
>>>>
>>>> from careful code inspection, your patch looks correct.
>>>>
>>>> Replacing the ">" by ">=" is wrong, it would breaks this test case:
>>>>
>>>> echo abc | ./obj/sed -E 's/(b|())/x/g'
>>>>
>>>> However, the code is hard to follow, and i suggest the following
>>>> refactoring such that people doing a review can more easily check
>>>> the correctness of the code after application of the patch:
>>>>
>>>> 1) X || Y || (!Y && Z) <=> X || Y || Z
>>>> so leave out the middle + line of your patch.
>>>>
>>>> 2) In case of match[0].rm_so == 0,
>>>> the cspace in the if block you touch has no effect
>>>> because copying zero bytes would be pointless.
>>>> Thus, we can pull the cspace before the main if,
>>>> optionally with an if (match[0].rm_so).
>>>> That's also much easier to understand:
>>>> *Always* copy the leading retained string, if any.
>>>>
>>>> 3) In the else clause of "Move past this match",
>>>> the if is completely pointless.
>>>> For vanishing rm_so, both clauses do the same thing.
>>>>
>>>> 4) Pull out the update of s and slen at the end.
>>>> In the if checking whether the current match is empty,
>>>> increment rm_eo exactly at the same time as copying the
>>>> additional character.
>>>>
>>>> The resulting code is much shorter and simpler,
>>>> and hence much easier to audit for correctness.
>>>>
>>>> However, i'm a bit scared about touching this code.
>>>> Breaking this would be really bad.
>>>> I'm currently running a make build with it.
>>>>
>>>> Yours,
>>>> Ingo
>>>>
>>>>
>>>> > Index: process.c
>>>> > ===================================================================
>>>> > RCS file: /cvs/src/usr.bin/sed/process.c,v
>>>> > retrieving revision 1.15
>>>> > diff -u -p -r1.15 process.c
>>>> > --- process.c 27 Oct 2009 23:59:43 -0000 1.15
>>>> > +++ process.c 15 Jun 2011 06:31:08 -0000
>>>> > @@ -336,7 +336,9 @@ substitute(struct s_command *cp)
>>>> > switch (n) {
>>>> > case 0: /* Global */
>>>> > do {
>>>> > - if (lastempty || match[0].rm_so !=
>>>> match[0].rm_eo) {
>>>> > + if (lastempty || match[0].rm_so !=
>>>> match[0].rm_eo ||
>>>> > + (match[0].rm_so == match[0].rm_eo &&
>>>> > + match[0].rm_so > 0)) {
>>>> > /* Locate start of replaced string. */
>>>> > re_off = match[0].rm_so;
>>>> > /* Copy leading retained string. */
>>>>
>>>>
>>>> Index: process.c
>>>> ===================================================================
>>>> RCS file: /cvs/src/usr.bin/sed/process.c,v
>>>> retrieving revision 1.15
>>>> diff -u -p -r1.15 process.c
>>>> --- process.c 27 Oct 2009 23:59:43 -0000 1.15
>>>> +++ process.c 18 Jun 2011 18:23:00 -0000
>>>> @@ -336,31 +336,25 @@ substitute(struct s_command *cp)
>>>> switch (n) {
>>>> case 0: /* Global */
>>>> do {
>>>> - if (lastempty || match[0].rm_so !=
>>>> match[0].rm_eo) {
>>>> - /* Locate start of replaced string. */
>>>> - re_off = match[0].rm_so;
>>>> - /* Copy leading retained string. */
>>>> - cspace(&SS, s, re_off, APPEND);
>>>> - /* Add in regular expression. */
>>>> + /* Copy leading retained string. */
>>>> + if (match[0].rm_so)
>>>> + cspace(&SS, s, match[0].rm_so, APPEND);
>>>> +
>>>> + /* Append replacement. */
>>>> + if (lastempty || match[0].rm_so ||
>>>> + match[0].rm_so != match[0].rm_eo)
>>>> regsub(&SS, s, cp->u.s->new);
>>>> - }
>>>>
>>>> - /* Move past this match. */
>>>> - if (match[0].rm_so != match[0].rm_eo) {
>>>> - s += match[0].rm_eo;
>>>> - slen -= match[0].rm_eo;
>>>> - lastempty = 0;
>>>> - } else {
>>>> - if (match[0].rm_so == 0)
>>>> - cspace(&SS, s, match[0].rm_so +
>>>> 1,
>>>> - APPEND);
>>>> - else
>>>> - cspace(&SS, s + match[0].rm_so,
>>>> 1,
>>>> - APPEND);
>>>> - s += match[0].rm_so + 1;
>>>> - slen -= match[0].rm_so + 1;
>>>> + if (match[0].rm_so == match[0].rm_eo) {
>>>> + cspace(&SS, s + match[0].rm_eo++, 1,
>>>> APPEND);
>>>> lastempty = 1;
>>>> - }
>>>> + } else
>>>> + lastempty = 0;
>>>> +
>>>> + /* Move past this match. */
>>>> + s += match[0].rm_eo;
>>>> + slen -= match[0].rm_eo;
>>>> +
>>>> } while (slen > 0 && regexec_e(re, s, REG_NOTBOL, 0,
>>>> slen));
>>>> /* Copy trailing retained string. */
>>>> if (slen > 0)
>>>>
>>>>
>>>
>>>
>>> --
>>>
>>> ---------------------------------------------------------------------------------------------------------------------
>>> () ascii ribbon campaign - against html e-mail
>>> /\
>>>
>>>
>>
>>
>> --
>>
>> ---------------------------------------------------------------------------------------------------------------------
>> () ascii ribbon campaign - against html e-mail
>> /\
>>
>>
>
>
> --
>
> ---------------------------------------------------------------------------------------------------------------------
> () ascii ribbon campaign - against html e-mail
> /\
>
>
--
---------------------------------------------------------------------------------------------------------------------
() ascii ribbon campaign - against html e-mail
/\