Hi Martijn,

Martijn van Duren wrote on Sat, May 28, 2016 at 10:58:27PM +0200:

> Here's part two of the sed fix.  It applies the just added
> REG_NOTBOL|REG_STARTEND change to sed, so that begin of word
> matches directly after a previous match, ending in not a
> word, can match.
> 
> It passes regress and an earlier version of this patch (based on
> an earlier attempt of the libregex patch) passed a full ports build
> thanks to aja@ for testing.
> 
> before:
> $ echo x,x,x,x,x,x, | sed 's/\<x,/y,/g'
> y,x,y,x,y,x,
> after:
> $ echo x,x,x,x,x,x, | sed 's/\<x,/y,/g'
> y,y,y,y,y,y,
> 
> OK?

Yes, OK schwarze@, with or without any of the three following tweaks,
however you prefer.

Yours,
  Ingo


> Index: process.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/sed/process.c,v
> retrieving revision 1.27
> diff -u -p -r1.27 process.c
> --- process.c 26 Oct 2015 14:08:47 -0000      1.27
> +++ process.c 28 May 2016 20:56:30 -0000
> @@ -61,7 +61,8 @@ static SPACE HS, PS, SS;
>  static inline int     applies(struct s_command *);
>  static void           flush_appends(void);
>  static void           lputs(char *);
> -static inline int     regexec_e(regex_t *, const char *, int, int, size_t);
> +static inline int     regexec_e(regex_t *, const char *, int, int, size_t,
> +                          size_t);
>  static void           regsub(SPACE *, char *, char *);
>  static int            substitute(struct s_command *);
>  
> @@ -267,7 +268,7 @@ new:              if (!nflag && !pd)
>   * (lastline, linenumber, ps).
>   */
>  #define      MATCH(a)                                                \
> -     (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :       \
> +     (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) :    \
>           (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()
>  
>  /*
> @@ -335,6 +336,7 @@ substitute(struct s_command *cp)
>       regex_t *re;
>       regoff_t slen;
>       int n, lastempty;
> +     size_t le = 0;
>       char *s;
>  
>       s = ps;
> @@ -346,7 +348,7 @@ substitute(struct s_command *cp)
>                           cp->u.s->maxbref);
>               }
>       }
> -     if (!regexec_e(re, s, 0, 0, psl))
> +     if (!regexec_e(re, s, 0, 0, 0, psl))
>               return (0);
>  
>       SS.len = 0;                             /* Clean substitute space. */

Bikeshed:
Consider making that

  +     if(!regexec_e(re, ps, 0, 0, 0, psl))

for clarity.  You are no longer using "s" for the regexec_e()
at the end of the loop either.  No functional change.

> @@ -356,28 +358,30 @@ substitute(struct s_command *cp)
>  
>       do {
>               /* Copy the leading retained string. */
> -             if (n <= 1 && match[0].rm_so)
> -                     cspace(&SS, s, match[0].rm_so, APPEND);
> +             if (n <= 1 && (match[0].rm_so - le))

Bikeshed:
Consider making that

  +     if (n <= 1 && (match[0].rm_so > le))

I'd find that easier to understand in the condition.
No functional change.

> +                     cspace(&SS, s, match[0].rm_so - le, APPEND);
>  
>               /* Skip zero-length matches right after other matches. */
> -             if (lastempty || match[0].rm_so ||
> +             if (lastempty || (match[0].rm_so - le) ||
>                   match[0].rm_so != match[0].rm_eo) {
>                       if (n <= 1) {
>                               /* Want this match: append replacement. */
> -                             regsub(&SS, s, cp->u.s->new);
> +                             regsub(&SS, ps, cp->u.s->new);
>                               if (n == 1)
>                                       n = -1;
>                       } else {
>                               /* Want a later match: append original. */
> -                             if (match[0].rm_eo)
> -                                     cspace(&SS, s, match[0].rm_eo, APPEND);
> +                             if (match[0].rm_eo - le)
> +                                     cspace(&SS, s, match[0].rm_eo - le,
> +                                         APPEND);
>                               n--;
>                       }
>               }
>  
>               /* Move past this match. */
> -             s += match[0].rm_eo;
> -             slen -= match[0].rm_eo;
> +             s += (match[0].rm_eo - le);
> +             slen -= (match[0].rm_eo - le);

Bikeshed:
Consider making that

  +             s = ps + match[0].rm_eo;
  +             slen = psl - match[0].rm_eo;

Seems much easier to understand.  No functional change.

> +             le = match[0].rm_eo;
>  
>               /*
>                * After a zero-length match, advance one byte,
> @@ -388,13 +392,16 @@ substitute(struct s_command *cp)
>                               slen = -1;
>                       else
>                               slen--;
> -                     if (*s != '\0')
> +                     if (*s != '\0') {
>                               cspace(&SS, s++, 1, APPEND);
> +                             le++;
> +                     }
>                       lastempty = 1;
>               } else
>                       lastempty = 0;
>  
> -     } while (n >= 0 && slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
> +     } while (n >= 0 && slen >= 0 &&
> +         regexec_e(re, ps, REG_NOTBOL, 0, le, psl));
>  
>       /* Did not find the requested number of matches. */
>       if (n > 1)
> @@ -509,7 +516,7 @@ lputs(char *s)
>  
>  static inline int
>  regexec_e(regex_t *preg, const char *string, int eflags,
> -    int nomatch, size_t slen)
> +    int nomatch, size_t start, size_t stop)
>  {
>       int eval;
>  
> @@ -520,8 +527,8 @@ regexec_e(regex_t *preg, const char *str
>               defpreg = preg;
>  
>       /* Set anchors */
> -     match[0].rm_so = 0;
> -     match[0].rm_eo = slen;
> +     match[0].rm_so = start;
> +     match[0].rm_eo = stop;
>  
>       eval = regexec(defpreg, string,
>           nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);

Reply via email to