Hi Martijn,
Martijn van Duren wrote on Sat, May 28, 2016 at 10:58:27PM +0200:
> Here's part two of the sed fix. It applies the just added
> REG_NOTBOL|REG_STARTEND change to sed, so that begin of word
> matches directly after a previous match, ending in not a
> word, can match.
>
> It passes regress and an earlier version of this patch (based on
> an earlier attempt of the libregex patch) passed a full ports build
> thanks to aja@ for testing.
>
> before:
> $ echo x,x,x,x,x,x, | sed 's/\<x,/y,/g'
> y,x,y,x,y,x,
> after:
> $ echo x,x,x,x,x,x, | sed 's/\<x,/y,/g'
> y,y,y,y,y,y,
>
> OK?
Yes, OK schwarze@, with or without any of the three following tweaks,
however you prefer.
Yours,
Ingo
> Index: process.c
> ===================================================================
> RCS file: /cvs/src/usr.bin/sed/process.c,v
> retrieving revision 1.27
> diff -u -p -r1.27 process.c
> --- process.c 26 Oct 2015 14:08:47 -0000 1.27
> +++ process.c 28 May 2016 20:56:30 -0000
> @@ -61,7 +61,8 @@ static SPACE HS, PS, SS;
> static inline int applies(struct s_command *);
> static void flush_appends(void);
> static void lputs(char *);
> -static inline int regexec_e(regex_t *, const char *, int, int, size_t);
> +static inline int regexec_e(regex_t *, const char *, int, int, size_t,
> + size_t);
> static void regsub(SPACE *, char *, char *);
> static int substitute(struct s_command *);
>
> @@ -267,7 +268,7 @@ new: if (!nflag && !pd)
> * (lastline, linenumber, ps).
> */
> #define MATCH(a) \
> - (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
> + (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) : \
> (a)->type == AT_LINE ? linenum == (a)->u.l : lastline()
>
> /*
> @@ -335,6 +336,7 @@ substitute(struct s_command *cp)
> regex_t *re;
> regoff_t slen;
> int n, lastempty;
> + size_t le = 0;
> char *s;
>
> s = ps;
> @@ -346,7 +348,7 @@ substitute(struct s_command *cp)
> cp->u.s->maxbref);
> }
> }
> - if (!regexec_e(re, s, 0, 0, psl))
> + if (!regexec_e(re, s, 0, 0, 0, psl))
> return (0);
>
> SS.len = 0; /* Clean substitute space. */
Bikeshed:
Consider making that
+ if(!regexec_e(re, ps, 0, 0, 0, psl))
for clarity. You are no longer using "s" for the regexec_e()
at the end of the loop either. No functional change.
> @@ -356,28 +358,30 @@ substitute(struct s_command *cp)
>
> do {
> /* Copy the leading retained string. */
> - if (n <= 1 && match[0].rm_so)
> - cspace(&SS, s, match[0].rm_so, APPEND);
> + if (n <= 1 && (match[0].rm_so - le))
Bikeshed:
Consider making that
+ if (n <= 1 && (match[0].rm_so > le))
I'd find that easier to understand in the condition.
No functional change.
> + cspace(&SS, s, match[0].rm_so - le, APPEND);
>
> /* Skip zero-length matches right after other matches. */
> - if (lastempty || match[0].rm_so ||
> + if (lastempty || (match[0].rm_so - le) ||
> match[0].rm_so != match[0].rm_eo) {
> if (n <= 1) {
> /* Want this match: append replacement. */
> - regsub(&SS, s, cp->u.s->new);
> + regsub(&SS, ps, cp->u.s->new);
> if (n == 1)
> n = -1;
> } else {
> /* Want a later match: append original. */
> - if (match[0].rm_eo)
> - cspace(&SS, s, match[0].rm_eo, APPEND);
> + if (match[0].rm_eo - le)
> + cspace(&SS, s, match[0].rm_eo - le,
> + APPEND);
> n--;
> }
> }
>
> /* Move past this match. */
> - s += match[0].rm_eo;
> - slen -= match[0].rm_eo;
> + s += (match[0].rm_eo - le);
> + slen -= (match[0].rm_eo - le);
Bikeshed:
Consider making that
+ s = ps + match[0].rm_eo;
+ slen = psl - match[0].rm_eo;
Seems much easier to understand. No functional change.
> + le = match[0].rm_eo;
>
> /*
> * After a zero-length match, advance one byte,
> @@ -388,13 +392,16 @@ substitute(struct s_command *cp)
> slen = -1;
> else
> slen--;
> - if (*s != '\0')
> + if (*s != '\0') {
> cspace(&SS, s++, 1, APPEND);
> + le++;
> + }
> lastempty = 1;
> } else
> lastempty = 0;
>
> - } while (n >= 0 && slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
> + } while (n >= 0 && slen >= 0 &&
> + regexec_e(re, ps, REG_NOTBOL, 0, le, psl));
>
> /* Did not find the requested number of matches. */
> if (n > 1)
> @@ -509,7 +516,7 @@ lputs(char *s)
>
> static inline int
> regexec_e(regex_t *preg, const char *string, int eflags,
> - int nomatch, size_t slen)
> + int nomatch, size_t start, size_t stop)
> {
> int eval;
>
> @@ -520,8 +527,8 @@ regexec_e(regex_t *preg, const char *str
> defpreg = preg;
>
> /* Set anchors */
> - match[0].rm_so = 0;
> - match[0].rm_eo = slen;
> + match[0].rm_so = start;
> + match[0].rm_eo = stop;
>
> eval = regexec(defpreg, string,
> nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);