Hello tech@,
Here's part two of the sed fix. It
applies the just-added REG_NOTBOL|REG_STARTEND change to sed, so that a
beginning-of-word match directly after a previous match that ended in a
non-word character can succeed.
It passes regress, and an earlier version of this patch (based on an
earlier attempt at the libregex patch) passed a full ports build; thanks
to aja@ for testing.
before:
$ echo x,x,x,x,x,x, | sed 's/\<x,/y,/g'
y,x,y,x,y,x,
after:
$ echo x,x,x,x,x,x, | sed 's/\<x,/y,/g'
y,y,y,y,y,y,
OK?
martijn@
? obj
Index: process.c
===================================================================
RCS file: /cvs/src/usr.bin/sed/process.c,v
retrieving revision 1.27
diff -u -p -r1.27 process.c
--- process.c 26 Oct 2015 14:08:47 -0000 1.27
+++ process.c 28 May 2016 20:56:30 -0000
@@ -61,7 +61,8 @@ static SPACE HS, PS, SS;
static inline int applies(struct s_command *);
static void flush_appends(void);
static void lputs(char *);
-static inline int regexec_e(regex_t *, const char *, int, int, size_t);
+static inline int regexec_e(regex_t *, const char *, int, int, size_t,
+ size_t);
static void regsub(SPACE *, char *, char *);
static int substitute(struct s_command *);
@@ -267,7 +268,7 @@ new: if (!nflag && !pd)
* (lastline, linenumber, ps).
*/
#define MATCH(a) \
- (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) : \
+ (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) : \
(a)->type == AT_LINE ? linenum == (a)->u.l : lastline()
/*
@@ -335,6 +336,7 @@ substitute(struct s_command *cp)
regex_t *re;
regoff_t slen;
int n, lastempty;
+ size_t le = 0;
char *s;
s = ps;
@@ -346,7 +348,7 @@ substitute(struct s_command *cp)
cp->u.s->maxbref);
}
}
- if (!regexec_e(re, s, 0, 0, psl))
+ if (!regexec_e(re, s, 0, 0, 0, psl))
return (0);
SS.len = 0; /* Clean substitute space. */
@@ -356,28 +358,30 @@ substitute(struct s_command *cp)
do {
/* Copy the leading retained string. */
- if (n <= 1 && match[0].rm_so)
- cspace(&SS, s, match[0].rm_so, APPEND);
+ if (n <= 1 && (match[0].rm_so - le))
+ cspace(&SS, s, match[0].rm_so - le, APPEND);
/* Skip zero-length matches right after other matches. */
- if (lastempty || match[0].rm_so ||
+ if (lastempty || (match[0].rm_so - le) ||
match[0].rm_so != match[0].rm_eo) {
if (n <= 1) {
/* Want this match: append replacement. */
- regsub(&SS, s, cp->u.s->new);
+ regsub(&SS, ps, cp->u.s->new);
if (n == 1)
n = -1;
} else {
/* Want a later match: append original. */
- if (match[0].rm_eo)
- cspace(&SS, s, match[0].rm_eo, APPEND);
+ if (match[0].rm_eo - le)
+ cspace(&SS, s, match[0].rm_eo - le,
+ APPEND);
n--;
}
}
/* Move past this match. */
- s += match[0].rm_eo;
- slen -= match[0].rm_eo;
+ s += (match[0].rm_eo - le);
+ slen -= (match[0].rm_eo - le);
+ le = match[0].rm_eo;
/*
* After a zero-length match, advance one byte,
@@ -388,13 +392,16 @@ substitute(struct s_command *cp)
slen = -1;
else
slen--;
- if (*s != '\0')
+ if (*s != '\0') {
cspace(&SS, s++, 1, APPEND);
+ le++;
+ }
lastempty = 1;
} else
lastempty = 0;
- } while (n >= 0 && slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
+ } while (n >= 0 && slen >= 0 &&
+ regexec_e(re, ps, REG_NOTBOL, 0, le, psl));
/* Did not find the requested number of matches. */
if (n > 1)
@@ -509,7 +516,7 @@ lputs(char *s)
static inline int
regexec_e(regex_t *preg, const char *string, int eflags,
- int nomatch, size_t slen)
+ int nomatch, size_t start, size_t stop)
{
int eval;
@@ -520,8 +527,8 @@ regexec_e(regex_t *preg, const char *str
defpreg = preg;
/* Set anchors */
- match[0].rm_so = 0;
- match[0].rm_eo = slen;
+ match[0].rm_so = start;
+ match[0].rm_eo = stop;
eval = regexec(defpreg, string,
nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);