Hi Martijn,

Martijn van Duren wrote on Tue, May 10, 2016 at 02:43:54PM +0200:

> Index: ./lib/libc/regex/engine.c
> ===================================================================
> RCS file: /cvs/src/lib/libc/regex/engine.c,v
> retrieving revision 1.19
> diff -u -p -r1.19 engine.c
> --- ./lib/libc/regex/engine.c 28 Dec 2015 23:01:22 -0000      1.19
> +++ ./lib/libc/regex/engine.c 2 May 2016 08:50:20 -0000
> @@ -674,7 +674,7 @@ fast(struct match *m, char *start, char 
>       states fresh = m->fresh;
>       states tmp = m->tmp;
>       char *p = start;
> -     int c = (start == m->beginp) ? OUT : *(start-1);
> +     int c = (start == m->offp) ? OUT : *(start-1);
>       int lastc;      /* previous c */
>       int flagch;
>       int i;
> @@ -758,7 +758,7 @@ slow(struct match *m, char *start, char 
>       states empty = m->empty;
>       states tmp = m->tmp;
>       char *p = start;
> -     int c = (start == m->beginp) ? OUT : *(start-1);
> +     int c = (start == m->offp) ? OUT : *(start-1);
>       int lastc;      /* previous c */
>       int flagch;
>       int i;

I hate to say that this change appears to cause a regression.

The regexec(3) manual explicitly says:

  REG_STARTEND  The string is considered to start at [...]
                Note that a non-zero rm_so does not imply REG_NOTBOL;
                REG_STARTEND affects only the location of the string,
                not how it is matched.

Right now, the library actually implements that.  The test program
appended below produces the following output, as documented:

  rt: regcomp: OK
  rt: mismatch: regexec() failed to match
  rt: BOL match: OK
  rt: ST match: OK

With your change, the library now fails to match:

  rt: regcomp: OK
  rt: mismatch: regexec() failed to match
  rt: BOL match: OK
  rt: ST match: regexec() failed to match

I don't think that change is intentional, or is it?

I'll look into whether it is possible to conditionally pass
REG_NOTBOL from sed(1) to solve your original issue.  I haven't
looked into the sed(1) code yet because I wanted to report this
regression as soon as I found it.

Yours,
  Ingo


#include <sys/types.h>
#include <err.h>
#include <regex.h>

/* Compiled pattern used by main(); also handed to regerror(3) by report(). */
static regex_t           re;

/*
 * Print the outcome of one regex(3) call on stderr via warnx(3).
 *
 * errcode: return value of regcomp(3) or regexec(3); 0 means success.
 * msg:     short label identifying the test step, printed as a prefix.
 *
 * Returns errcode unchanged so that callers can test it directly.
 */
static int
report(int errcode, const char *msg)
{
        /*
         * A true fixed-size array: sizing it with a "const size_t"
         * object would make it a variable-length array in C.
         */
        char             errbuf[2048];
        size_t           sz;

        if (errcode == 0) {
                warnx("%s: OK", msg);
                return 0;
        }

        /*
         * regerror(3) returns the buffer size needed for the complete
         * message, so a larger value than the buffer means the text in
         * errbuf was truncated; flag that with a trailing "[...]".
         */
        sz = regerror(errcode, &re, errbuf, sizeof(errbuf));
        warnx("%s: %s%s", msg, errbuf,
            sz > sizeof(errbuf) ? "[...]" : "");
        return errcode;
}

/*
 * Regression test for REG_STARTEND: a non-zero rm_so must not imply
 * REG_NOTBOL, so "^y" has to match the substring "y" selected out of
 * "xyz".  Each step reports its result through report().
 *
 * Returns 1 if the pattern fails to compile, 0 otherwise; the results
 * of the individual regexec(3) calls are only printed, not reflected
 * in the exit status.
 */
int
main(void)
{
        regmatch_t       pmatch;

        if (report(regcomp(&re, "^y", REG_EXTENDED), "regcomp"))
                return 1;

        /* 'y' is not the first character: regexec(3) should not match. */
        report(regexec(&re, "xy", 0, NULL, 0), "mismatch");
        /* 'y' is the first character: regexec(3) should match. */
        report(regexec(&re, "yz", 0, NULL, 0), "BOL match");

        /* With REG_STARTEND, pmatch selects bytes [1, 2) of "xyz", i.e. "y". */
        pmatch.rm_so = 1;
        pmatch.rm_eo = 2;

        report(regexec(&re, "xyz", 0, &pmatch, REG_STARTEND), "ST match");

        /* Release the storage regcomp(3) allocated for the pattern. */
        regfree(&re);
        return 0;
}

Reply via email to