I've beaten my head against this wall for a bit longer, and have come up with several places where pcre2 changed the return types for void *what query interrogations (especially from int to uint32, which is bad news on x86_64-linux).
The attached patch picks up these bad void * type assignments. There is still no tremendous improvement; I expect I am missing something blatantly obvious. On Mon, Dec 5, 2016 at 10:59 PM, William A Rowe Jr <wr...@rowe-clan.net> wrote: > I've written the following patch to trunk to allow us to configure, > compile and link against PCRE2 (10.x). The autoconf in particular is > streamlined for cross-compilation detection, while retaining the ability to > override the path to (and name of) pcre[2]-config. > > It isn't in a commit-ready state due to t/TEST t/apache/expr.t failures > (among others), and the defects appear to revolve around the way substring > patterns are recorded. > > Attached the test failure cases (many similar test patterns do succeed, > interestingly.) One test looks outright wrong. I'd rather not beat my head > against these if the answer is blatantly obvious. > > If anyone has patience for exploring this further, any help is welcomed. > Philip starts with this assertion; "The original, very widely deployed PCRE > library, originally released in 1997, is at version 8.39, and the API and > feature set are stable—future releases will be for bugfixes only. All new > future features will be to PCRE2, not the original PCRE 8.x series." But he > has gone on to state that many fuzzing error cases which are handled > correctly in PCRE2 cannot be realistically fixed in PCRE 8.x. I've placed > this up there with other parsing rewrites in httpd, that starting over is > simply the correct answer, and I'd like to see if we can have httpd 3.0 > choosing PCRE2 over PCRE in the near future (and perhaps backport this if > we determine behavior is consistent.) > > Cheers, > > Bill > >
Index: configure.in =================================================================== --- configure.in (revision 1773161) +++ configure.in (working copy) @@ -223,18 +223,18 @@ AC_ARG_WITH(pcre, APACHE_HELP_STRING(--with-pcre=PATH,Use external PCRE library)) -AC_PATH_PROG(PCRE_CONFIG, pcre-config, false) -if test -d "$with_pcre" && test -x "$with_pcre/bin/pcre-config"; then - PCRE_CONFIG=$with_pcre/bin/pcre-config -elif test -x "$with_pcre"; then - PCRE_CONFIG=$with_pcre -fi +AC_CHECK_TARGET_TOOLS(PCRE_CONFIG, [pcre2-config pcre-config], + [`which $with_pcre 2>/dev/null`], + [$with_pcre/bin:$with_pcre]) -if test "$PCRE_CONFIG" != "false"; then +if test "x$PCRE_CONFIG" != "x"; then if $PCRE_CONFIG --version >/dev/null 2>&1; then :; else - AC_MSG_ERROR([Did not find pcre-config script at $PCRE_CONFIG]) + AC_MSG_ERROR([Did not find working script at $PCRE_CONFIG]) fi case `$PCRE_CONFIG --version` in + [1[0-9].*]) + AC_DEFINE(HAVE_PCRE2, 1, [Detected PCRE2]) + ;; [[1-5].*]) AC_MSG_ERROR([Need at least pcre version 6.7]) ;; @@ -244,10 +244,10 @@ esac AC_MSG_NOTICE([Using external PCRE library from $PCRE_CONFIG]) APR_ADDTO(PCRE_INCLUDES, [`$PCRE_CONFIG --cflags`]) - APR_ADDTO(PCRE_LIBS, [`$PCRE_CONFIG --libs`]) + APR_ADDTO(PCRE_LIBS, [`$PCRE_CONFIG --libs8 2>/dev/null || $PCRE_CONFIG --libs`]) APR_ADDTO(HTTPD_LIBS, [\$(PCRE_LIBS)]) else - AC_MSG_ERROR([pcre-config for libpcre not found. PCRE is required and available from http://pcre.org/]) + AC_MSG_ERROR([pcre(2)-config for libpcre not found. 
PCRE is required and available from http://pcre.org/]) fi APACHE_SUBST(PCRE_LIBS) Index: server/util_pcre.c =================================================================== --- server/util_pcre.c (revision 1773161) +++ server/util_pcre.c (working copy) @@ -46,10 +46,18 @@ #include "httpd.h" #include "apr_strings.h" #include "apr_tables.h" + +#ifdef HAVE_PCRE2 +#define PCRE2_CODE_UNIT_WIDTH 8 +#include "pcre2.h" +#define PCREn(x) PCRE2_ ## x +#else #include "pcre.h" +#define PCREn(x) PCRE_ ## x +#endif /* PCRE_DUPNAMES is only present since version 6.7 of PCRE */ -#ifndef PCRE_DUPNAMES +#if !defined(PCRE_DUPNAMES) && !defined(HAVE_PCRE2) #error PCRE Version 6.7 or later required! #else @@ -74,11 +82,19 @@ AP_DECLARE(const char *) ap_pcre_version_string(int which) { +#ifdef HAVE_PCRE2 + static char buf[80]; +#endif switch (which) { case AP_REG_PCRE_COMPILED: - return APR_STRINGIFY(PCRE_MAJOR) "." APR_STRINGIFY(PCRE_MINOR) " " APR_STRINGIFY(PCRE_DATE); + return APR_STRINGIFY(PCREn(MAJOR)) "." 
APR_STRINGIFY(PCREn(MINOR)) " " APR_STRINGIFY(PCREn(DATE)); case AP_REG_PCRE_LOADED: +#ifdef HAVE_PCRE2 + pcre2_config(PCRE2_CONFIG_VERSION, buf); + return buf; +#else return pcre_version(); +#endif default: return "Unknown"; } @@ -118,7 +134,11 @@ AP_DECLARE(void) ap_regfree(ap_regex_t *preg) { +#ifdef HAVE_PCRE2 + pcre2_code_free(preg->re_pcre); +#else (pcre_free)(preg->re_pcre); +#endif } @@ -139,34 +159,48 @@ */ AP_DECLARE(int) ap_regcomp(ap_regex_t * preg, const char *pattern, int cflags) { +#ifdef HAVE_PCRE2 + uint32_t capcount; + size_t erroffset; +#else const char *errorptr; int erroffset; +#endif int errcode = 0; - int options = PCRE_DUPNAMES; + int options = PCREn(DUPNAMES); if ((cflags & AP_REG_ICASE) != 0) - options |= PCRE_CASELESS; + options |= PCREn(CASELESS); if ((cflags & AP_REG_NEWLINE) != 0) - options |= PCRE_MULTILINE; + options |= PCREn(MULTILINE); if ((cflags & AP_REG_DOTALL) != 0) - options |= PCRE_DOTALL; + options |= PCREn(DOTALL); - preg->re_pcre = - pcre_compile2(pattern, options, &errcode, &errorptr, &erroffset, NULL); +#ifdef HAVE_PCRE2 + preg->re_pcre = pcre2_compile((const unsigned char *)pattern, + PCRE2_ZERO_TERMINATED, options, &errcode, + &erroffset, NULL); +#else + preg->re_pcre = pcre_compile2(pattern, options, &errcode, + &errorptr, &erroffset, NULL); +#endif + preg->re_erroffset = erroffset; - if (preg->re_pcre == NULL) { - /* - * There doesn't seem to be constants defined for compile time error - * codes. 21 is "failed to get memory" according to pcreapi(3). 
- */ + /* Internal ERR21 is "failed to get memory" according to pcreapi(3) */ if (errcode == 21) return AP_REG_ESPACE; return AP_REG_INVARG; } +#ifdef HAVE_PCRE2 + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_CAPTURECOUNT, &capcount); + preg->re_nsub = capcount; +#else pcre_fullinfo((const pcre *)preg->re_pcre, NULL, - PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub)); + PCRE_INFO_CAPTURECOUNT, &(preg->re_nsub)); +#endif return 0; } @@ -198,18 +232,29 @@ { int rc; int options = 0; - int *ovector = NULL; +#ifdef HAVE_PCRE2 + pcre2_match_data *matchdata = NULL; + size_t *ovector = NULL; +#else int small_ovector[POSIX_MALLOC_THRESHOLD * 3]; int allocated_ovector = 0; + int *ovector = NULL; +#endif if ((eflags & AP_REG_NOTBOL) != 0) - options |= PCRE_NOTBOL; + options |= PCREn(NOTBOL); if ((eflags & AP_REG_NOTEOL) != 0) - options |= PCRE_NOTEOL; + options |= PCREn(NOTEOL); ((ap_regex_t *)preg)->re_erroffset = (apr_size_t)(-1); /* Only has meaning after compile */ if (nmatch > 0) { +#ifdef HAVE_PCRE2 + matchdata = pcre2_match_data_create(nmatch, NULL); + if (matchdata == NULL) + return AP_REG_ESPACE; + ovector = pcre2_get_ovector_pointer(matchdata); +#else if (nmatch <= POSIX_MALLOC_THRESHOLD) { ovector = &(small_ovector[0]); } @@ -219,10 +264,17 @@ return AP_REG_ESPACE; allocated_ovector = 1; } +#endif } +#ifdef HAVE_PCRE2 + rc = pcre2_match((const pcre2_code *)preg->re_pcre, + (const unsigned char *)buff, len, + 0, options, matchdata, NULL); +#else rc = pcre_exec((const pcre *)preg->re_pcre, NULL, buff, (int)len, 0, options, ovector, nmatch * 3); +#endif if (rc == 0) rc = nmatch; /* All captured slots were filled in */ @@ -233,39 +285,51 @@ pmatch[i].rm_so = ovector[i * 2]; pmatch[i].rm_eo = ovector[i * 2 + 1]; } - if (allocated_ovector) - free(ovector); for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; + } + +#ifdef HAVE_PCRE2 + if (matchdata) + pcre2_match_data_free(matchdata); +#else + if (allocated_ovector) + free(ovector); +#endif + + 
if (rc >= 0) { return 0; } - else { - if (allocated_ovector) - free(ovector); +#ifdef HAVE_PCRE2 + if (rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF8_ERR21) + return AP_REG_INVARG; +#endif switch (rc) { - case PCRE_ERROR_NOMATCH: + case PCREn(ERROR_NOMATCH): return AP_REG_NOMATCH; - case PCRE_ERROR_NULL: + case PCREn(ERROR_NULL): return AP_REG_INVARG; - case PCRE_ERROR_BADOPTION: + case PCREn(ERROR_BADOPTION): return AP_REG_INVARG; - case PCRE_ERROR_BADMAGIC: + case PCREn(ERROR_BADMAGIC): return AP_REG_INVARG; - case PCRE_ERROR_UNKNOWN_NODE: - return AP_REG_ASSERT; - case PCRE_ERROR_NOMEMORY: + case PCREn(ERROR_NOMEMORY): return AP_REG_ESPACE; -#ifdef PCRE_ERROR_MATCHLIMIT - case PCRE_ERROR_MATCHLIMIT: +#if defined(HAVE_PCRE2) || defined(PCRE_ERROR_MATCHLIMIT) + case PCREn(ERROR_MATCHLIMIT): return AP_REG_ESPACE; #endif -#ifdef PCRE_ERROR_BADUTF8 - case PCRE_ERROR_BADUTF8: +#if defined(PCRE_ERROR_UNKNOWN_NODE) + case PCRE_ERROR_UNKNOWN_NODE: + return AP_REG_ASSERT; +#endif +#if defined(PCRE_ERROR_BADUTF8) + case PCREn(ERROR_BADUTF8): return AP_REG_INVARG; #endif -#ifdef PCRE_ERROR_BADUTF8_OFFSET - case PCRE_ERROR_BADUTF8_OFFSET: +#if defined(PCRE_ERROR_BADUTF8_OFFSET) + case PCREn(ERROR_BADUTF8_OFFSET): return AP_REG_INVARG; #endif default: @@ -278,17 +342,29 @@ apr_array_header_t *names, const char *prefix, int upper) { + char *nametable; + +#ifdef HAVE_PCRE2 + uint32_t namecount; + uint32_t nameentrysize; + uint32_t i; + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_NAMECOUNT, &namecount); + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_NAMEENTRYSIZE, &nameentrysize); + pcre2_pattern_info((const pcre2_code *)preg->re_pcre, + PCRE2_INFO_NAMETABLE, &nametable); +#else int namecount; int nameentrysize; int i; - char *nametable; - pcre_fullinfo((const pcre *)preg->re_pcre, NULL, - PCRE_INFO_NAMECOUNT, &namecount); + PCRE_INFO_NAMECOUNT, &namecount); pcre_fullinfo((const pcre *)preg->re_pcre, NULL, - 
PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); + PCRE_INFO_NAMEENTRYSIZE, &nameentrysize); pcre_fullinfo((const pcre *)preg->re_pcre, NULL, - PCRE_INFO_NAMETABLE, &nametable); + PCRE_INFO_NAMETABLE, &nametable); +#endif for (i = 0; i < namecount; i++) { const char *offset = nametable + i * nameentrysize;