After a discussion on irc, I've started looking at adding pcre support
to apr-util. The patch to start this off is below...
Not perfect and not quite complete, but I said I'd post early on this
and let others look.
david
Property changes on: regex
___________________________________________________________________
Name: svn:ignore
+ .libs
Index: regex/apr_regex.c
===================================================================
--- regex/apr_regex.c (revision 0)
+++ regex/apr_regex.c (revision 0)
@@ -0,0 +1,252 @@
+/**************************************************
+ * Perl-Compatible Regular Expressions *
+ **************************************************/
+
+/*
+This is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language. See
+the file Tech.Notes for some information on the internals.
+
+This module is a wrapper that provides a POSIX API to the underlying PCRE
+functions.
+
+Written by: Philip Hazel <[EMAIL PROTECTED]>
+
+ Copyright (c) 1997-2004 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#include "apr_strings.h"
+#include "apr_regex.h"
+
+#include "apu_config.h"
+
+#if APU_HAVE_PCRE
+
+#include "pcre.h"
+
+#define APR_WANT_STRFUNC
+#include "apr_want.h"
+
+/* Table of error strings indexed by error code: apr_regerror() looks up
+ * pstring[errcode], so the order of the entries must be kept in sync with
+ * the APR_REG_* definitions in apr_regex.h. */
+
+static const char *const pstring[] = {
+ "", /* Dummy for value 0 */
+ "internal error", /* APR_REG_ASSERT */
+ "failed to get memory", /* APR_REG_ESPACE */
+ "bad argument", /* APR_REG_INVARG */
+ "match failed" /* APR_REG_NOMATCH */
+};
+
+/**
+ * Translate a regex error code into a human-readable message.
+ *
+ * @param errcode      error code; used as an index into pstring[]
+ * @param preg         optional regex; when non-NULL and its re_erroffset
+ *                     is not -1, " at offset N" is appended to the message
+ * @param errbuf       buffer receiving the NUL-terminated message (may be
+ *                     NULL, in which case only the length is computed)
+ * @param errbuf_size  size of errbuf in bytes
+ * @return the buffer size needed to hold the complete message, including
+ *         the terminating NUL
+ */
+APR_DECLARE(apr_size_t) apr_regerror(int errcode, const apr_regex_t *preg,
+                                     char *errbuf, apr_size_t errbuf_size)
+{
+    const char *message, *addmessage;
+    apr_size_t length, addlength;
+
+    /* Negative codes must be rejected too, or they index before the table. */
+    message = (errcode < 0
+               || errcode >= (int)(sizeof(pstring) / sizeof(pstring[0])))
+        ? "unknown error code" : pstring[errcode];
+    length = strlen(message) + 1;
+
+    /* The offset is printed left-justified in a 6 character field. */
+    addmessage = " at offset ";
+    addlength = (preg != NULL && (int)preg->re_erroffset != -1)
+        ? strlen(addmessage) + 6 : 0;
+
+    if (errbuf != NULL && errbuf_size > 0) {
+        if (addlength > 0 && errbuf_size >= length + addlength) {
+            /* Bound the write by the caller's buffer size; "sizeof errbuf"
+             * would only be the size of the pointer. */
+            apr_snprintf(errbuf, errbuf_size,
+                         "%s%s%-6d", message, addmessage,
+                         (int)preg->re_erroffset);
+        }
+        else {
+            /* Not enough room for the offset: copy what fits of the bare
+             * message and terminate it explicitly. */
+            strncpy(errbuf, message, errbuf_size - 1);
+            errbuf[errbuf_size - 1] = 0;
+        }
+    }
+
+    return length + addlength;
+}
+
+/*************************************************
+* Free store held by a regex *
+*************************************************/
+
+/**
+ * Release the compiled PCRE object held by a regex.
+ *
+ * Only the PCRE object is released here, via pcre_free.  A NULL preg, or a
+ * regex that has already been freed, is ignored.
+ *
+ * @param preg  the regex whose compiled expression should be released
+ */
+APR_DECLARE(void) apr_regfree(apr_regex_t *preg)
+{
+    if (preg == NULL || preg->re_pcre == NULL)
+        return;
+
+    (pcre_free)(preg->re_pcre);
+    /* Clear the pointer so a second apr_regfree() cannot double-free. */
+    preg->re_pcre = NULL;
+}
+
+/*************************************************
+* Compile a regular expression *
+*************************************************/
+/**
+ * Compile a regular expression.
+ *
+ * @param preg     receives the compiled expression on success; set to NULL
+ *                 on failure
+ * @param pattern  the pattern to compile
+ * @param cflags   compilation flags (APR_REG_ICASE, APR_REG_NEWLINE)
+ * @param p        pool the apr_regex_t wrapper is allocated from; it is
+ *                 stored in the wrapper and used again by apr_regexec()
+ * @return APR_SUCCESS on success; APR_ENOPOOL or APR_EINVAL for missing
+ *         arguments; APR_REG_INVARG if PCRE rejects the pattern;
+ *         APR_ENOMEM if the wrapper cannot be allocated
+ * @note The compiled PCRE object is not allocated from the pool; release
+ *       it with apr_regfree().
+ */
+APR_DECLARE(apr_status_t) apr_regcomp(apr_regex_t **preg, const char *pattern,
+                                      int cflags, apr_pool_t *p)
+{
+    const char *errorptr;
+    int erroffset;
+    int options = 0;
+    pcre *re = NULL;
+    apr_regex_t *are = NULL;
+
+    if (!preg)
+        return APR_EINVAL;
+    /* Never leave the caller with an indeterminate pointer on failure. */
+    *preg = NULL;
+
+    if (!p)
+        return APR_ENOPOOL;
+    if (!pattern)
+        return APR_EINVAL;
+
+    if ((cflags & APR_REG_ICASE) != 0)
+        options |= PCRE_CASELESS;
+    if ((cflags & APR_REG_NEWLINE) != 0)
+        options |= PCRE_MULTILINE;
+
+    re = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
+    if (re == NULL)
+        return APR_REG_INVARG;
+
+    are = (apr_regex_t *)apr_pcalloc(p, sizeof(*are));
+    if (!are) {
+        /* The compiled pattern is not pool memory; free it or it leaks. */
+        (pcre_free)(re);
+        return APR_ENOMEM;
+    }
+
+    are->re_pcre = re;
+    are->re_erroffset = erroffset;
+    are->pool = p;
+    are->re_nsub = pcre_info((const pcre *)are->re_pcre, NULL, NULL);
+    *preg = are;
+
+    return APR_SUCCESS;
+}
+
+/*************************************************
+* Match a regular expression *
+*************************************************/
+
+/**
+ * Match a string against a compiled regular expression.
+ *
+ * PCRE requires 3 ints of working space for each captured substring, so a
+ * scratch vector is allocated from the regex's pool in addition to the
+ * array of matches handed back to the caller.
+ *
+ * @param pmatch  receives an array of nmatch match offsets on success and
+ *                NULL otherwise; slots with no captured data are set to -1
+ * @param preg    expression compiled by apr_regcomp()
+ * @param string  NUL-terminated subject string
+ * @param nmatch  number of match slots wanted
+ * @param eflags  execution flags (APR_REG_NOTBOL, APR_REG_NOTEOL)
+ * @return APR_SUCCESS on a match, APR_REG_NOMATCH when nothing matched,
+ *         APR_EINVAL for missing arguments, APR_ENOMEM on allocation
+ *         failure, or another APR_REG_* code mapped from the PCRE error
+ */
+APR_DECLARE(apr_status_t) apr_regexec(apr_regmatch_t **pmatch,
+                                      const apr_regex_t *preg,
+                                      const char *string,
+                                      apr_size_t nmatch,
+                                      int eflags)
+{
+    int rc;
+    int options = PCRE_NOTEMPTY;
+    int *ovector = NULL;
+    apr_regmatch_t *matches = NULL;
+
+    if (!pmatch)
+        return APR_EINVAL;
+    *pmatch = NULL;
+
+    if (!preg || !string)
+        return APR_EINVAL;
+
+    if ((eflags & APR_REG_NOTBOL) != 0)
+        options |= PCRE_NOTBOL;
+    if ((eflags & APR_REG_NOTEOL) != 0)
+        options |= PCRE_NOTEOL;
+
+    /* Only has meaning after compile */
+    ((apr_regex_t *)preg)->re_erroffset = (apr_size_t)(-1);
+
+    ovector = (int *)apr_palloc(preg->pool, sizeof(*ovector) * nmatch * 3);
+    if (!ovector)
+        return APR_ENOMEM;
+    /* Size by the element type; sizeof(*pmatch) is only a pointer's size
+     * and would under-allocate the array. */
+    matches = (apr_regmatch_t *)apr_palloc(preg->pool,
+                                           sizeof(*matches) * nmatch);
+    if (!matches)
+        return APR_ENOMEM;
+
+    rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string,
+                   (int)strlen(string),
+                   0, options, ovector, (int)(nmatch * 3));
+
+    /* All captured slots were filled in */
+    if (rc == 0)
+        rc = (int)nmatch;
+
+    if (rc >= 0) {
+        apr_size_t i;
+        apr_size_t filled = (apr_size_t)rc;
+
+        /* Never write past the array, whatever PCRE reports. */
+        if (filled > nmatch)
+            filled = nmatch;
+
+        /* Fill in returned slots with the data */
+        for (i = 0; i < filled; i++) {
+            matches[i].rm_so = ovector[i * 2];
+            matches[i].rm_eo = ovector[i * 2 + 1];
+        }
+
+        /* Fill in remaining slots with -1 for both settings */
+        for (; i < nmatch; i++)
+            matches[i].rm_so = matches[i].rm_eo = -1;
+
+        *pmatch = matches;
+        return APR_SUCCESS;
+    }
+
+    /* Map PCRE's negative error codes onto the APR_REG_* codes. */
+    switch (rc) {
+    case PCRE_ERROR_NOMATCH:
+        return APR_REG_NOMATCH;
+    case PCRE_ERROR_NULL:
+        return APR_REG_INVARG;
+    case PCRE_ERROR_BADOPTION:
+        return APR_REG_INVARG;
+    case PCRE_ERROR_BADMAGIC:
+        return APR_REG_INVARG;
+    case PCRE_ERROR_UNKNOWN_NODE:
+        return APR_REG_ASSERT;
+    case PCRE_ERROR_NOMEMORY:
+        return APR_REG_ESPACE;
+#ifdef PCRE_ERROR_MATCHLIMIT
+    case PCRE_ERROR_MATCHLIMIT:
+        return APR_REG_ESPACE;
+#endif
+#ifdef PCRE_ERROR_BADUTF8
+    case PCRE_ERROR_BADUTF8:
+        return APR_REG_INVARG;
+#endif
+#ifdef PCRE_ERROR_BADUTF8_OFFSET
+    case PCRE_ERROR_BADUTF8_OFFSET:
+        return APR_REG_INVARG;
+#endif
+    }
+    /* Any PCRE error not listed above is reported as an internal error. */
+    return APR_REG_ASSERT;
+}
+
+/* End of pcreposix.c */
+#else /* APU_HAVE_PCRE */
+
+#error
+
+#endif /* ! APU_HAVE_PCRE */
Index: test/Makefile.in
===================================================================
--- test/Makefile.in (revision 423238)
+++ test/Makefile.in (working copy)
@@ -93,7 +93,7 @@
testall_OBJECTS = teststrmatch.lo testuri.lo testuuid.lo abts.lo
testutil.lo \
testbuckets.lo testpass.lo testmd4.lo testmd5.lo testldap.lo \
- testdaterfc.lo testdbd.lo
+ testdaterfc.lo testdbd.lo testregex.lo
testall_LDADD = $(TARGET_LIB_PATH)
testall: $(testall_OBJECTS) $(testall_LDADD)
$(LINK) $(APRUTIL_LDFLAGS) $(testall_OBJECTS) $(testall_LDADD)
$(PROGRAM_DEPENDENCIES)
Index: test/testregex.c
===================================================================
--- test/testregex.c (revision 0)
+++ test/testregex.c (revision 0)
@@ -0,0 +1,83 @@
+/* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
+ * applicable.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "testutil.h"
+#include "apr_general.h"
+#include "apr_strings.h"
+#include "apr_regex.h"
+
+/* Patterns compiled by runtests(): each entry is passed to apr_regcomp()
+ * and the returned status is compared against rv. */
+struct regexTest {
+ const char *pattern; /* pattern text handed to apr_regcomp() */
+ int options; /* cflags argument for apr_regcomp() */
+ apr_status_t rv; /* status apr_regcomp() is expected to return */
+} regexTests[] = {
+ { "[A-Za-z]*" , 0, APR_SUCCESS },
+};
+
+/* Strings matched by runtests() against a successfully compiled pattern;
+ * each case is run only for the regexTests[] entry selected by nPattern. */
+struct testCase {
+ int nPattern; /* index into regexTests[] this case belongs to */
+ const char *string; /* subject string handed to apr_regexec() */
+ int nmatches; /* nmatch argument for apr_regexec() */
+ int options; /* eflags argument for apr_regexec() */
+ int rv; /* status apr_regexec() is expected to return */
+} cases[] = {
+ { 0, "abc123", 1, 0, APR_SUCCESS },
+ { 0, "abcdefghijk", 1, 0, APR_SUCCESS },
+ { 0, "123456", 1, 0, APR_REG_NOMATCH },
+};
+
+/*
+ * Compile every pattern in regexTests[] and check the returned status;
+ * for each pattern that compiles, run the cases[] entries that refer to
+ * it through apr_regexec() and check their status as well.
+ *
+ * tc is the ABTS test case used for reporting; data is unused.
+ * NOTE(review): p is presumably the global test pool from testutil.h.
+ */
+void runtests(abts_case *tc, void *data)
+{
+    apr_size_t i;
+
+    (void)data;
+
+    for (i = 0; i < sizeof(regexTests) / sizeof(regexTests[0]); i++) {
+        struct regexTest *rt = &regexTests[i];
+        apr_regex_t *re = NULL;
+        apr_status_t rv;
+        apr_size_t j;
+
+        rv = apr_regcomp(&re, rt->pattern, rt->options, p);
+        /* Expected value first, so failure reports read correctly. */
+        ABTS_INT_EQUAL(tc, rt->rv, rv);
+
+        if (rv != APR_SUCCESS)
+            continue;
+
+        for (j = 0; j < sizeof(cases) / sizeof(cases[0]); j++) {
+            struct testCase *t = &cases[j];
+            apr_regmatch_t *matches = NULL;
+
+            /* Skip cases written for a different pattern. */
+            if (t->nPattern != (int)i)
+                continue;
+
+            rv = apr_regexec(&matches, re, t->string, t->nmatches,
+                             t->options);
+            ABTS_INT_EQUAL(tc, t->rv, rv);
+        }
+        apr_regfree(re);
+    }
+}
+
+/* Suite entry point: adds this file's suite via ADD_SUITE, runs runtests()
+ * in it with no user data, and returns the resulting suite. */
+abts_suite *testregex(abts_suite *suite)
+{
+ suite = ADD_SUITE(suite);
+
+ abts_run_test(suite, runtests, NULL);
+
+ return suite;
+}
+
Index: test/abts_tests.h
===================================================================
--- test/abts_tests.h (revision 423238)
+++ test/abts_tests.h (working copy)
@@ -32,7 +32,8 @@
{testmd5},
{testldap},
{testdbd},
- {testdaterfc}
+ {testdaterfc},
+ {testregex},
};
#endif /* APR_TEST_INCLUDES */
Index: test/testutil.h
===================================================================
--- test/testutil.h (revision 423238)
+++ test/testutil.h (working copy)
@@ -53,5 +53,6 @@
abts_suite *testldap(abts_suite *suite);
abts_suite *testdbd(abts_suite *suite);
abts_suite *testdaterfc(abts_suite *suite);
+abts_suite *testregex(abts_suite *suite);
#endif /* APR_TEST_INCLUDES */
Index: build.conf
===================================================================
--- build.conf (revision 423238)
+++ build.conf (working copy)
@@ -21,6 +21,7 @@
xlate/*.c
dbd/*.c
ssl/*.c
+ regex/*.c
# we have no platform-specific subdirs
platform_dirs =
Index: configure.in
===================================================================
--- configure.in (revision 423238)
+++ configure.in (working copy)
@@ -15,6 +15,7 @@
sinclude(build/find_apr.m4)
sinclude(build/dbm.m4)
sinclude(build/dbd.m4)
+sinclude(build/pcre.m4)
sinclude(build/ssl.m4)
dnl Generate ./config.nice for reproducing runs of configure
@@ -160,6 +161,7 @@
fi
APU_FIND_SSL
+APU_FIND_PCRE
so_ext=$APR_SO_EXT
lib_target=$APR_LIB_TARGET
Index: build/pcre.m4
===================================================================
--- build/pcre.m4 (revision 0)
+++ build/pcre.m4 (revision 0)
@@ -0,0 +1,73 @@
+dnl -------------------------------------------------------- -*- autoconf -*-
+dnl Copyright 2006 The Apache Software Foundation or its licensors, as
+dnl applicable.
+dnl
+dnl Licensed under the Apache License, Version 2.0 (the "License");
+dnl you may not use this file except in compliance with the License.
+dnl You may obtain a copy of the License at
+dnl
+dnl http://www.apache.org/licenses/LICENSE-2.0
+dnl
+dnl Unless required by applicable law or agreed to in writing, software
+dnl distributed under the License is distributed on an "AS IS" BASIS,
+dnl WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+dnl See the License for the specific language governing permissions and
+dnl limitations under the License.
+
+dnl
+dnl PCRE support
+dnl
+dnl Unless we are asked NOT to search for PCRE by the user
+dnl passing --with-pcre=no to configure, we search the default
+dnl locations for it.
+dnl
+
+dnl APU_FIND_PCRE: look for PCRE libraries and headers
+dnl
+dnl Sets apu_have_pcre to 1 when --with-pcre is given with the value yes,
+dnl calls APU_FIND_LINKEDPCRE when the option is absent, and defines
+dnl APU_HAVE_PCRE when apu_have_pcre ends up as 1.
+dnl
+dnl NOTE(review): APU_FIND_LINKEDPCRE is not defined in this file, and a
+dnl second AC_DEFUN of APU_FIND_PCRE follows below. Presumably the later
+dnl definition is the one that takes effect; confirm which is intended.
+dnl
+AC_DEFUN([APU_FIND_PCRE], [
+ apu_have_pcre=0
+
+ AC_ARG_WITH([PCRE], [
+ --with-pcre
+ ], [
+ if test "$withval" = "yes"; then
+ apu_have_pcre=1
+
+ fi
+ ], [
+ APU_FIND_LINKEDPCRE
+ ])
+
+ if test "$apu_have_pcre" = "1"; then
+ AC_DEFINE([APU_HAVE_PCRE], 1, [Define that we have libpcre available])
+ fi
+
+])
+dnl
+
+dnl APU_FIND_PCRE: locate the pcre-config script and add the PCRE flags
+dnl to the build.
+dnl
+dnl The script is taken from --with-pcre=PATH, where PATH is either a
+dnl directory containing bin/pcre-config or the script itself; otherwise
+dnl pcre-config is searched for in PATH. configure stops with an error if
+dnl the script cannot be run. On success APU_HAVE_PCRE is defined and the
+dnl output of pcre-config --cflags and --libs is added to APRUTIL_CPPFLAGS
+dnl and APRUTIL_LDFLAGS respectively.
+dnl
+dnl NOTE(review): --with-pcre=no is not special-cased here, so PCRE is
+dnl still searched for in that case; confirm whether that is intended.
+dnl
+AC_DEFUN([APU_FIND_PCRE], [
+ echo $ac_n "Checking for PCRE...${nl}"
+
+ AC_ARG_WITH(pcre, AC_HELP_STRING(--with-pcre=PATH, Path to PCRE library))
+
+ if test -d "$with_pcre" && test -x "$with_pcre/bin/pcre-config"; then
+ PCRE_CONFIG=$with_pcre/bin/pcre-config
+ elif test -x "$with_pcre"; then
+ PCRE_CONFIG=$with_pcre
+ else
+ echo $ac_n "Searching for pcre-config in PATH....${nl}"
+ AC_PATH_PROG(PCRE_CONFIG, pcre-config, false)
+ fi
+
+ if $PCRE_CONFIG --version >/dev/null 2>&1; then :;
+ echo $ac_n "Found ${PCRE_CONFIG}${nl}"
+ else
+ AC_MSG_ERROR([Did not find pcre-config script at ${PCRE_CONFIG}])
+ fi
+
+ AC_DEFINE([APU_HAVE_PCRE], 1, [Define that we have libpcre available])
+ APR_ADDTO(APRUTIL_CPPFLAGS, [`$PCRE_CONFIG --cflags`])
+ APR_ADDTO(APRUTIL_LDFLAGS, [`$PCRE_CONFIG --libs`])
+])
+dnl
+
+