Hi,

this patch attempts to bring pcre2 support, to address the fact that pcre2
is increasingly installed in place of pcre because of the latter's security
flaws. If accepted, it will be for 1.8; I think that is better timing than
now, as pcre2 is not yet widely available in some of the main Linux
distributions (current debian stable, centos without EPEL repository, ...).

I deliberately kept the changes smooth, simple and close to the pcre part.

Regards.
From d0e5c5a85af2e06749f0b2f3958692b1cdc92af1 Mon Sep 17 00:00:00 2001
From: David Carlier <devne...@gmail.com>
Date: Mon, 21 Nov 2016 21:25:58 +0000
Subject: [PATCH] MEDIUM: regex: pcre2 support

this adds support for the newer pcre2 library,
which is more secure than its older sibling at the cost of a
more complex API.
It works much like the pcre part to keep
the overall change smooth, except:

- we define the string class supported (PCRE2_CODE_UNIT_WIDTH) at compile time.
- after matching, the ovec data is properly sized, although
we do not take advantage of it here.
- the lack of jit support is treated less 'dramatically',
as pcre2_jit_compile is a 'no-op' in this case.
---
 Makefile               | 54 +++++++++++++++++++++++++++++
 include/common/regex.h | 32 +++++++++++++++++-
 src/haproxy.c          | 24 ++++++++++++-
 src/regex.c            | 92 +++++++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 196 insertions(+), 6 deletions(-)

diff --git a/Makefile b/Makefile
index b68d2c5..a290cb7 100644
--- a/Makefile
+++ b/Makefile
@@ -14,11 +14,14 @@
 #   USE_NETFILTER        : enable netfilter on Linux. Automatic.
 #   USE_PCRE             : enable use of libpcre for regex. Recommended.
 #   USE_PCRE_JIT         : enable JIT for faster regex on libpcre >= 8.32
+#   USE_PCRE2            : enable use of libpcre2 for regex.
+#   USE_PCRE2_JIT        : enable JIT for faster regex on libpcre2
 #   USE_POLL             : enable poll(). Automatic.
 #   USE_PRIVATE_CACHE    : disable shared memory cache of ssl sessions.
 #   USE_PTHREAD_PSHARED  : enable pthread process shared mutex on sslcache.
 #   USE_REGPARM          : enable regparm optimization. Recommended on x86.
 #   USE_STATIC_PCRE      : enable static libpcre. Recommended.
+#   USE_STATIC_PCRE2     : enable static libpcre2.
 #   USE_TPROXY           : enable transparent proxy. Automatic.
 #   USE_LINUX_TPROXY     : enable full transparent proxy. Automatic.
 #   USE_LINUX_SPLICE     : enable kernel 2.6 splicing. Automatic.
@@ -671,6 +674,9 @@ OPTIONS_LDFLAGS += $(if $(WURFL_LIB),-L$(WURFL_LIB)) -lwurfl
 endif
 
 ifneq ($(USE_PCRE)$(USE_STATIC_PCRE)$(USE_PCRE_JIT),)
+ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),)
+$(error cannot compile both PCRE and PCRE2 support)
+endif
 # PCREDIR is used to automatically construct the PCRE_INC and PCRE_LIB paths,
 # by appending /include and /lib respectively. If your system does not use the
 # same sub-directories, simply force these variables instead of PCREDIR. It is
@@ -702,6 +708,54 @@ BUILD_OPTIONS   += $(call ignore_implicit,USE_PCRE_JIT)
 endif
 endif
 
+ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),)
+PCRE2DIR	:= $(shell pcre2-config --prefix 2>/dev/null || echo /usr/local)
+ifneq ($(PCRE2DIR),)
+PCRE2_INC       := $(PCRE2DIR)/include
+PCRE2_LIB       := $(PCRE2DIR)/lib
+
+ifeq ($(PCRE2_WIDTH),)
+PCRE2_WIDTH	= 8
+endif
+
+ifneq ($(PCRE2_WIDTH),8)
+ifneq ($(PCRE2_WIDTH),16)
+ifneq ($(PCRE2_WIDTH),32)
+$(error PCRE2_WIDTH needs to be set to either 8,16 or 32)
+endif
+endif
+endif
+
+
+PCRE2_LDFLAGS	:= $(shell pcre2-config --libs$(PCRE2_WIDTH) 2>/dev/null || echo -L/usr/local/lib -lpcre2-$(PCRE2_WIDTH))
+
+ifeq ($(PCRE2_LDFLAGS),)
+$(error libpcre2-$(PCRE2_WIDTH) not found)
+else
+ifeq ($(PCRE2_WIDTH),8)
+PCRE2_LDFLAGS	+= -lpcre2-posix
+endif
+endif
+
+OPTIONS_CFLAGS	+= -DUSE_PCRE2 -DPCRE2_CODE_UNIT_WIDTH=$(PCRE2_WIDTH)
+OPTIONS_CFLAGS  += $(if $(PCRE2_INC), -I$(PCRE2_INC))
+
+ifneq ($(USE_STATIC_PCRE2),)
+OPTIONS_LDFLAGS += $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) -Wl,-Bstatic -L$(PCRE2_LIB) $(PCRE2_LDFLAGS) -Wl,-Bdynamic
+BUILD_OPTIONS   += $(call ignore_implicit,USE_STATIC_PCRE2)
+else
+OPTIONS_LDFLAGS += $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) -L$(PCRE2_LIB) $(PCRE2_LDFLAGS)
+BUILD_OPTIONS   += $(call ignore_implicit,USE_PCRE2)
+endif
+
+ifneq ($(USE_PCRE2_JIT),)
+OPTIONS_CFLAGS  += -DUSE_PCRE2_JIT
+BUILD_OPTIONS   += $(call ignore_implicit,USE_PCRE2_JIT)
+endif
+
+endif
+endif
+
 # TCP Fast Open
 ifneq ($(USE_TFO),)
 OPTIONS_CFLAGS  += -DUSE_TFO
diff --git a/include/common/regex.h b/include/common/regex.h
index 8a1703f..2f171b3 100644
--- a/include/common/regex.h
+++ b/include/common/regex.h
@@ -36,7 +36,11 @@
 #define PCRE_STUDY_JIT_COMPILE 0
 #endif
 
-#else /* no PCRE */
+#elif USE_PCRE2
+#include <pcre2.h>
+#include <pcre2posix.h>
+
+#else /* no PCRE, nor PCRE2 */
 #include <regex.h>
 #endif
 
@@ -49,6 +53,8 @@ struct my_regex {
 #error "The PCRE lib doesn't support JIT. Change your lib, or remove the option USE_PCRE_JIT."
 #endif
 #endif
+#elif USE_PCRE2
+	pcre2_code *reg;
 #else /* no PCRE */
 	regex_t regex;
 #endif
@@ -95,6 +101,17 @@ static inline int regex_exec(const struct my_regex *preg, char *subject) {
 	if (pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, 0, NULL, 0) < 0)
 		return 0;
 	return 1;
+#elif defined(USE_PCRE2)
+	pcre2_match_data *pm;
+	int ret;
+
+	pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+	ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject),
+		0, 0, pm, NULL);
+	pcre2_match_data_free(pm);
+	if (ret < 0)
+		return 0;
+	return 1;
 #else
 	int match;
 	match = regexec(&preg->regex, subject, 0, NULL, 0);
@@ -115,6 +132,17 @@ static inline int regex_exec2(const struct my_regex *preg, char *subject, int le
 	if (pcre_exec(preg->reg, preg->extra, subject, length, 0, 0, NULL, 0) < 0)
 		return 0;
 	return 1;
+#elif defined(USE_PCRE2)
+	pcre2_match_data *pm;
+	int ret;
+
+	pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+	ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length,
+		0, 0, pm, NULL);
+	pcre2_match_data_free(pm);
+	if (ret < 0)
+		return 0;
+	return 1;
 #else
 	int match;
 	char old_char = subject[length];
@@ -143,6 +171,8 @@ static inline void regex_free(struct my_regex *preg) {
 #else /* PCRE_CONFIG_JIT */
 	pcre_free(preg->extra);
 #endif /* PCRE_CONFIG_JIT */
+#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+	pcre2_code_free(preg->reg);
 #else
 	regfree(&preg->regex);
 #endif
diff --git a/src/haproxy.c b/src/haproxy.c
index 728c8e5..c9745b7 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -389,8 +389,30 @@ void display_build_opts()
 	printf("no (USE_PCRE_JIT not set)");
 #endif
 	printf("\n");
+#endif
+
+#ifdef USE_PCRE2
+	printf("Built with PCRE2 version : %s\n", (HAP_XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
+		HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
+		HAP_XSTRING(PCRE2_MAJOR.PCRE2_MINOR) HAP_XSTRING(PCRE2_PRERELEASE PCRE2_DATE));
+	printf("\nPCRE2 library supports JIT : ");
+#ifdef USE_PCRE2_JIT
+	{
+		int r;
+		pcre2_config(PCRE2_CONFIG_JIT, &r);
+		if (r)
+			printf("yes");
+		else
+			printf("no (libpcre2 build without JIT?)");
+	}
 #else
-	printf("Built without PCRE support (using libc's regex instead)\n");
+	printf("no (USE_PCRE2_JIT not set)");
+#endif
+	printf("\n");
+#endif
+
+#if !defined(USE_PCRE) && !defined(USE_PCRE2)
+	printf("Built without PCRE or PCRE2 support (using libc's regex instead)\n");
 #endif
 
 #ifdef USE_LUA
diff --git a/src/regex.c b/src/regex.c
index be4fe5b..9115c1e 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -157,9 +157,14 @@ const char *chain_regex(struct hdr_exp **head, struct my_regex *preg,
  */
 int regex_exec_match(const struct my_regex *preg, const char *subject,
                      size_t nmatch, regmatch_t pmatch[], int flags) {
-#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
 	int ret;
+#ifdef USE_PCRE2
+	PCRE2_SIZE *matches;
+	pcre2_match_data *pm;
+#else
 	int matches[MAX_MATCH * 3];
+#endif
 	int enmatch;
 	int i;
 	int options;
@@ -168,15 +173,20 @@ int regex_exec_match(const struct my_regex *preg, const char *subject,
 	 * match i the maximum value for match, in fact this
 	 * limit is not applyied.
 	 */
+
 	enmatch = nmatch;
 	if (enmatch > MAX_MATCH)
 		enmatch = MAX_MATCH;
 
 	options = 0;
 	if (flags & REG_NOTBOL)
+#ifdef USE_PCRE2
+		options |= PCRE2_NOTBOL;
+#else
 		options |= PCRE_NOTBOL;
+#endif
 
-	/* The value returned by pcre_exec() is one more than the highest numbered
+	/* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
 	 * pair that has been set. For example, if two substrings have been captured,
 	 * the returned value is 3. If there are no capturing subpatterns, the return
 	 * value from a successful match is 1, indicating that just the first pair of
@@ -185,9 +195,22 @@ int regex_exec_match(const struct my_regex *preg, const char *subject,
 	 * It seems that this function returns 0 if it detect more matches than avalaible
 	 * space in the matches array.
 	 */
+#ifdef USE_PCRE2
+	pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+	ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)strlen(subject), 0, options, pm, NULL);
+
+	if (ret < 0) {
+		pcre2_match_data_free(pm);
+		return 0;
+	}
+
+	matches = pcre2_get_ovector_pointer(pm);
+#else
 	ret = pcre_exec(preg->reg, preg->extra, subject, strlen(subject), 0, options, matches, enmatch * 3);
+
 	if (ret < 0)
 		return 0;
+#endif
 
 	if (ret == 0)
 		ret = enmatch;
@@ -203,6 +226,9 @@ int regex_exec_match(const struct my_regex *preg, const char *subject,
 		pmatch[i].rm_so = -1;
 		pmatch[i].rm_eo = -1;
 	}
+#ifdef USE_PCRE2
+	pcre2_match_data_free(pm);
+#endif
 	return 1;
 #else
 	int match;
@@ -225,9 +251,14 @@ int regex_exec_match(const struct my_regex *preg, const char *subject,
  */
 int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
                       size_t nmatch, regmatch_t pmatch[], int flags) {
-#if defined(USE_PCRE) || defined(USE_PCRE_JIT)
+#if defined(USE_PCRE) || defined(USE_PCRE_JIT) || defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
 	int ret;
+#ifdef USE_PCRE2
+	PCRE2_SIZE *matches;
+	pcre2_match_data *pm;
+#else
 	int matches[MAX_MATCH * 3];
+#endif
 	int enmatch;
 	int i;
 	int options;
@@ -242,9 +273,13 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
 
 	options = 0;
 	if (flags & REG_NOTBOL)
+#ifdef USE_PCRE2
+		options |= PCRE2_NOTBOL;
+#else
 		options |= PCRE_NOTBOL;
+#endif
 
-	/* The value returned by pcre_exec() is one more than the highest numbered
+	/* The value returned by pcre_exec()/pcre2_match() is one more than the highest numbered
 	 * pair that has been set. For example, if two substrings have been captured,
 	 * the returned value is 3. If there are no capturing subpatterns, the return
 	 * value from a successful match is 1, indicating that just the first pair of
@@ -253,9 +288,21 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
 	 * It seems that this function returns 0 if it detect more matches than avalaible
 	 * space in the matches array.
 	 */
+#ifdef USE_PCRE2
+	pm = pcre2_match_data_create_from_pattern(preg->reg, NULL);
+	ret = pcre2_match(preg->reg, (PCRE2_SPTR)subject, (PCRE2_SIZE)length, 0, options, pm, NULL);
+
+	if (ret < 0) {
+		pcre2_match_data_free(pm);
+		return 0;
+	}
+
+	matches = pcre2_get_ovector_pointer(pm);
+#else
 	ret = pcre_exec(preg->reg, preg->extra, subject, length, 0, options, matches, enmatch * 3);
 	if (ret < 0)
 		return 0;
+#endif
 
 	if (ret == 0)
 		ret = enmatch;
@@ -271,6 +318,9 @@ int regex_exec_match2(const struct my_regex *preg, char *subject, int length,
 		pmatch[i].rm_so = -1;
 		pmatch[i].rm_eo = -1;
 	}
+#ifdef USE_PCRE2
+	pcre2_match_data_free(pm);
+#endif
 	return 1;
 #else
 	char old_char = subject[length];
@@ -310,6 +360,40 @@ int regex_comp(const char *str, struct my_regex *regex, int cs, int cap, char **
 		memprintf(err, "failed to compile regex '%s' (error=%s)", str, error);
 		return 0;
 	}
+#elif defined(USE_PCRE2) || defined(USE_PCRE2_JIT)
+	int flags = 0;
+	int errn;
+#if defined(USE_PCRE2_JIT)
+	int jit;
+#endif
+	PCRE2_UCHAR error[256];
+	PCRE2_SIZE erroffset;
+
+	if (!cs)
+		flags |= PCRE2_CASELESS;
+	if (!cap)
+		flags |= PCRE2_NO_AUTO_CAPTURE;
+
+	regex->reg = pcre2_compile((PCRE2_SPTR)str, PCRE2_ZERO_TERMINATED, flags, &errn, &erroffset, NULL);
+	if (!regex->reg) {
+		pcre2_get_error_message(errn, error, sizeof(error));
+		memprintf(err, "regex '%s' is invalid (error=%s, erroffset=%zu)", str, error, erroffset);
+		return 0;
+	}
+
+#if defined(USE_PCRE2_JIT)
+	jit = pcre2_jit_compile(regex->reg, PCRE2_JIT_COMPLETE);
+	/*
+	 * We end if it is an error not related to lack of JIT support
+	 * in a case of JIT support missing pcre2_jit_compile is "no-op"
+	 */
+	if (jit < 0 && jit != PCRE2_ERROR_JIT_BADOPTION) {
+		pcre2_code_free(regex->reg);
+		memprintf(err, "regex '%s' jit compilation failed", str);
+		return 0;
+	}
+#endif
+
 #else
 	int flags = REG_EXTENDED;
 
-- 
2.7.4

Reply via email to