Repository: trafficserver Updated Branches: refs/heads/master 4ef8d3914 -> 7b9689f49
TS-3143: Create new Regex class that uses PCRE JIT Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/7b9689f4 Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/7b9689f4 Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/7b9689f4 Branch: refs/heads/master Commit: 7b9689f4976faf4cc597d2b1513329029e3862b1 Parents: 4ef8d39 Author: Phil Sorber <[email protected]> Authored: Fri Oct 17 14:36:54 2014 -0600 Committer: Phil Sorber <[email protected]> Committed: Tue Oct 28 22:33:49 2014 -0600 ---------------------------------------------------------------------- .gitignore | 2 + CHANGES | 2 + lib/ts/Makefile.am | 6 ++- lib/ts/Regex.cc | 131 ++++++++++++++++++++++++++++++++++++---------- lib/ts/Regex.h | 28 +++++++--- lib/ts/test_Regex.cc | 65 +++++++++++++++++++++++ 6 files changed, 198 insertions(+), 36 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b9689f4/.gitignore ---------------------------------------------------------------------- diff --git a/.gitignore b/.gitignore index 46a263f..fa4fadc 100644 --- a/.gitignore +++ b/.gitignore @@ -67,6 +67,8 @@ lib/ts/test_atomic lib/ts/test_freelist lib/ts/test_Map lib/ts/test_Vec +lib/ts/test_geometry +lib/ts/test_Regex lib/perl/lib/Apache/TS.pm iocore/net/test_certlookup http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b9689f4/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 4d733f9..99710a1 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,8 @@ -*- coding: utf-8 -*- Changes with Apache Traffic Server 5.2.0 + *) [TS-3143] Create new Regex class that uses PCRE JIT. + *) [TS-3115] Add server response time logging fields. Author: Acácio Centeno <[email protected]> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b9689f4/lib/ts/Makefile.am ---------------------------------------------------------------------- diff --git a/lib/ts/Makefile.am b/lib/ts/Makefile.am index 00ff352..a46eb7f 100644 --- a/lib/ts/Makefile.am +++ b/lib/ts/Makefile.am @@ -21,7 +21,7 @@ library_includedir=$(includedir)/ts library_include_HEADERS = apidefs.h noinst_PROGRAMS = mkdfa CompileParseRules -check_PROGRAMS = test_atomic test_freelist test_arena test_List test_Map test_Vec test_geometry +check_PROGRAMS = test_arena test_atomic test_freelist test_geometry test_List test_Map test_Regex test_Vec TESTS = $(check_PROGRAMS) AM_CPPFLAGS = -I$(top_srcdir)/lib @@ -215,6 +215,10 @@ test_Map_SOURCES = test_Map.cc test_Map_LDADD = libtsutil.la @LIBTCL@ @LIBPCRE@ test_Map_LDFLAGS = @EXTRA_CXX_LDFLAGS@ @LIBTOOL_LINK_FLAGS@ +test_Regex_SOURCES = test_Regex.cc +test_Regex_LDADD = libtsutil.la @LIBTCL@ @LIBPCRE@ +test_Regex_LDFLAGS = @EXTRA_CXX_LDFLAGS@ @LIBTOOL_LINK_FLAGS@ + test_Vec_SOURCES = test_Vec.cc test_Vec_LDADD = libtsutil.la @LIBTCL@ @LIBPCRE@ test_Vec_LDFLAGS = @EXTRA_CXX_LDFLAGS@ @LIBTOOL_LINK_FLAGS@ http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b9689f4/lib/ts/Regex.cc ---------------------------------------------------------------------- diff --git a/lib/ts/Regex.cc b/lib/ts/Regex.cc index e2b9503..3b08138 100644 --- a/lib/ts/Regex.cc +++ b/lib/ts/Regex.cc @@ -24,16 +24,106 @@ #include "libts.h" #include "Regex.h" +#ifdef PCRE_CONFIG_JIT +struct RegexThreadKey +{ + RegexThreadKey() { + pthread_key_create(&this->key, (void (*)(void *)) &pcre_jit_stack_free); + } + + pthread_key_t key; +}; + +static RegexThreadKey k; + +static pcre_jit_stack * +get_jit_stack(void *data ATS_UNUSED) +{ + pcre_jit_stack *jit_stack; + + if ((jit_stack = (pcre_jit_stack *) pthread_getspecific(k.key)) == NULL) { + jit_stack = pcre_jit_stack_alloc(ats_pagesize(), 1024 * 1024); // 1 page min and 1MB max + pthread_setspecific(k.key, (void *)jit_stack); + } + + return jit_stack; +} +#endif + +bool +Regex::compile(const char *pattern, unsigned flags) +{ + const char *error; + int erroffset; + int options = 0; + int study_opts = 0; + + if (regex) + return false; + + if (flags & RE_CASE_INSENSITIVE) { + options |= PCRE_CASELESS; + } + + if (flags & RE_ANCHORED) { + options |= PCRE_ANCHORED; + } + + regex = pcre_compile(pattern, options, &error, &erroffset, NULL); + if (error) { + regex = NULL; + return false; + } + +#ifdef PCRE_CONFIG_JIT + study_opts |= PCRE_STUDY_JIT_COMPILE; +#endif + + regex_extra = pcre_study(regex, study_opts, &error); + +#ifdef PCRE_CONFIG_JIT + if (regex_extra) + pcre_assign_jit_stack(regex_extra, &get_jit_stack, NULL); +#endif + + return true; +} + +bool +Regex::exec(const char *str) +{ + return exec(str, strlen(str)); +} + +bool +Regex::exec(const char *str, int length) +{ + int ovector[30], rv; + + rv = pcre_exec(regex, regex_extra, str, length , 0, 0, ovector, countof(ovector)); + return rv > 0 ? true : false; +} + +Regex::~Regex() +{ + if (regex_extra) +#ifdef PCRE_CONFIG_JIT + pcre_free_study(regex_extra); +#else + pcre_free(regex_extra); +#endif + if (regex) + pcre_free(regex); +} + DFA::~DFA() { dfa_pattern * p = _my_patterns; dfa_pattern * t; while(p) { - if (p->_pe) - pcre_free(p->_pe); if (p->_re) - pcre_free(p->_re); + delete p->_re; if(p->_p) ats_free(p->_p); t = p->_next; @@ -45,31 +135,20 @@ DFA::~DFA() dfa_pattern * DFA::build(const char *pattern, unsigned flags) { - const char *error; - int erroffset; dfa_pattern* ret; - int options = PCRE_ANCHORED; - - ret = (dfa_pattern*)ats_malloc(sizeof(dfa_pattern)); - ret->_p = NULL; - - if (flags & RE_CASE_INSENSITIVE) { - options |= PCRE_CASELESS; - } - - if (flags & RE_UNANCHORED) { - options &= ~PCRE_ANCHORED; - } + int rv; - ret->_re = pcre_compile(pattern, options, &error, &erroffset, NULL); - if (error) { - ats_free(ret); - return NULL; + if (!(flags & RE_UNANCHORED)) { + flags |= RE_ANCHORED; } - ret->_pe = pcre_study(ret->_re, 0, &error); + ret = (dfa_pattern*)ats_malloc(sizeof(dfa_pattern)); + ret->_p = NULL; - if (error) { + ret->_re = new Regex(); + rv = ret->_re->compile(pattern, flags); + if (rv == -1) { + delete ret->_re; ats_free(ret); return NULL; } @@ -96,11 +175,9 @@ DFA::compile(const char **patterns, int npatterns, unsigned flags) dfa_pattern *ret = NULL; dfa_pattern *end = NULL; int i; - //char buf[128]; for (i = 0; i < npatterns; i++) { pattern = patterns[i]; - //snprintf(buf,128,"%s",pattern); ret = build(pattern,flags); if (!ret) { continue; @@ -135,12 +212,10 @@ int DFA::match(const char *str, int length) const { int rc; - int ovector[30]; - //int wspace[20]; dfa_pattern * p = _my_patterns; while(p) { - rc = pcre_exec(p->_re, p->_pe, str, length , 0, 0, ovector, 30/*,wspace,20*/); + rc = p->_re->exec(str, length); if (rc > 0) { return p->_idx; } http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b9689f4/lib/ts/Regex.h ---------------------------------------------------------------------- diff --git a/lib/ts/Regex.h b/lib/ts/Regex.h index f3f82ab..ac45958 100644 --- a/lib/ts/Regex.h +++ b/lib/ts/Regex.h @@ -32,17 +32,32 @@ #include <pcre.h> #endif - enum REFlags { RE_CASE_INSENSITIVE = 0x0001, // default is case sensitive - RE_UNANCHORED = 0x0002 // default is to anchor at the first matching position + RE_UNANCHORED = 0x0002, // default (for DFA) is to anchor at the first matching position + RE_ANCHORED = 0x0004, // default (for Regex) is unanchored +}; + +class Regex +{ +public: + Regex():regex(NULL), regex_extra(NULL) { + } + bool compile(const char *pattern, unsigned flags = 0); + // It is safe to call exec() concurrently on the same object instance + bool exec(const char *str); + bool exec(const char *str, int length); + ~Regex(); + +private: + pcre *regex; + pcre_extra *regex_extra; }; typedef struct __pat { int _idx; - pcre *_re; - pcre_extra *_pe; + Regex *_re; char *_p; __pat * _next; } dfa_pattern; @@ -52,12 +67,12 @@ class DFA public: DFA():_my_patterns(0) { } - + ~DFA(); int compile(const char *pattern, unsigned flags = 0); int compile(const char **patterns, int npatterns, unsigned flags = 0); - + int match(const char *str) const; int match(const char *str, int length) const; @@ -67,5 +82,4 @@ private: dfa_pattern * _my_patterns; }; - #endif /* __TS_REGEX_H__ */ http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b9689f4/lib/ts/test_Regex.cc ---------------------------------------------------------------------- diff --git a/lib/ts/test_Regex.cc b/lib/ts/test_Regex.cc new file mode 100644 index 0000000..8ed4323 --- /dev/null +++ b/lib/ts/test_Regex.cc @@ -0,0 +1,65 @@ +/* + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <ink_assert.h> +#include <ink_defs.h> +#include "Regex.h" + +typedef struct { + char subject[100]; + bool match; +} subject_match_t; + +typedef struct { + char regex[100]; + subject_match_t tests[4]; +} test_t; + +static const test_t test_data[] = { + {"^foo", {{"foo", true}, + {"bar", false}, + {"foobar", true}, + {"foobarbaz", true}}}, + {"foo$", {{"foo", true}, + {"bar", false}, + {"foobar", false}, + {"foobarbaz", false}}}, +}; + +static void test_basic() +{ + for (unsigned int i = 0; i < countof(test_data); i++) { + Regex r; + + printf("Regex: %s\n", test_data[i].regex); + r.compile(test_data[i].regex); + for (unsigned int j = 0; j < countof(test_data[i].tests); j++) { + printf("Subject: %s Result: %s\n", test_data[i].tests[j].subject, test_data[i].tests[j].match ? "true" : "false"); + ink_assert(r.exec(test_data[i].tests[j].subject) == test_data[i].tests[j].match); + } + } +} + +int main(int /* argc ATS_UNUSED */, char **/* argv ATS_UNUSED */) +{ + test_basic(); + printf("test_Regex PASSED\n"); +}
