Repository: trafficserver
Updated Branches:
  refs/heads/master 4ef8d3914 -> 7b9689f49


TS-3143: Create new Regex class that uses PCRE JIT


Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/7b9689f4
Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/7b9689f4
Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/7b9689f4

Branch: refs/heads/master
Commit: 7b9689f4976faf4cc597d2b1513329029e3862b1
Parents: 4ef8d39
Author: Phil Sorber <[email protected]>
Authored: Fri Oct 17 14:36:54 2014 -0600
Committer: Phil Sorber <[email protected]>
Committed: Tue Oct 28 22:33:49 2014 -0600

----------------------------------------------------------------------
 .gitignore           |   2 +
 CHANGES              |   2 +
 lib/ts/Makefile.am   |   6 ++-
 lib/ts/Regex.cc      | 131 ++++++++++++++++++++++++++++++++++++----------
 lib/ts/Regex.h       |  28 +++++++---
 lib/ts/test_Regex.cc |  65 +++++++++++++++++++++++
 6 files changed, 198 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b9689f4/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 46a263f..fa4fadc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -67,6 +67,8 @@ lib/ts/test_atomic
 lib/ts/test_freelist
 lib/ts/test_Map
 lib/ts/test_Vec
+lib/ts/test_geometry
+lib/ts/test_Regex
 lib/perl/lib/Apache/TS.pm
 
 iocore/net/test_certlookup

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b9689f4/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 4d733f9..99710a1 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,8 @@
                                                          -*- coding: utf-8 -*-
 Changes with Apache Traffic Server 5.2.0
 
+  *) [TS-3143] Create new Regex class that uses PCRE JIT.
+
   *) [TS-3115] Add server response time logging fields.
    Author: Acácio Centeno <[email protected]>
 

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b9689f4/lib/ts/Makefile.am
----------------------------------------------------------------------
diff --git a/lib/ts/Makefile.am b/lib/ts/Makefile.am
index 00ff352..a46eb7f 100644
--- a/lib/ts/Makefile.am
+++ b/lib/ts/Makefile.am
@@ -21,7 +21,7 @@ library_includedir=$(includedir)/ts
 library_include_HEADERS = apidefs.h
 
 noinst_PROGRAMS = mkdfa CompileParseRules
-check_PROGRAMS = test_atomic test_freelist test_arena test_List test_Map test_Vec test_geometry
+check_PROGRAMS = test_arena test_atomic test_freelist test_geometry test_List test_Map test_Regex test_Vec
 TESTS = $(check_PROGRAMS)
 
 AM_CPPFLAGS = -I$(top_srcdir)/lib
@@ -215,6 +215,10 @@ test_Map_SOURCES = test_Map.cc
 test_Map_LDADD = libtsutil.la @LIBTCL@ @LIBPCRE@
 test_Map_LDFLAGS = @EXTRA_CXX_LDFLAGS@ @LIBTOOL_LINK_FLAGS@
 
+test_Regex_SOURCES = test_Regex.cc
+test_Regex_LDADD = libtsutil.la @LIBTCL@ @LIBPCRE@
+test_Regex_LDFLAGS = @EXTRA_CXX_LDFLAGS@ @LIBTOOL_LINK_FLAGS@
+
 test_Vec_SOURCES = test_Vec.cc
 test_Vec_LDADD = libtsutil.la @LIBTCL@ @LIBPCRE@
 test_Vec_LDFLAGS = @EXTRA_CXX_LDFLAGS@ @LIBTOOL_LINK_FLAGS@

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b9689f4/lib/ts/Regex.cc
----------------------------------------------------------------------
diff --git a/lib/ts/Regex.cc b/lib/ts/Regex.cc
index e2b9503..3b08138 100644
--- a/lib/ts/Regex.cc
+++ b/lib/ts/Regex.cc
@@ -24,16 +24,106 @@
 #include "libts.h"
 #include "Regex.h"
 
+#ifdef PCRE_CONFIG_JIT
+struct RegexThreadKey
+{
+  RegexThreadKey() {
+    pthread_key_create(&this->key, (void (*)(void *)) &pcre_jit_stack_free);
+  }
+
+  pthread_key_t key;
+};
+
+static RegexThreadKey k;
+
+static pcre_jit_stack *
+get_jit_stack(void *data ATS_UNUSED)
+{
+  pcre_jit_stack *jit_stack;
+
+  if ((jit_stack = (pcre_jit_stack *) pthread_getspecific(k.key)) == NULL) {
+    jit_stack = pcre_jit_stack_alloc(ats_pagesize(), 1024 * 1024); // 1 page min and 1MB max
+    pthread_setspecific(k.key, (void *)jit_stack);
+  }
+
+  return jit_stack;
+}
+#endif
+
+bool
+Regex::compile(const char *pattern, unsigned flags)
+{
+  const char *error;
+  int erroffset;
+  int options = 0;
+  int study_opts = 0;
+
+  if (regex)
+    return false;
+
+  if (flags & RE_CASE_INSENSITIVE) {
+    options |= PCRE_CASELESS;
+  }
+
+  if (flags & RE_ANCHORED) {
+    options |= PCRE_ANCHORED;
+  }
+
+  regex = pcre_compile(pattern, options, &error, &erroffset, NULL);
+  if (error) {
+    regex = NULL;
+    return false;
+  }
+
+#ifdef PCRE_CONFIG_JIT
+  study_opts |= PCRE_STUDY_JIT_COMPILE;
+#endif
+
+  regex_extra = pcre_study(regex, study_opts, &error);
+
+#ifdef PCRE_CONFIG_JIT
+    if (regex_extra)
+      pcre_assign_jit_stack(regex_extra, &get_jit_stack, NULL);
+#endif
+
+  return true;
+}
+
+bool
+Regex::exec(const char *str)
+{
+  return exec(str, strlen(str));
+}
+
+bool
+Regex::exec(const char *str, int length)
+{
+  int ovector[30], rv;
+
+  rv = pcre_exec(regex, regex_extra, str, length , 0, 0, ovector, countof(ovector));
+  return rv > 0 ? true : false;
+}
+
+Regex::~Regex()
+{
+  if (regex_extra)
+#ifdef PCRE_CONFIG_JIT
+    pcre_free_study(regex_extra);
+#else
+    pcre_free(regex_extra);
+#endif
+  if (regex)
+    pcre_free(regex);
+}
+
 DFA::~DFA()
 {
   dfa_pattern * p = _my_patterns;
   dfa_pattern * t;
 
   while(p) {
-    if (p->_pe)
-      pcre_free(p->_pe);
     if (p->_re)
-      pcre_free(p->_re);
+      delete p->_re;
     if(p->_p)
       ats_free(p->_p);
     t = p->_next;
@@ -45,31 +135,20 @@ DFA::~DFA()
 dfa_pattern *
 DFA::build(const char *pattern, unsigned flags)
 {
-  const char *error;
-  int erroffset;
   dfa_pattern* ret;
-  int options = PCRE_ANCHORED;
-
-  ret = (dfa_pattern*)ats_malloc(sizeof(dfa_pattern));
-  ret->_p = NULL;
-
-  if (flags & RE_CASE_INSENSITIVE) {
-    options |= PCRE_CASELESS;
-  }
-
-  if (flags & RE_UNANCHORED) {
-    options &= ~PCRE_ANCHORED;
-  }
+  int rv;
 
-  ret->_re = pcre_compile(pattern, options, &error, &erroffset, NULL);
-  if (error) {
-    ats_free(ret);
-    return NULL;
+  if (!(flags & RE_UNANCHORED)) {
+    flags |= RE_ANCHORED;
   }
 
-  ret->_pe = pcre_study(ret->_re, 0, &error);
+  ret = (dfa_pattern*)ats_malloc(sizeof(dfa_pattern));
+  ret->_p = NULL;
 
-  if (error) {
+  ret->_re = new Regex();
+  rv = ret->_re->compile(pattern, flags);
+  if (rv == -1) {
+    delete ret->_re;
     ats_free(ret);
     return NULL;
   }
@@ -96,11 +175,9 @@ DFA::compile(const char **patterns, int npatterns, unsigned flags)
   dfa_pattern *ret = NULL;
   dfa_pattern *end = NULL;
   int i;
-  //char buf[128];
 
   for (i = 0; i < npatterns; i++) {
     pattern = patterns[i];
-    //snprintf(buf,128,"%s",pattern);
     ret = build(pattern,flags);
     if (!ret) {
       continue;
@@ -135,12 +212,10 @@ int
 DFA::match(const char *str, int length) const
 {
   int rc;
-  int ovector[30];
-  //int wspace[20];
   dfa_pattern * p = _my_patterns;
 
   while(p) {
-    rc = pcre_exec(p->_re, p->_pe, str, length , 0, 0, ovector, 30/*,wspace,20*/);
+    rc = p->_re->exec(str, length);
     if (rc > 0) {
       return p->_idx;
     }

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b9689f4/lib/ts/Regex.h
----------------------------------------------------------------------
diff --git a/lib/ts/Regex.h b/lib/ts/Regex.h
index f3f82ab..ac45958 100644
--- a/lib/ts/Regex.h
+++ b/lib/ts/Regex.h
@@ -32,17 +32,32 @@
 #include <pcre.h>
 #endif
 
-
 enum REFlags
 {
   RE_CASE_INSENSITIVE = 0x0001, // default is case sensitive
-  RE_UNANCHORED = 0x0002        // default is to anchor at the first matching position
+  RE_UNANCHORED = 0x0002,       // default (for DFA) is to anchor at the first matching position
+  RE_ANCHORED = 0x0004,         // default (for Regex) is unanchored
+};
+
+class Regex
+{
+public:
+  Regex():regex(NULL), regex_extra(NULL) {
+  }
+  bool compile(const char *pattern, unsigned flags = 0);
+  // It is safe to call exec() concurrently on the same object instance
+  bool exec(const char *str);
+  bool exec(const char *str, int length);
+  ~Regex();
+
+private:
+  pcre *regex;
+  pcre_extra *regex_extra;
 };
 
 typedef struct __pat {
   int _idx;
-  pcre *_re;
-  pcre_extra *_pe;
+  Regex *_re;
   char *_p;
   __pat * _next;
 } dfa_pattern;
@@ -52,12 +67,12 @@ class DFA
 public:
   DFA():_my_patterns(0) {
   }
-  
+
   ~DFA();
 
   int compile(const char *pattern, unsigned flags = 0);
   int compile(const char **patterns, int npatterns, unsigned flags = 0);
-  
+
   int match(const char *str) const;
   int match(const char *str, int length) const;
 
@@ -67,5 +82,4 @@ private:
   dfa_pattern * _my_patterns;
 };
 
-
 #endif /* __TS_REGEX_H__ */

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b9689f4/lib/ts/test_Regex.cc
----------------------------------------------------------------------
diff --git a/lib/ts/test_Regex.cc b/lib/ts/test_Regex.cc
new file mode 100644
index 0000000..8ed4323
--- /dev/null
+++ b/lib/ts/test_Regex.cc
@@ -0,0 +1,65 @@
+/*
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+#include <ink_assert.h>
+#include <ink_defs.h>
+#include "Regex.h"
+
+typedef struct {
+  char subject[100];
+  bool match;
+} subject_match_t;
+
+typedef struct {
+  char regex[100];
+  subject_match_t tests[4];
+} test_t;
+
+static const test_t test_data[] = {
+  {"^foo", {{"foo", true},
+            {"bar", false},
+            {"foobar", true},
+            {"foobarbaz", true}}},
+  {"foo$", {{"foo", true},
+            {"bar", false},
+            {"foobar", false},
+            {"foobarbaz", false}}},
+};
+
+static void test_basic()
+{
+  for (unsigned int i = 0; i < countof(test_data); i++) {
+    Regex r;
+
+    printf("Regex: %s\n", test_data[i].regex);
+    r.compile(test_data[i].regex);
+    for (unsigned int j = 0; j < countof(test_data[i].tests); j++) {
+      printf("Subject: %s Result: %s\n", test_data[i].tests[j].subject, test_data[i].tests[j].match ? "true" : "false");
+      ink_assert(r.exec(test_data[i].tests[j].subject) == test_data[i].tests[j].match);
+    }
+  }
+}
+
+int main(int /* argc ATS_UNUSED */, char **/* argv ATS_UNUSED */)
+{
+  test_basic();
+  printf("test_Regex PASSED\n");
+}

Reply via email to