Updated Branches:
  refs/heads/3.0.x efb863663 -> 325610e76

TS-1459: Backport regex_remap


Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/325610e7
Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/325610e7
Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/325610e7

Branch: refs/heads/3.0.x
Commit: 325610e763d038f21fc23db23ac43fd798193233
Parents: efb8636
Author: Brian Geffon <[email protected]>
Authored: Tue Sep 18 11:41:28 2012 -0700
Committer: Brian Geffon <[email protected]>
Committed: Tue Sep 18 11:41:28 2012 -0700

----------------------------------------------------------------------
 CHANGES                            |    3 +
 STATUS                             |    6 -
 configure.ac                       |    2 +
 plugins/Makefile.am                |    2 +-
 plugins/regex_remap/Makefile.am    |   22 +
 plugins/regex_remap/README         |  146 +++++
 plugins/regex_remap/regex_remap.cc |  901 +++++++++++++++++++++++++++++++
 7 files changed, 1075 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index dab8344..65a9f54 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,4 +1,7 @@
                                                          -*- coding: utf-8 -*-
+Changes with Apache Traffic Server 3.0.6
+  *) [TS-1459] Backport regex_remap
+
 Changes with Apache Traffic Server 3.0.5
 
   *) [TS-1116] Fix build issues with clang + gcc4.7

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/STATUS
----------------------------------------------------------------------
diff --git a/STATUS b/STATUS
index 0562686..b016161 100644
--- a/STATUS
+++ b/STATUS
@@ -42,17 +42,11 @@ A list of all bugs open for the next v3.0.6 release can be 
found at
   http://s.apache.org/ts-3.0.6
 
 PATCHES ACCEPTED TO BACKPORT FROM TRUNK:
-
   *) Don't assume root privileges during make install
    Trunk patch: 
https://git-wip-us.apache.org/repos/asf?p=trafficserver.git;a=commit;h=a862e283734c86d46a57624f9a05f09cfbbee175
    Jira: https://issues.apache.org/jira/browse/TS-1460
    +1: zwoop, igalic, humbedooh
 
-  *) Backport regex_remap
-   patch: 
https://issues.apache.org/jira/secure/attachment/12544604/TS-1459.patch
-   Jira: https://issues.apache.org/jira/browse/TS-1459
-   +1: briang, igalic, jpeach,zwoop
-
   *) Disable gzip compression by default
    Commit: 05da7a90ab16f7bc4c6a3e5c7f4880ef80e9b316
    Jira: https://issues.apache.org/jira/browse/TS-1466

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/configure.ac
----------------------------------------------------------------------
diff --git a/configure.ac b/configure.ac
index f913b21..00cc727 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1240,6 +1240,8 @@ AC_CONFIG_FILES([cop/Makefile])
 # production plugins
 AC_CONFIG_FILES([plugins/Makefile])
 AC_CONFIG_FILES([plugins/conf_remap/Makefile])
+AC_CONFIG_FILES([plugins/regex_remap/Makefile])
+
 # various tools
 AC_CONFIG_FILES([tools/Makefile])
 # example plugins

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/plugins/Makefile.am
----------------------------------------------------------------------
diff --git a/plugins/Makefile.am b/plugins/Makefile.am
index 59e1923..b46f44f 100644
--- a/plugins/Makefile.am
+++ b/plugins/Makefile.am
@@ -14,4 +14,4 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
-SUBDIRS = conf_remap
+SUBDIRS = conf_remap regex_remap

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/plugins/regex_remap/Makefile.am
----------------------------------------------------------------------
diff --git a/plugins/regex_remap/Makefile.am b/plugins/regex_remap/Makefile.am
new file mode 100644
index 0000000..c79fa7f
--- /dev/null
+++ b/plugins/regex_remap/Makefile.am
@@ -0,0 +1,22 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+CXXFLAGS+=-I$(top_builddir)/proxy/api -I$(top_srcdir)/proxy/api 
-I$(top_srcdir)/lib/ts -I$(top_builddir)/lib/ts
+
+pkglibdir = ${pkglibexecdir}
+pkglib_LTLIBRARIES = regex_remap.la
+regex_remap_la_SOURCES = regex_remap.cc
+regex_remap_la_LDFLAGS = -module -avoid-version -shared

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/plugins/regex_remap/README
----------------------------------------------------------------------
diff --git a/plugins/regex_remap/README b/plugins/regex_remap/README
new file mode 100644
index 0000000..c965848
--- /dev/null
+++ b/plugins/regex_remap/README
@@ -0,0 +1,146 @@
+This is a plugin for the Apache Traffic Server, that allows you to
+configure mapping rules based on regular expressions. This is similar to
+what you can accomplish using mod_rewrite in apache, but obviously not
+as flexible or sophisticated (yet).
+
+To use this plugin, configure a remap.config rule like
+
+   map http://a.com http://b.com @plugin=regex_remap.so @pparam=maps.reg
+
+
+An optional argument (@pparam) with the string "profile" will enable
+profiling of this regex remap rule, e.g.
+
+  ... @pparam=maps.reg @pparam=profile
+
+
+Profiling is very low overhead, and the information is dumped to
+traffic.out, typically in /usr/local/var/logs/trafficserver/traffic.out.
+In order to force a profile dump, you can do
+
+    $ sudo touch /usr/local/etc/trafficserver/remap.config
+    $ sudo traffic_line -x
+
+
+By default, only the path and query string of the URL are
+provided for the regular expressions to match. The following optional
+parameters can be used to modify the plugin instance behavior:
+
+    @pparam=[no-]full-url            [default: off]
+    @pparam=[no-]method              [default: off]
+    @pparam=[no-]query-string        [default: on]
+    @pparam=[no-]matrix-parameters   [default: off]
+
+
+
+If you want the full (original) URL, use the parameter @pparam=full-url.
+For example:
+
+    ... @pparam=maps.reg @pparam=full-url
+
+
+The string that you will need to match against looks like
+
+    http://server/path?query=bar
+
+
+If you also wish to match on the HTTP method used (e.g. "GET"), you must
+use the option @pparam=method. For example:
+
+   ... @pparam=maps.reg @pparam=method
+
+
+With this enabled, the string that you will need to match will look like
+
+    GET/path?query=bar
+
+
+The "method" parameter can also be used in combination with "full-url",
+and the string to match against will then look like
+
+   GEThttp://server.com/path?query=bar
+
+
+The methods are always all upper-case. There is no space between the
+method and the rest of the URL (or URI path), as the examples above
+show.
+
+By default, the query string is part of the string that is matched against;
+to turn this off use the option 'no-query-string', e.g.
+
+   ... @pparam=maps.reg @pparam=no-query-string
+
+
+Finally, you can also include the matrix parameters in the string, using the
+option 'matrix-parameters', e.g.
+
+   ... @pparam=maps.reg @pparam=matrix-parameters
+
+
+Note that the path to the plugin must be absolute, and by default it is
+
+    /usr/local/libexec/trafficserver/regex_remap.so
+
+
+The config file (maps.reg above) can be placed anywhere, but unless you
+specify an absolute path (as above), it will default to the directory
+
+  /usr/local/etc/regex_remap
+
+
+A typical regex would look like
+
+    ^/(ogre.*)/more     http://www.ogre.com/$h/$0/$1
+
+
+The regular expression must not contain any white spaces!
+
+When the regular expression is matched, only the URL path + query string is
+matched (without any of the optional configuration options). The path
+will always start with a "/". Various substitution strings are allowed
+on the right hand side:
+
+    $0     - The entire matched string
+    $1-9   - Regular expression groups ($1 first group etc.)
+    $h     - The original host header from the request
+    $f     - The host as used in the "from" portion of the remap rule
+    $t     - The host as used in the "to" portion of the remap rule
+    $p     - The original port number
+    $s     - The scheme (e.g. http) of the request
+    $P     - The entire path of the request
+    $q     - The query part of the request
+    $m     - The matrix parameters of the request
+    $r     - The path parameters of the request (not implemented yet)
+    $c     - The cookie string from the request (not implemented yet)
+    $i     - The client IP for this request (not implemented yet)
+
+
+You can also provide options, similar to how you configure your
+remap.config. The following options are available
+
+    @status=<nnn>               - Force the response code to <nnn>
+    @active_timeout=<nnn>       - Active timeout (in ms)
+    @no_activity_timeout=<nnn>  - No activity timeout (in ms)
+    @connect_timeout=<nnn>      - Connect timeouts (in ms)
+    @dns_timeout=<nnn>          - DNS timeouts (in ms)
+
+
+For example, this can be useful to force a particular response for some
+URLs, e.g.
+
+    ^/(ogre.*)/bad      http://www.example.com/  @status=404
+
+
+Or, to force a 302 redirect
+
+    ^/oldurl/(.*)$      http://news.example.com/new/$1 @status=302
+
+
+Note: Setting the status to 301 or 302 will force the new URL to be used
+as a redirect (Location:).
+
+
+RELEASES
+--------
+
+Version 2.0
+  - Initial Open Source release.

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/plugins/regex_remap/regex_remap.cc
----------------------------------------------------------------------
diff --git a/plugins/regex_remap/regex_remap.cc 
b/plugins/regex_remap/regex_remap.cc
new file mode 100644
index 0000000..2cfea6d
--- /dev/null
+++ b/plugins/regex_remap/regex_remap.cc
@@ -0,0 +1,901 @@
+/** @file
+
+    ATS plugin to do (simple) regular expression remap rules
+
+    @section license License
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+*/
+
+#define UNUSED __attribute__ ((unused))
+static char UNUSED rcsId__regex_remap_cc[] = "@(#) $Id$ built on " __DATE__ " 
" __TIME__;
+
+#include "ts/ts.h"
+#include "ts/remap.h"
+#include "ink_config.h"
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+
+#ifdef HAVE_PCRE_PCRE_H
+#include <pcre/pcre.h>
+#else
+#include <pcre.h>
+#endif
+
+#include <ctype.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <fstream>
+#include <string>
+
+// Get some specific stuff from libts, yes, we can do that now that we build 
inside the core.
+#include "ink_platform.h"
+#include "ink_atomic.h"
+#include "ink_time.h"
+
+static const char* PLUGIN_NAME = "regex_remap";
+
+// Constants
+static const int OVECCOUNT = 30; // We support $0 - $9 x2 ints, and this needs 
to be 1.5x that
+static const int MAX_SUBS = 32;   // No more than 32 substitution variables in 
the subst string
+
+// TODO: This should be "autoconf'ed" or something ...
+#define DEFAULT_PATH "/usr/local/etc/regex_remap/"
+
+// Substitutions other than regex matches
+enum ExtraSubstitutions {
+  // Indices below 10 are the regex capture groups ($0 - $9); the named
+  // substitutions start at 11 and simply count upwards from there.
+  SUB_HOST = 11,
+  SUB_FROM_HOST,  // 12
+  SUB_TO_HOST,    // 13
+  SUB_PORT,       // 14
+  SUB_SCHEME,     // 15
+  SUB_PATH,       // 16
+  SUB_QUERY,      // 17
+  SUB_MATRIX,     // 18
+  SUB_CLIENT_IP   // 19
+};
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Class holding one request URL's component, to simplify the code and
+// length calculations (we need all of them).
+//
+struct UrlComponents
+{
+  // Component pointers as handed back by the TSUrl*Get() calls in populate().
+  const char* scheme;
+  const char* host;
+  const char* path;
+  const char* query;
+  const char* matrix;
+  int port;
+
+  // Length of each component above, as reported by the same calls.
+  int scheme_len;
+  int host_len;
+  int path_len;
+  int query_len;
+  int matrix_len;
+
+  int url_len; // Full length, of all components
+
+  // Start out with every pointer NULL and every length / the port at zero.
+  UrlComponents()
+    : scheme(NULL),
+      host(NULL),
+      path(NULL),
+      query(NULL),
+      matrix(NULL),
+      port(0),
+      scheme_len(0),
+      host_len(0),
+      path_len(0),
+      query_len(0),
+      matrix_len(0),
+      url_len(0)
+  {}
+
+  // Fetch every component of the request URL in rri and record an upper
+  // bound for the assembled URL length in url_len.
+  void populate(TSRemapRequestInfo *rri)
+  {
+    scheme = TSUrlSchemeGet(rri->requestBufp, rri->requestUrl, &scheme_len);
+    host = TSUrlHostGet(rri->requestBufp, rri->requestUrl, &host_len);
+    path = TSUrlPathGet(rri->requestBufp, rri->requestUrl, &path_len);
+    query = TSUrlHttpQueryGet(rri->requestBufp, rri->requestUrl, &query_len);
+    matrix = TSUrlHttpParamsGet(rri->requestBufp, rri->requestUrl, &matrix_len);
+    port = TSUrlPortGet(rri->requestBufp, rri->requestUrl);
+
+    // All component lengths plus 32 bytes of slack for separators etc.
+    int total = 32;
+
+    total += scheme_len;
+    total += host_len;
+    total += path_len;
+    total += query_len;
+    total += matrix_len;
+    url_len = total;
+  }
+};
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Class encapsulating one regular expression (and the linked list).
+//
+class RemapRegex
+{
+ public:
+  // Build one rule from the three fields of a configuration line.
+  //
+  //   reg - the regular expression (a lone "." marks the rule as "simple")
+  //   sub - the substitution string, may contain $0-$9 and $h, $f, $t, ...
+  //   opt - the remaining "@name=value" options (status and the timeouts)
+  //
+  // Nothing is compiled here; compile() must be called before match().
+  RemapRegex(const std::string& reg, const std::string& sub, const std::string& opt) :
+    _num_subs(-1), _rex(NULL), _extra(NULL), _order(-1), _simple(false),
+    _active_timeout(-1), _no_activity_timeout(-1), _connect_timeout(-1), _dns_timeout(-1)
+  {
+    TSDebug(PLUGIN_NAME, "Calling constructor");
+
+    _status = static_cast<TSHttpStatus>(0);
+
+    if (!reg.empty()) {
+      if (reg == ".") {
+        TSDebug(PLUGIN_NAME, "Rule is simple, and fast!");
+        _simple = true;
+      }
+      _rex_string = TSstrdup(reg.c_str());
+    } else {
+      _rex_string = NULL;
+    }
+
+    if (!sub.empty()) {
+      _subst = TSstrdup(sub.c_str());
+      _subst_len = sub.length();
+    } else {
+      _subst = NULL;
+      _subst_len = 0;
+    }
+
+    _hits = 0;
+
+    memset(_sub_pos, 0, sizeof(_sub_pos));
+    memset(_sub_ix, 0, sizeof(_sub_ix));
+    _next = NULL;
+
+    // Parse options: a sequence of "@name=value" tokens separated by white space.
+    std::string::size_type start = opt.find_first_of("@");
+    std::string::size_type pos1, pos2;
+
+    while (start != std::string::npos) {
+      std::string opt_val;
+
+      ++start; // Skip the '@'
+      pos1 = opt.find_first_of("=", start);
+      if (pos1 == std::string::npos) {
+        TSError("Malformed options: %s", opt.c_str());
+        break;
+      }
+      ++pos1; // Skip the '='
+      pos2 = opt.find_first_of(" \t\n", pos1);
+      if (pos2 == std::string::npos)
+        pos2 = opt.length();
+      opt_val = opt.substr(pos1, pos2-pos1);
+
+      if (opt.compare(start, 6, "status") == 0) {
+        _status = static_cast<TSHttpStatus>(atoi(opt_val.c_str()));
+      } else if (opt.compare(start, 14, "active_timeout") == 0) {
+        _active_timeout = atoi(opt_val.c_str());
+      } else if (opt.compare(start, 19, "no_activity_timeout") == 0) {
+        _no_activity_timeout = atoi(opt_val.c_str());
+      } else if (opt.compare(start, 15, "connect_timeout") == 0) {
+        _connect_timeout = atoi(opt_val.c_str());
+      } else if (opt.compare(start, 11, "dns_timeout") == 0) {
+        _dns_timeout = atoi(opt_val.c_str());
+      } else {
+        TSError("Unknown options: %s", opt.c_str());
+      }
+      start = opt.find_first_of("@", pos2);
+    }
+  }
+
+  // Release the duplicated strings and the compiled / studied regex.
+  ~RemapRegex()
+  {
+    TSDebug(PLUGIN_NAME, "Calling destructor");
+    if (_rex_string)
+      TSfree(_rex_string);
+    if (_subst)
+      TSfree(_subst);
+
+    if (_rex)
+      pcre_free(_rex);
+    if (_extra)
+      pcre_free(_extra);
+  }
+
+  // For profiling information: print this rule's share of "max" total hits
+  // to stderr. The caller must make sure max is not zero.
+  inline void
+  print(int ix, int max, const char* now)
+  {
+    fprintf(stderr, "[%s]:\tRegex %d ( %s ): %.2f%%\n", now, ix, _rex_string, 100.0 * _hits / max);
+  }
+
+  // Count one more hit on this rule (atomically).
+  inline void
+  increment()
+  {
+    ink_atomic_increment(&(_hits), 1);
+  }
+
+  // Compile and study the regular expression, then index every substitution
+  // variable found in the substitution string.
+  //
+  // Returns 0 on success and -1 on failure. On failure *error always points
+  // at a message and *erroffset is always set, so the caller can log both
+  // without looking at which step failed. For problems in the substitution
+  // string, *erroffset is the offset within that string.
+  int
+  compile(const char** error, int* erroffset)
+  {
+    char* str;
+    int ccount;
+
+    _rex = pcre_compile(_rex_string,          // the pattern
+                        0,                    // default options
+                        error,                // for error message
+                        erroffset,            // for error offset
+                        NULL);                // use default character tables
+
+    if (NULL == _rex)
+      return -1;
+
+    _extra = pcre_study(_rex, 0, error);
+    if ((_extra == NULL) && (*error != 0)) {
+      *erroffset = 0; // pcre_study() has no offset to report
+      return -1;
+    }
+
+    if (pcre_fullinfo(_rex, _extra, PCRE_INFO_CAPTURECOUNT, &ccount) != 0) {
+      *error = "unable to get the capture count of the regex";
+      *erroffset = 0;
+      return -1;
+    }
+
+    // Get some info for the string substitutions
+    str = _subst;
+    _num_subs = 0;
+
+    while (str && *str) {
+      if ('$' == *str) {
+        int ix = -1;
+
+        // <ctype.h> functions must not be handed a negative char value
+        if (isdigit(static_cast<unsigned char>(*(str + 1)))) {
+          ix = *(str + 1) - '0';
+        } else {
+          switch (*(str + 1)) {
+          case 'h':
+            ix = SUB_HOST;
+            break;
+          case 'f':
+            ix = SUB_FROM_HOST;
+            break;
+          case 't':
+            ix = SUB_TO_HOST;
+            break;
+          case 'p':
+            ix = SUB_PORT;
+            break;
+          case 's':
+            ix = SUB_SCHEME;
+            break;
+          case 'P':
+            ix = SUB_PATH;
+            break;
+          case 'q':
+            ix = SUB_QUERY;
+            break;
+          case 'm':
+            ix = SUB_MATRIX;
+            break;
+          case 'i':
+            ix = SUB_CLIENT_IP;
+            break;
+          default:
+            break;
+          }
+        }
+
+        if (ix > -1) {
+          if ((ix < 10) && (ix > ccount)) {
+            TSDebug(PLUGIN_NAME, "Trying to use unavailable substitution, check the regex!");
+            *error = "substitution uses a capture group that the regex does not have";
+            *erroffset = static_cast<int>(str - _subst);
+            return -1; // No substitutions available other than $0
+          }
+
+          // _sub_ix / _sub_pos only have room for MAX_SUBS entries
+          if (_num_subs >= MAX_SUBS) {
+            *error = "too many substitution variables in the substitution string";
+            *erroffset = static_cast<int>(str - _subst);
+            return -1;
+          }
+
+          _sub_ix[_num_subs] = ix;
+          _sub_pos[_num_subs] = (str - _subst);
+          str += 2;
+          ++_num_subs;
+        } else { // Not a valid substitution character, so just ignore it
+          ++str;
+        }
+      } else {
+        ++str;
+      }
+    }
+    return 0;
+  }
+
+  // Perform the regular expression matching against a string. ovector must
+  // have room for OVECCOUNT ints. Returns whatever pcre_exec() returns.
+  int
+  match(const char* str, int len, int ovector[])
+  {
+    return pcre_exec(_rex,                 // the compiled pattern
+                     _extra,               // Extra data from study (maybe)
+                     str,                  // the subject string
+                     len,                  // the length of the subject
+                     0,                    // start at offset 0 in the subject
+                     0,                    // default options
+                     ovector,              // output vector for substring information
+                     OVECCOUNT);           // number of elements in the output vector
+  }
+
+  // Get the lengths of the matching string(s), taking into account variable substitutions.
+  // lengths[] is filled in for every $0 - $9 that the substitution string uses. The return
+  // value is the max length the substituted string can have (use it to allocate a buffer
+  // before calling substitute() ).
+  int
+  get_lengths(const int ovector[], int lengths[], TSRemapRequestInfo *rri, UrlComponents *req_url)
+  {
+    int len = _subst_len + 1;   // Bigger than necessary
+
+    for (int i=0; i < _num_subs; i++) {
+      int ix = _sub_ix[i];
+
+      if (ix < 10) {
+        lengths[ix] = ovector[2*ix+1] - ovector[2*ix]; // -1 - -1 == 0
+        len += lengths[ix];
+      } else {
+        int tmp_len = 0; // Stays 0 should the host lookup not fill it in
+
+        switch (ix) {
+        case SUB_HOST:
+          len += req_url->host_len;
+          break;
+        case SUB_FROM_HOST:
+          TSUrlHostGet(rri->requestBufp, rri->mapFromUrl, &tmp_len);
+          len += tmp_len;
+          break;
+        case SUB_TO_HOST:
+          TSUrlHostGet(rri->requestBufp, rri->mapToUrl, &tmp_len);
+          len += tmp_len;
+          break;
+        case SUB_PORT:
+          len += 6; // One extra for snprintf()
+          break;
+        case SUB_SCHEME:
+          len += req_url->scheme_len;
+          break;
+        case SUB_PATH:
+          len += req_url->path_len;
+          break;
+        case SUB_QUERY:
+          len += req_url->query_len;
+          break;
+        case SUB_MATRIX:
+          len += req_url->matrix_len;
+          break;
+        case SUB_CLIENT_IP:
+          len += 15; // Allow for 255.255.255.255
+          break;
+        default:
+          break;
+        }
+      }
+    }
+
+    return len;
+  }
+
+  // Expand the substitution string into dest. $0 is the entire match in "src", $1 - $9 are
+  // the corresponding groups (located through ovector / lengths), and the named variables
+  // are taken from rri / req_url. dest must be at least as large as get_lengths() reported.
+  // Return the final length of the string as written to dest (not including the trailing
+  // NUL).
+  int
+  substitute(char dest[], const char *src, const int ovector[], const int lengths[],
+             TSRemapRequestInfo *rri, UrlComponents *req_url, struct sockaddr const* addr)
+  {
+    if (NULL == _subst) { // No substitution string at all, the result is empty
+      *dest = 0;
+      return 0;
+    }
+
+    if (_num_subs <= 0) {
+      memcpy(dest, _subst, _subst_len + 1); // No substitutions in the string, copy it all
+      return _subst_len;
+    }
+
+    char* p1 = dest;   // Write position
+    char* p2 = _subst; // Read position in the substitution string
+    int prev = 0;      // Offset in _subst just past the previous variable
+
+    for (int i=0; i < _num_subs; i++) {
+      int ix = _sub_ix[i];
+
+      // Literal text between the previous variable and this one
+      memcpy(p1, p2, _sub_pos[i] - prev);
+      p1 += (_sub_pos[i] - prev);
+      if (ix < 10) {
+        if (lengths[ix] > 0) { // Unset groups have no usable offset
+          memcpy(p1, src + ovector[2*ix], lengths[ix]);
+          p1 += lengths[ix];
+        }
+      } else {
+        const char* str = NULL;
+        int len = 0;
+
+        switch (ix) {
+        case SUB_HOST:
+          str = req_url->host;
+          len = req_url->host_len;
+          break;
+        case SUB_FROM_HOST:
+          str = TSUrlHostGet(rri->requestBufp, rri->mapFromUrl, &len);
+          break;
+        case SUB_TO_HOST:
+          str = TSUrlHostGet(rri->requestBufp, rri->mapToUrl, &len);
+          break;
+        case SUB_PORT:
+          {
+            // get_lengths() reserved 6 bytes: at most 5 digits plus the NUL from snprintf()
+            int n = snprintf(p1, 6, "%u", static_cast<unsigned int>(req_url->port));
+
+            if (n > 5)
+              n = 5; // Truncated, only 5 characters were actually stored
+            if (n > 0)
+              p1 += n;
+          }
+          break;
+        case SUB_SCHEME:
+          str = req_url->scheme;
+          len = req_url->scheme_len;
+          break;
+        case SUB_PATH:
+          str = req_url->path;
+          len = req_url->path_len;
+          break;
+        case SUB_QUERY:
+          str = req_url->query;
+          len = req_url->query_len;
+          break;
+        case SUB_MATRIX:
+          str = req_url->matrix;
+          len = req_url->matrix_len;
+          break;
+        case SUB_CLIENT_IP:
+          {
+            // TODO: Finish implementing with the addr from above
+            // p1 += snprintf(p1, 15, "%d.%d.%d.%d", ip[0], ip[1], ip[2], ip[3]);
+          }
+          break;
+        default:
+          break;
+        }
+        // If one of the rules fetched a read-only string, copy it in.
+        if (str && len > 0) {
+          memcpy(p1, str, len);
+          p1 += len;
+        }
+      }
+      p2 += (_sub_pos[i] - prev + 2); // Skip the literal text and the two-character variable
+      prev = _sub_pos[i] + 2;
+    }
+
+    // Whatever literal text follows the last variable
+    memcpy(p1, p2, _subst_len - (p2 - _subst));
+    p1 += _subst_len - (p2 - _subst);
+    *p1 = 0; // Make sure it's NUL terminated (for safety).
+    return p1 - dest;
+  }
+
+  // setter / getters for members the linked list.
+  inline void set_next(RemapRegex* next) { _next = next; }
+  inline RemapRegex* next() const { return _next; }
+
+  // setter / getters for order number within the linked list
+  inline void set_order(int order) { _order = order; }
+  inline int order() const { return _order; }
+
+  // Various getters
+  inline const char* regex() const { return _rex_string; }
+  inline const char* substitution() const { return _subst; }
+  inline int substitutions_used() const { return _num_subs; }
+
+  inline bool is_simple() const { return _simple; }
+
+  // Option values; 0 (status) and -1 (timeouts) mean the option was not given.
+  inline TSHttpStatus status_option() const { return _status; }
+  inline int active_timeout_option() const  { return _active_timeout; }
+  inline int no_activity_timeout_option() const  { return _no_activity_timeout; }
+  inline int connect_timeout_option() const  { return _connect_timeout; }
+  inline int dns_timeout_option() const  { return _dns_timeout; }
+
+ private:
+  char* _rex_string;      // Copy of the regex source, NULL if none was given
+  char* _subst;           // Copy of the substitution string, NULL if none was given
+  int _subst_len;         // Length of _subst
+  int _num_subs;          // Number of entries used in _sub_pos / _sub_ix, -1 before compile()
+  int _hits;              // Hit counter, see increment()
+
+  pcre* _rex;
+  pcre_extra* _extra;
+  int _sub_pos[MAX_SUBS]; // Offset of each variable within _subst
+  int _sub_ix[MAX_SUBS];  // 0-9 for $0-$9, otherwise one of ExtraSubstitutions
+  RemapRegex* _next;
+  int _order;
+  TSHttpStatus _status;
+  bool _simple;
+  int _active_timeout;
+  int _no_activity_timeout;
+  int _connect_timeout;
+  int _dns_timeout;
+};
+
+struct RemapInstance
+{
+  RemapRegex* first;    // Head of the linked list of rules
+  RemapRegex* last;     // Tail of the same list
+  bool profile;
+  bool method;
+  bool query_string;
+  bool matrix_params;
+  int hits;
+  int misses;
+  std::string filename;
+
+  // Everything off / empty, except query_string which starts out enabled.
+  RemapInstance()
+    : first(NULL),
+      last(NULL),
+      profile(false),
+      method(false),
+      query_string(true),
+      matrix_params(false),
+      hits(0),
+      misses(0),
+      filename("unknown")
+  {}
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// Helpers for memory management (to make sure pcre uses the TS APIs).
+//
+// Allocation hook for pcre: hand the request straight to TSmalloc().
+inline void*
+ts_malloc(size_t s)
+{
+  void* mem = TSmalloc(s);
+
+  return mem;
+}
+
+// Release hook for pcre: hand the pointer straight to TSfree().
+inline void
+ts_free(void *s)
+{
+  TSfree(s);
+}
+
+// Point pcre's global allocator hooks at the TS-backed wrappers.
+void
+setup_memory_allocation()
+{
+  pcre_free = ts_free;
+  pcre_malloc = ts_malloc;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Initialize the plugin.
+//
+// Called once when the plugin is loaded. Verifies the remap API version and
+// makes pcre allocate through the TS allocator.
+//
+//   api_info    - interface description handed in by the core, must not be NULL
+//   errbuf      - buffer that receives a message on failure (may be NULL)
+//   errbuf_size - size of errbuf in bytes
+//
+// Returns TS_SUCCESS, or TS_ERROR with a NUL-terminated message in errbuf.
+TSReturnCode
+TSRemapInit(TSRemapInterface* api_info, char *errbuf, int errbuf_size)
+{
+  const bool can_report = (errbuf != NULL) && (errbuf_size > 0);
+
+  if (!api_info) {
+    // snprintf() always terminates the buffer, strncpy() does not
+    if (can_report)
+      snprintf(errbuf, errbuf_size, "%s", "[tsremap_init] - Invalid TSRemapInterface argument");
+    return TS_ERROR;
+  }
+
+  if (api_info->tsremap_version < TSREMAP_VERSION) {
+    if (can_report)
+      snprintf(errbuf, errbuf_size, "[TSRemapInit] - Incorrect API version %ld.%ld",
+               static_cast<long>(api_info->tsremap_version >> 16),
+               static_cast<long>(api_info->tsremap_version & 0xffff));
+    return TS_ERROR;
+  }
+
+  setup_memory_allocation();
+  TSDebug(PLUGIN_NAME, "plugin is succesfully initialized");
+  return TS_SUCCESS;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Create one remap instance: read the plugin parameters and load the rules.
+//
+// Create the per-remap-rule instance.
+//
+// argv[2] is the name of the file holding the regex rules; if it is not
+// readable as given, DEFAULT_PATH is prepended. Every following argument is
+// an option switch ([no-]profile, [no-]method, [no-]query-string,
+// [no-]matrix-parameters).
+//
+// Each non-empty, non-comment line of the file has the form
+//
+//     <regex> <substitution> [@option=value ...]
+//
+// Lines that cannot be parsed or compiled are reported with TSError() and
+// skipped. *ih receives the new RemapInstance. Returns TS_SUCCESS if at
+// least one rule was loaded, TS_ERROR otherwise.
+TSReturnCode
+TSRemapNewInstance(int argc, char* argv[], void** ih, char* errbuf, int errbuf_size)
+{
+  const char* error = NULL;
+  int erroffset = 0;
+  RemapInstance* ri = new RemapInstance();
+  int lineno = 0;
+  int count = 0;
+
+  *ih = (void*)ri;
+
+  if (argc < 3) {
+    TSError("regex_remap requires the name of the rules file as its first parameter");
+    return TS_ERROR;
+  }
+
+  // Really simple (e.g. basic) config parser. argv[2] is always the rules
+  // file, so a file name can never be mistaken for an option.
+  for (int i=3; i < argc; ++i) {
+    if (strncmp(argv[i], "profile", 7) == 0) {
+      ri->profile = true;
+    } else if (strncmp(argv[i], "no-profile", 10) == 0) {
+      ri->profile = false;
+    } else if (strncmp(argv[i], "method", 6) == 0) {
+      ri->method = true;
+    } else if (strncmp(argv[i], "no-method", 9) == 0) {
+      ri->method = false;
+    } else if (strncmp(argv[i], "query-string", 12) == 0) {
+      ri->query_string = true;
+    } else if (strncmp(argv[i], "no-query-string", 15) == 0) {
+      ri->query_string = false;
+    } else if (strncmp(argv[i], "matrix-parameters", 17) == 0) {
+      ri->matrix_params = true;
+    } else if (strncmp(argv[i], "no-matrix-parameters", 20) == 0) {
+      ri->matrix_params = false;
+    } else {
+      TSError("unknown regex_remap option: %s", argv[i]);
+    }
+  }
+
+  if (0 != access(argv[2], R_OK)) { // Try with the default path instead
+    ri->filename = DEFAULT_PATH;
+    ri->filename += argv[2];
+  } else {
+    ri->filename = argv[2];
+  }
+
+  std::ifstream f((ri->filename).c_str(), std::ios::in);
+
+  if (!f.is_open()) {
+    TSError("unable to open %s", (ri->filename).c_str());
+    return TS_ERROR;
+  }
+  TSDebug(PLUGIN_NAME, "loading regular expression maps from %s", (ri->filename).c_str());
+
+  std::string line;
+
+  // Testing the result of getline() also ends the loop on a read error,
+  // which a plain eof() test would spin on forever.
+  while (std::getline(f, line)) {
+    std::string regex, subst, options;
+    std::string::size_type pos1, pos2;
+
+    ++lineno;
+    pos1 = line.find_first_not_of(" \t\n");
+    if (pos1 == std::string::npos)
+      continue;  // Skip empty and white space only lines
+    if (line[pos1] == '#')
+      continue;  // Skip comment lines
+
+    // Field 1: the regex, which must be followed by white space
+    pos2 = line.find_first_of(" \t\n", pos1);
+    if (pos2 != std::string::npos) {
+      regex = line.substr(pos1, pos2-pos1);
+
+      // Field 2: the substitution string
+      pos1 = line.find_first_not_of(" \t\n#", pos2);
+      if (pos1 != std::string::npos) {
+        pos2 = line.find_first_of(" \t\n", pos1);
+        if (pos2 == std::string::npos)
+          pos2 = line.length();
+        subst = line.substr(pos1, pos2-pos1);
+
+        // Field 3: everything up to a comment or the end of the line is options
+        pos1 = line.find_first_not_of(" \t\n#", pos2);
+        if (pos1 != std::string::npos) {
+          pos2 = line.find_first_of("\n#", pos1);
+          if (pos2 == std::string::npos)
+            pos2 = line.length();
+          options = line.substr(pos1, pos2-pos1);
+        }
+      }
+    }
+
+    if (regex.empty()) {
+      // No regex found on this line
+      TSError("no regexp found in %s: line %d", (ri->filename).c_str(), lineno);
+      continue;
+    }
+    if (subst.empty() && options.empty()) {
+      // No substitution found on this line (and no options)
+      TSError("no substitution string found in %s: line %d", (ri->filename).c_str(), lineno);
+      continue;
+    }
+
+    // Got a regex and substitution string
+    RemapRegex* cur = new RemapRegex(regex, subst, options);
+
+    if (cur->compile(&error, &erroffset) < 0) {
+      TSError("PCRE failed in %s (line %d) at offset %d: %s", (ri->filename).c_str(), lineno, erroffset,
+              error ? error : "unknown error");
+      delete cur;
+    } else {
+      TSDebug(PLUGIN_NAME, "added regex=%s with substitution=%s and options `%s'",
+               regex.c_str(), subst.c_str(), options.c_str());
+      cur->set_order(++count);
+      // Append to the instance's linked list, keeping file order
+      if (ri->first == NULL)
+        ri->first = cur;
+      else
+        ri->last->set_next(cur);
+      ri->last = cur;
+    }
+  }
+
+  // Make sure we got something...
+  if (ri->first == NULL) {
+    TSError("Got no regular expressions from the maps");
+    return TS_ERROR;
+  }
+
+  return TS_SUCCESS;
+}
+
+
+// Tear down one remap instance: if profiling was enabled, dump the hit
+// statistics to stderr first, then free every rule and the instance itself.
+// A NULL ih is accepted and ignored, matching TSRemapDoRemap().
+void
+TSRemapDeleteInstance(void* ih)
+{
+  RemapInstance* ri = static_cast<RemapInstance*>(ih);
+  RemapRegex* re;
+  RemapRegex* tmp;
+
+  if (NULL == ri)
+    return;
+
+  if (ri->profile) {
+    char now[64];
+    const ink_time_t tim = time(NULL);
+
+    if (ink_ctime_r(&tim, now))
+      now[strlen(now) - 1] = '\0'; // Drop the last character of the time string
+    else {
+      memcpy(now, "unknown time", 12);
+      *(now + 12) = '\0';
+    }
+
+    fprintf(stderr, "[%s]: Profiling information for regex_remap file `%s':\n", now, (ri->filename).c_str());
+    fprintf(stderr, "[%s]:\tTotal hits (matches): %d\n", now, ri->hits);
+    fprintf(stderr, "[%s]:\tTotal missed (no regex matches): %d\n", now, ri->misses);
+
+    if (ri->hits > 0) { // Avoid divide by zeros...
+      int ix = 1;
+
+      for (re = ri->first; re; re = re->next()) {
+        re->print(ix, ri->hits, now);
+        ++ix;
+      }
+    }
+  }
+
+  // Fetch the successor before deleting the node it is stored in
+  re = ri->first;
+  while (re) {
+    tmp = re;
+    re = re->next();
+    delete tmp;
+  }
+
+  delete ri;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// This is the main "entry" point for the plugin, called for every request.
+//
+// The string handed to the regular expressions is assembled as
+//
+//     [METHOD] "/" path [";" matrix-params] ["?" query]
+//
+// where the method, the matrix parameters and the query string are only
+// included when ri->method, ri->matrix_params and ri->query_string are set.
+// The leading "/" is always present, also for an empty path, so that a rule
+// such as ^/$ can match a request for the root.
+//
+// Rules are tried in list order. The first rule that matches (or is "simple")
+// and produces a positive substitution length is applied, and the search
+// stops there.
+//
+// Returns TSREMAP_NO_REMAP when ih is NULL or no rule was applied, otherwise
+// TSREMAP_DID_REMAP (this includes rules that only set a response status, and
+// the case where the substituted URL failed to parse and a 500 was set).
+//
+TSRemapStatus
+TSRemapDoRemap(void* ih, TSHttpTxn txnp, TSRemapRequestInfo *rri)
+{
+  if (NULL == ih) {
+    TSDebug(PLUGIN_NAME, "Falling back to default URL on regex remap without rules");
+    return TSREMAP_NO_REMAP;
+  }
+
+  // Populate the request url
+  UrlComponents req_url;
+  req_url.populate(rri);
+
+  RemapInstance* ri = static_cast<RemapInstance*>(ih);
+  int ovector[OVECCOUNT];
+  int lengths[OVECCOUNT/2 + 1];
+  int match_len = 0;
+
+  // The extra 32 bytes cover the (truncated) method, the separators and the NUL.
+  // NOTE(review): assumes url_len is at least path + matrix + query lengths;
+  // confirm against UrlComponents::populate().
+  char *match_buf = (char*)alloca(req_url.url_len + 32);
+
+  if (ri->method) { // Prepend the URI path or URL with the HTTP method
+    TSMBuffer mBuf;
+    TSMLoc reqHttpHdrLoc;
+
+    // Note that Method can not be longer than 16 bytes, or we'll simply truncate it
+    if (TS_SUCCESS == TSHttpTxnClientReqGet(txnp, &mBuf, &reqHttpHdrLoc)) {
+      int method_len = 0;
+      const char *method = TSHttpHdrMethodGet(mBuf, reqHttpHdrLoc, &method_len);
+
+      // Only advance match_len by what was really copied into the buffer
+      if (method && (method_len > 0)) {
+        if (method_len > 16)
+          method_len = 16;
+        memcpy(match_buf, method, method_len);
+        match_len = method_len;
+      }
+    }
+  }
+
+  // Always account for the leading slash; otherwise an empty path would let
+  // the next component (or the terminator) overwrite it.
+  match_buf[match_len++] = '/';
+  if (req_url.path && req_url.path_len > 0) {
+    memcpy(match_buf + match_len, req_url.path, req_url.path_len);
+    match_len += req_url.path_len;
+  }
+
+  if (ri->matrix_params && req_url.matrix && req_url.matrix_len > 0) {
+    match_buf[match_len++] = ';';
+    memcpy(match_buf + match_len, req_url.matrix, req_url.matrix_len);
+    match_len += req_url.matrix_len;
+  }
+
+  if (ri->query_string && req_url.query && req_url.query_len > 0) {
+    match_buf[match_len++] = '?';
+    memcpy(match_buf + match_len, req_url.query, req_url.query_len);
+    match_len += req_url.query_len;
+  }
+  match_buf[match_len] = '\0'; // NULL terminate the match string
+  TSDebug(PLUGIN_NAME, "Target match string is `%s'", match_buf);
+
+  // Apply the regular expressions, in order. First one wins. The loop is left
+  // with re != NULL only via the break after a rule has been applied.
+  RemapRegex* re = ri->first;
+
+  for (; re != NULL; re = re->next()) {
+    // Since we check substitutions on parse time, we don't need to reset ovector
+    if (!re->is_simple() && (re->match(match_buf, match_len, ovector) == -1))
+      continue; // Try the next regex
+
+    int new_len = re->get_lengths(ovector, lengths, rri, &req_url);
+
+    // Set timeouts
+    if (re->active_timeout_option() > (-1)) {
+      TSDebug(PLUGIN_NAME, "Setting active timeout to %d", re->active_timeout_option());
+      TSHttpTxnActiveTimeoutSet(txnp, re->active_timeout_option());
+    }
+    if (re->no_activity_timeout_option() > (-1)) {
+      TSDebug(PLUGIN_NAME, "Setting no activity timeout to %d", re->no_activity_timeout_option());
+      TSHttpTxnNoActivityTimeoutSet(txnp, re->no_activity_timeout_option());
+    }
+    if (re->connect_timeout_option() > (-1)) {
+      TSDebug(PLUGIN_NAME, "Setting connect timeout to %d", re->connect_timeout_option());
+      TSHttpTxnConnectTimeoutSet(txnp, re->connect_timeout_option());
+    }
+    if (re->dns_timeout_option() > (-1)) {
+      TSDebug(PLUGIN_NAME, "Setting DNS timeout to %d", re->dns_timeout_option());
+      TSHttpTxnDNSTimeoutSet(txnp, re->dns_timeout_option());
+    }
+
+    // Update profiling if requested
+    if (ri->profile) {
+      re->increment();
+      ink_atomic_increment(&(ri->hits), 1);
+    }
+
+    if (new_len <= 0)
+      continue; // Nothing to substitute for this rule, try the next regex
+
+    struct sockaddr const* addr = TSHttpTxnClientAddrGet(txnp);
+    char* dest = (char*)alloca(new_len+8);
+    int dest_len = re->substitute(dest, match_buf, ovector, lengths, rri, &req_url, addr);
+
+    TSDebug(PLUGIN_NAME, "New URL is estimated to be %d bytes long, or less", new_len);
+    TSDebug(PLUGIN_NAME, "New URL is %s (length %d)", dest, dest_len);
+    TSDebug(PLUGIN_NAME, "    matched rule %d [%s]", re->order(), re->regex());
+
+    // Check for a quick response, if the status option is set
+    if (re->status_option() > 0) {
+      if (re->status_option() != TS_HTTP_STATUS_MOVED_PERMANENTLY &&
+          re->status_option() != TS_HTTP_STATUS_MOVED_TEMPORARILY) {
+        // Don't set the URL / Location for this.
+        TSHttpTxnSetHttpRetStatus(txnp, re->status_option());
+        break;
+      }
+
+      TSDebug(PLUGIN_NAME, "Redirecting URL, status=%d", re->status_option());
+      TSHttpTxnSetHttpRetStatus(txnp, re->status_option());
+      rri->redirect = 1;
+    }
+
+    // Now parse the new URL, which can also be the redirect URL
+    if (dest_len > 0) {
+      const char *start = dest;
+
+      // Setup the new URL
+      if (TS_PARSE_ERROR == TSUrlParse(rri->requestBufp, rri->requestUrl, &start, start + dest_len)) {
+        TSHttpTxnSetHttpRetStatus(txnp, TS_HTTP_STATUS_INTERNAL_SERVER_ERROR);
+        TSError("can't parse substituted URL string");
+      }
+    }
+    break;
+  }
+
+  if (NULL == re) {
+    // Ran off the end of the list (or the list was empty): no match
+    if (ri->profile)
+      ink_atomic_increment(&(ri->misses), 1);
+    return TSREMAP_NO_REMAP;
+  }
+
+  return TSREMAP_DID_REMAP;
+}
+
+
+
+/*
+  local variables:
+  mode: C++
+  indent-tabs-mode: nil
+  c-basic-offset: 2
+  c-comment-only-line-offset: 0
+  c-file-offsets: ((statement-block-intro . +)
+  (label . 0)
+  (statement-cont . +)
+  (innamespace . 0))
+  end:
+
+  Indent with: /usr/bin/indent -ncs -nut -npcs -l 120 regex_remap.cc
+*/

Reply via email to