Updated Branches: refs/heads/3.0.x efb863663 -> 325610e76
TS-1459: Backport regex_remap Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/325610e7 Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/325610e7 Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/325610e7 Branch: refs/heads/3.0.x Commit: 325610e763d038f21fc23db23ac43fd798193233 Parents: efb8636 Author: Brian Geffon <[email protected]> Authored: Tue Sep 18 11:41:28 2012 -0700 Committer: Brian Geffon <[email protected]> Committed: Tue Sep 18 11:41:28 2012 -0700 ---------------------------------------------------------------------- CHANGES | 3 + STATUS | 6 - configure.ac | 2 + plugins/Makefile.am | 2 +- plugins/regex_remap/Makefile.am | 22 + plugins/regex_remap/README | 146 +++++ plugins/regex_remap/regex_remap.cc | 901 +++++++++++++++++++++++++++++++ 7 files changed, 1075 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index dab8344..65a9f54 100644 --- a/CHANGES +++ b/CHANGES @@ -1,4 +1,7 @@ -*- coding: utf-8 -*- +Changes with Apache Traffic Server 3.0.6 + *) [TS-1459] Backport regex_remap + Changes with Apache Traffic Server 3.0.5 *) [TS-1116] Fix build issues with clang + gcc4.7 http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/STATUS ---------------------------------------------------------------------- diff --git a/STATUS b/STATUS index 0562686..b016161 100644 --- a/STATUS +++ b/STATUS @@ -42,17 +42,11 @@ A list of all bugs open for the next v3.0.6 release can be found at http://s.apache.org/ts-3.0.6 PATCHES ACCEPTED TO BACKPORT FROM TRUNK: - *) Don't assume root privileges during make install Trunk patch: 
https://git-wip-us.apache.org/repos/asf?p=trafficserver.git;a=commit;h=a862e283734c86d46a57624f9a05f09cfbbee175 Jira: https://issues.apache.org/jira/browse/TS-1460 +1: zwoop, igalic, humbedooh - *) Backport regex_remap - patch: https://issues.apache.org/jira/secure/attachment/12544604/TS-1459.patch - Jira: https://issues.apache.org/jira/browse/TS-1459 - +1: briang, igalic, jpeach,zwoop - *) Disable gzip compression by default Commit: 05da7a90ab16f7bc4c6a3e5c7f4880ef80e9b316 Jira: https://issues.apache.org/jira/browse/TS-1466 http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/configure.ac ---------------------------------------------------------------------- diff --git a/configure.ac b/configure.ac index f913b21..00cc727 100644 --- a/configure.ac +++ b/configure.ac @@ -1240,6 +1240,8 @@ AC_CONFIG_FILES([cop/Makefile]) # production plugins AC_CONFIG_FILES([plugins/Makefile]) AC_CONFIG_FILES([plugins/conf_remap/Makefile]) +AC_CONFIG_FILES([plugins/regex_remap/Makefile]) + # various tools AC_CONFIG_FILES([tools/Makefile]) # example plugins http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/plugins/Makefile.am ---------------------------------------------------------------------- diff --git a/plugins/Makefile.am b/plugins/Makefile.am index 59e1923..b46f44f 100644 --- a/plugins/Makefile.am +++ b/plugins/Makefile.am @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-SUBDIRS = conf_remap +SUBDIRS = conf_remap regex_remap http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/plugins/regex_remap/Makefile.am ---------------------------------------------------------------------- diff --git a/plugins/regex_remap/Makefile.am b/plugins/regex_remap/Makefile.am new file mode 100644 index 0000000..c79fa7f --- /dev/null +++ b/plugins/regex_remap/Makefile.am @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CXXFLAGS+=-I$(top_builddir)/proxy/api -I$(top_srcdir)/proxy/api -I$(top_srcdir)/lib/ts -I$(top_builddir)/lib/ts + +pkglibdir = ${pkglibexecdir} +pkglib_LTLIBRARIES = regex_remap.la +regex_remap_la_SOURCES = regex_remap.cc +regex_remap_la_LDFLAGS = -module -avoid-version -shared http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/plugins/regex_remap/README ---------------------------------------------------------------------- diff --git a/plugins/regex_remap/README b/plugins/regex_remap/README new file mode 100644 index 0000000..c965848 --- /dev/null +++ b/plugins/regex_remap/README @@ -0,0 +1,146 @@ +This is a plugin for the Apache Traffic Server, that allows you to +configure mapping rules based on regular expressions. 
This is similar to +what you can accomplish using mod_rewrite in apache, but obviously not +as flexible or sophisticated (yet). + +To use this plugin, configure a remap.config rule like + + map http://a.com http://b.com @plugin=regex_remap.so @pparam=maps.reg + + +An optional argument (@pparam) with the string "profile" will enable +profiling of this regex remap rule, e.g. + + ... @pparam=maps.reg @pparam=profile + + +Profiling is very low overhead, and the information is dumped to +traffic.out, typically in /usr/local/var/logs/trafficserver/traffic.out. +In order to force a profile dump, you can do + + $ sudo touch /usr/local/etc/trafficserver/remap.config + $ sudo traffic_line -x + + +By default, only the path and query string of the URL are +provided for the regular expressions to match. The following optional +parameters can be used to modify the plugin instance behavior: + + @pparam=[no-]full-url [default: off] + @pparam=[no-]method [default: off] + @pparam=[no-]query-string [default: on] + @pparam=[no-]matrix-parameters [default: off] + + + +If you want the full (original) URL, use the parameter @pparam=full-url. +For example: + + ... @pparam=maps.reg @pparam=full-url + + +The string that you will need to match against looks like + + http://server/path?query=bar + + +If you also wish to match on the HTTP method used (e.g. "GET"), you must +use the option @pparam=method. For example: + + ... @pparam=maps.reg @pparam=method + + +With this enabled, the string that you will need to match will look like + + GET/path?query=bar + + +The "method" parameter can also be used in combination with "full-url", +and the string to match against will then look like + + GEThttp://server.com/path?query=bar + + +The methods are always all upper-case, and are not followed by any +separator: there is no space between the method and the rest of the URL (or +URI path). 
+ +By default, the query string is part of the string that is matched against; +to turn this off, use the option 'no-query-string', e.g. + + ... @pparam=maps.reg @pparam=no-query-string + + +Finally, you can also include the matrix parameters in the string, using the +option 'matrix-parameters', e.g. + + ... @pparam=maps.reg @pparam=matrix-parameters + + +Note that the path to the plugin must be absolute, and by default it is + + /usr/local/libexec/trafficserver/regex_remap.so + + +The config file (maps.reg above) can be placed anywhere, but unless you +specify an absolute path (as above), it will default to the directory + + /usr/local/etc/regex_remap + + +A typical regex would look like + + ^/(ogre.*)/more http://www.ogre.com/$h/$0/$1 + + +The regular expression must not contain any whitespace! + +When none of the optional configuration options are used, the regular +expression is matched against the URL path + query string only. The path +will always start with a "/". Various substitution strings are allowed +on the right hand side: + + $0 - The entire matched string + $1-9 - Regular expression groups ($1 first group etc.) + $h - The original host header from the request + $f - The host as used in the "from" portion of the remap rule + $t - The host as used in the "to" portion of the remap rule + $p - The original port number + $s - The scheme (e.g. http) of the request + $P - The entire path of the request + $q - The query part of the request + $r - The path parameters of the request (not implemented yet) + $c - The cookie string from the request + $i - The client IP for this request + + +You can also provide options, similar to how you configure your +remap.config. 
The following options are available + + @status=<nnn> - Force the response code to <nnn> + @active_timeout=<nnn> - Active timeout (in ms) + @no_activity_timeout=<nnn> - No activity timeout (in ms) + @connect_timeout=<nnn> - Connect timeout (in ms) + @dns_timeout=<nnn> - DNS timeout (in ms) + + +For example, this can be useful to force a particular response for some +URLs, e.g. + + ^/(ogre.*)/bad http://www.example.com/ @status=404 + + +Or, to force a 302 redirect + + ^/oldurl/(.*)$ http://news.example.com/new/$1 @status=302 + + +Note: Setting the status to 301 or 302 will force the new URL to be used +as a redirect (Location:). + + +RELEASES +-------- + +Version 2.0 + - Initial Open Source release. http://git-wip-us.apache.org/repos/asf/trafficserver/blob/325610e7/plugins/regex_remap/regex_remap.cc ---------------------------------------------------------------------- diff --git a/plugins/regex_remap/regex_remap.cc b/plugins/regex_remap/regex_remap.cc new file mode 100644 index 0000000..2cfea6d --- /dev/null +++ b/plugins/regex_remap/regex_remap.cc @@ -0,0 +1,901 @@ +/** @file + + ATS plugin to do (simple) regular expression remap rules + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#define UNUSED __attribute__ ((unused)) +static char UNUSED rcsId__regex_remap_cc[] = "@(#) $Id$ built on " __DATE__ " " __TIME__; + +#include "ts/ts.h" +#include "ts/remap.h" +#include "ink_config.h" + +#include <sys/types.h> +#include <stdio.h> +#include <time.h> +#include <string.h> + +#ifdef HAVE_PCRE_PCRE_H +#include <pcre/pcre.h> +#else +#include <pcre.h> +#endif + +#include <ctype.h> +#include <unistd.h> + +#include <iostream> +#include <fstream> +#include <string> + +// Get some specific stuff from libts, yes, we can do that now that we build inside the core. +#include "ink_platform.h" +#include "ink_atomic.h" +#include "ink_time.h" + +static const char* PLUGIN_NAME = "regex_remap"; + +// Constants +static const int OVECCOUNT = 30; // We support $0 - $9 x2 ints, and this needs to be 1.5x that +static const int MAX_SUBS = 32; // No more than 32 substitution variables in the subst string + +// TODO: This should be "autoconf'ed" or something ... +#define DEFAULT_PATH "/usr/local/etc/regex_remap/" + +// Substitutions other than regex matches +enum ExtraSubstitutions { + SUB_HOST = 11, + SUB_FROM_HOST = 12, + SUB_TO_HOST = 13, + SUB_PORT = 14, + SUB_SCHEME = 15, + SUB_PATH = 16, + SUB_QUERY = 17, + SUB_MATRIX = 18, + SUB_CLIENT_IP = 19, +}; + + +/////////////////////////////////////////////////////////////////////////////// +// Class holding one request URL's component, to simplify the code and +// length calculations (we need all of them). 
+// +struct UrlComponents +{ + UrlComponents() + : scheme(NULL), host(NULL), path(NULL), query(NULL), matrix(NULL), port(0), + scheme_len(0), host_len(0), path_len(0), query_len(0), matrix_len(0), url_len(0) + {} + + void populate(TSRemapRequestInfo *rri) + { + scheme = TSUrlSchemeGet(rri->requestBufp, rri->requestUrl, &scheme_len); + host = TSUrlHostGet(rri->requestBufp, rri->requestUrl, &host_len); + path = TSUrlPathGet(rri->requestBufp, rri->requestUrl, &path_len); + query = TSUrlHttpQueryGet(rri->requestBufp, rri->requestUrl, &query_len); + matrix = TSUrlHttpParamsGet(rri->requestBufp, rri->requestUrl, &matrix_len); + port = TSUrlPortGet(rri->requestBufp, rri->requestUrl); + + url_len = scheme_len + host_len + path_len + query_len + matrix_len + 32; + } + + const char* scheme; + const char* host; + const char* path; + const char* query; + const char* matrix; + int port; + + int scheme_len; + int host_len; + int path_len; + int query_len; + int matrix_len; + + int url_len; // Full length, of all components +}; + + + +/////////////////////////////////////////////////////////////////////////////// +// Class encapsulating one regular expression (and the linked list). 
+// +class RemapRegex +{ + public: + RemapRegex(const std::string& reg, const std::string& sub, const std::string& opt) : + _num_subs(-1), _rex(NULL), _extra(NULL), _order(-1), _simple(false), + _active_timeout(-1), _no_activity_timeout(-1), _connect_timeout(-1), _dns_timeout(-1) + { + TSDebug(PLUGIN_NAME, "Calling constructor"); + + _status = static_cast<TSHttpStatus>(0); + + if (!reg.empty()) { + if (reg == ".") { + TSDebug(PLUGIN_NAME, "Rule is simple, and fast!"); + _simple = true; + } + _rex_string = TSstrdup(reg.c_str()); + } else + _rex_string = NULL; + + if (!sub.empty()) { + _subst = TSstrdup(sub.c_str()); + _subst_len = sub.length(); + } else { + _subst = NULL; + _subst_len = 0; + } + + _hits = 0; + + memset(_sub_pos, 0, sizeof(_sub_pos)); + memset(_sub_ix, 0, sizeof(_sub_ix)); + _next = NULL; + + // Parse options + std::string::size_type start = opt.find_first_of("@"); + std::string::size_type pos1, pos2; + + while (start != std::string::npos) { + std::string opt_val; + + ++start; + pos1 = opt.find_first_of("=", start); + if (pos1 == std::string::npos) { + TSError("Malformed options: %s", opt.c_str()); + break; + } + ++pos1; + pos2 = opt.find_first_of(" \t\n", pos1); + if (pos2 == std::string::npos) + pos2 = opt.length(); + opt_val = opt.substr(pos1, pos2-pos1); + + if (opt.compare(start, 6, "status") == 0) { + _status = static_cast<TSHttpStatus>(atoi(opt_val.c_str())); + } else if (opt.compare(start, 14, "active_timeout") == 0) { + _active_timeout = atoi(opt_val.c_str()); + } else if (opt.compare(start, 19, "no_activity_timeout") == 0) { + _no_activity_timeout = atoi(opt_val.c_str()); + } else if (opt.compare(start, 15, "connect_timeout") == 0) { + _connect_timeout = atoi(opt_val.c_str()); + } else if (opt.compare(start, 11, "dns_timeout") == 0) { + _dns_timeout = atoi(opt_val.c_str()); + } else { + TSError("Unknown options: %s", opt.c_str()); + } + start = opt.find_first_of("@", pos2); + } + }; + + ~RemapRegex() + { + TSDebug(PLUGIN_NAME, "Calling 
destructor"); + if (_rex_string) + TSfree(_rex_string); + if (_subst) + TSfree(_subst); + + if (_rex) + pcre_free(_rex); + if (_extra) + pcre_free(_extra); + }; + + // For profiling information + inline void + print(int ix, int max, const char* now) + { + fprintf(stderr, "[%s]:\tRegex %d ( %s ): %.2f%%\n", now, ix, _rex_string, 100.0 * _hits / max); + } + + inline void + increment() + { + ink_atomic_increment(&(_hits), 1); + } + + // Compile and study the regular expression. + int + compile(const char** error, int* erroffset) + { + char* str; + int ccount; + + _rex = pcre_compile(_rex_string, // the pattern + 0, // default options + error, // for error message + erroffset, // for error offset + NULL); // use default character tables + + if (NULL == _rex) + return -1; + + _extra = pcre_study(_rex, 0, error); + if ((_extra == NULL) && (*error != 0)) + return -1; + + if (pcre_fullinfo(_rex, _extra, PCRE_INFO_CAPTURECOUNT, &ccount) != 0) + return -1; + + // Get some info for the string substitutions + str = _subst; + _num_subs = 0; + + while (str && *str) { + if ('$' == *str) { + int ix = -1; + + if (isdigit(*(str+1))) { + ix = *(str + 1) - '0'; + } else { + switch (*(str + 1)) { + case 'h': + ix = SUB_HOST; + break; + case 'f': + ix = SUB_FROM_HOST; + break; + case 't': + ix = SUB_TO_HOST; + break; + case 'p': + ix = SUB_PORT; + break; + case 's': + ix = SUB_SCHEME; + break; + case 'P': + ix = SUB_PATH; + break; + case 'q': + ix = SUB_QUERY; + break; + case 'm': + ix = SUB_MATRIX; + break; + case 'i': + ix = SUB_CLIENT_IP; + break; + default: + break; + } + } + + if (ix > -1) { + if ((ix < 10) && (ix > ccount)) { + TSDebug(PLUGIN_NAME, "Trying to use unavailable substitution, check the regex!"); + return -1; // No substitutions available other than $0 + } + + _sub_ix[_num_subs] = ix; + _sub_pos[_num_subs] = (str - _subst); + str += 2; + ++_num_subs; + } else { // Not a valid substitution character, so just ignore it + ++str; + } + } else { + ++str; + } + } + return 0; 
+ }; + + // Perform the regular expression matching against a string. + int + match(const char* str, int len, int ovector[]) + { + return pcre_exec(_rex, // the compiled pattern + _extra, // Extra data from study (maybe) + str, // the subject string + len, // the length of the subject + 0, // start at offset 0 in the subject + 0, // default options + ovector, // output vector for substring information + OVECCOUNT); // number of elements in the output vector + }; + + // Get the lengths of the matching string(s), taking into account variable substitutions. + // We also calculate a total length for the new string, which is the max length the + // substituted string can have (use it to allocate a buffer before calling substitute() ). + int + get_lengths(const int ovector[], int lengths[], TSRemapRequestInfo *rri, UrlComponents *req_url) + { + int len = _subst_len + 1; // Bigger then necessary + + for (int i=0; i < _num_subs; i++) { + int ix = _sub_ix[i]; + + if (ix < 10) { + lengths[ix] = ovector[2*ix+1] - ovector[2*ix]; // -1 - -1 == 0 + len += lengths[ix]; + } else { + int tmp_len; + + switch (ix) { + case SUB_HOST: + len += req_url->host_len; + break; + case SUB_FROM_HOST: + TSUrlHostGet(rri->requestBufp, rri->mapFromUrl, &tmp_len); + len += tmp_len; + break; + case SUB_TO_HOST: + TSUrlHostGet(rri->requestBufp, rri->mapToUrl, &tmp_len); + len += tmp_len; + break; + case SUB_PORT: + len += 6; // One extra for snprintf() + break; + case SUB_SCHEME: + len += req_url->scheme_len; + break; + case SUB_PATH: + len += req_url->path_len; + break; + case SUB_QUERY: + len += req_url->query_len; + break; + case SUB_MATRIX: + len += req_url->matrix_len; + break; + case SUB_CLIENT_IP: + len += 15; // Allow for 255.255.255.255 + break; + default: + break; + } + } + } + + return len; + }; + + // Perform substitution on the $0 - $9 variables in the "src" string. $0 is the entire + // regex that was matches, while $1 - $9 are the corresponding groups. 
Return the final + // length of the string as written to dest (not including the trailing '0'). + int + substitute(char dest[], const char *src, const int ovector[], const int lengths[], + TSRemapRequestInfo *rri, UrlComponents *req_url, struct sockaddr const* addr) + { + if (_num_subs > 0) { + char* p1 = dest; + char* p2 = _subst; + int prev = 0; + + for (int i=0; i < _num_subs; i++) { + int ix = _sub_ix[i]; + + memcpy(p1, p2, _sub_pos[i] - prev); + p1 += (_sub_pos[i] - prev); + if (ix < 10) { + memcpy(p1, src + ovector[2*ix], lengths[ix]); + p1 += lengths[ix]; + } else { + const char* str = NULL; + int len = 0; + + switch (ix) { + case SUB_HOST: + str = req_url->host; + len = req_url->host_len; + break; + case SUB_FROM_HOST: + str = TSUrlHostGet(rri->requestBufp, rri->mapFromUrl, &len); + break; + case SUB_TO_HOST: + str = TSUrlHostGet(rri->requestBufp, rri->mapToUrl, &len); + break; + case SUB_PORT: + p1 += snprintf(p1, 6, "%u", req_url->port); + break; + case SUB_SCHEME: + str = req_url->scheme; + len = req_url->scheme_len; + break; + case SUB_PATH: + str = req_url->path; + len = req_url->path_len; + break; + case SUB_QUERY: + str = req_url->query; + len = req_url->query_len; + break; + case SUB_MATRIX: + str = req_url->matrix; + len = req_url->matrix_len; + break; + case SUB_CLIENT_IP: + { + // TODO: Finish implementing with the addr from above + // p1 += snprintf(p1, 15, "%d.%d.%d.%d", ip[0], ip[1], ip[2], ip[3]); + } + break; + default: + break; + } + // If one of the rules fetched a read-only string, copy it in. + if (str && len > 0) { + memcpy(p1, str, len); + p1 += len; + } + } + p2 += (_sub_pos[i] - prev + 2); + prev = _sub_pos[i] + 2; + } + memcpy(p1, p2, _subst_len - (p2 - _subst)); + p1 += _subst_len - (p2 - _subst); + *p1 = 0; // Make sure it's NULL terminated (for safety). + return p1 - dest; + } else { + memcpy(dest, _subst, _subst_len + 1); // No substitutions in the string, copy it all + return _subst_len; + } + + return 0; // Shouldn't happen. 
+ }; + + // setter / getters for members the linked list. + inline void set_next(RemapRegex* next) { _next = next; }; + inline RemapRegex* next() const { return _next; }; + + // setter / getters for order number within the linked list + inline void set_order(int order) { _order = order; }; + inline int order() { return _order; }; + + // Various getters + inline const char* regex() const { return _rex_string; }; + inline const char* substitution() const { return _subst; }; + inline int substitutions_used() const { return _num_subs; } + + inline bool is_simple() const { return _simple; } + + inline TSHttpStatus status_option() const { return _status; }; + inline int active_timeout_option() const { return _active_timeout; }; + inline int no_activity_timeout_option() const { return _no_activity_timeout; }; + inline int connect_timeout_option() const { return _connect_timeout; }; + inline int dns_timeout_option() const { return _dns_timeout; }; + + private: + char* _rex_string; + char* _subst; + int _subst_len; + int _num_subs; + int _hits; + + pcre* _rex; + pcre_extra* _extra; + int _sub_pos[MAX_SUBS]; + int _sub_ix[MAX_SUBS]; + RemapRegex* _next; + int _order; + TSHttpStatus _status; + bool _simple; + int _active_timeout; + int _no_activity_timeout; + int _connect_timeout; + int _dns_timeout; +}; + +struct RemapInstance +{ + RemapInstance() : + first(NULL), last(NULL), profile(false), method(false), query_string(true), + matrix_params(false), hits(0), misses(0), + filename("unknown") + { }; + + RemapRegex* first; + RemapRegex* last; + bool profile; + bool method; + bool query_string; + bool matrix_params; + int hits; + int misses; + std::string filename; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Helpers for memory management (to make sure pcre uses the TS APIs). 
+// +inline void* +ts_malloc(size_t s) +{ + return TSmalloc(s); +} + +inline void +ts_free(void *s) +{ + return TSfree(s); +} + +void +setup_memory_allocation() +{ + pcre_malloc = &ts_malloc; + pcre_free = &ts_free; +} + + +/////////////////////////////////////////////////////////////////////////////// +// Initialize the plugin. +// +TSReturnCode +TSRemapInit(TSRemapInterface* api_info, char *errbuf, int errbuf_size) +{ + if (!api_info) { + strncpy(errbuf, "[tsremap_init] - Invalid TSRemapInterface argument", errbuf_size - 1); + return TS_ERROR; + } + + if (api_info->tsremap_version < TSREMAP_VERSION) { + snprintf(errbuf, errbuf_size - 1, "[TSRemapInit] - Incorrect API version %ld.%ld", + api_info->tsremap_version >> 16, (api_info->tsremap_version & 0xffff)); + return TS_ERROR; + } + + setup_memory_allocation(); + TSDebug(PLUGIN_NAME, "plugin is succesfully initialized"); + return TS_SUCCESS; +} + + +/////////////////////////////////////////////////////////////////////////////// +// We don't have any specific "instances" here, at least not yet. +// +TSReturnCode +TSRemapNewInstance(int argc, char* argv[], void** ih, char* errbuf, int errbuf_size) +{ + const char* error; + int erroffset; + RemapInstance* ri = new RemapInstance(); + + std::ifstream f; + int lineno = 0; + int count = 0; + + *ih = (void*)ri; + if (ri == NULL) { + TSError("Unable to create remap instance"); + return TS_ERROR; + } + + // Really simple (e.g. 
basic) config parser + for (int i=2; i < argc; ++i) { + if (strncmp(argv[i], "profile", 7) == 0) { + ri->profile = true; + } else if (strncmp(argv[i], "no-profile", 10) == 0) { + ri->profile = false; + } else if (strncmp(argv[i], "method", 6) == 0) { + ri->method = true; + } else if (strncmp(argv[i], "no-method", 9) == 0) { + ri->method = true; + } else if (strncmp(argv[i], "query-string", 12) == 0) { + ri->query_string = true; + } else if (strncmp(argv[i], "no-query-string", 15) == 0) { + ri->query_string = false; + } else if (strncmp(argv[i], "matrix-parameters", 15) == 0) { + ri->matrix_params = true; + } else if (strncmp(argv[i], "no-matrix-parameters", 18) == 0) { + ri->matrix_params = false; + } else { + if (0 != access(argv[2], R_OK)) { + ri->filename = DEFAULT_PATH; + ri->filename += argv[2]; + } else { + ri->filename = argv[2]; + } + + f.open((ri->filename).c_str(), std::ios::in); + if (!f.is_open()) { // Try with the default path instead + TSError("unable to open %s", (ri->filename).c_str()); + return TS_ERROR; + } + TSDebug(PLUGIN_NAME, "loading regular expression maps from %s", (ri->filename).c_str()); + + while (!f.eof()) { + std::string line, regex, subst, options; + std::string::size_type pos1, pos2; + + getline(f, line); + ++lineno; + if (line.empty()) + continue; + pos1 = line.find_first_not_of(" \t\n"); + if (line[pos1] == '#') + continue; // Skip comment lines + + if (pos1 != std::string::npos) { + pos2 = line.find_first_of(" \t\n", pos1); + if (pos2 != std::string::npos) { + regex = line.substr(pos1, pos2-pos1); + pos1 = line.find_first_not_of(" \t\n#", pos2); + if (pos1 != std::string::npos) { + pos2 = line.find_first_of(" \t\n", pos1); + if (pos2 == std::string::npos) + pos2 = line.length(); + subst = line.substr(pos1, pos2-pos1); + pos1 = line.find_first_not_of(" \t\n#", pos2); + if (pos1 != std::string::npos) { + pos2 = line.find_first_of("\n#", pos1); + if (pos2 == std::string::npos) + pos2 = line.length(); + options = line.substr(pos1, 
pos2-pos1); + } + } + } + } + + if (regex.empty()) { + // No regex found on this line + TSError("no regexp found in %s: line %d", (ri->filename).c_str(), lineno); + continue; + } + if (subst.empty() && options.empty()) { + // No substitution found on this line (and no options) + TSError("no substitution string found in %s: line %d", (ri->filename).c_str(), lineno); + continue; + } + + // Got a regex and substitution string + RemapRegex* cur = new RemapRegex(regex, subst, options); + + if (cur == NULL) { + TSError("can't create a new regex remap rule"); + continue; + } + + if (cur->compile(&error, &erroffset) < 0) { + TSError("PCRE failed in %s (line %d) at offset %d: %s", (ri->filename).c_str(), lineno, erroffset, error); + delete(cur); + } else { + TSDebug(PLUGIN_NAME, "added regex=%s with substitution=%s and options `%s'", + regex.c_str(), subst.c_str(), options.c_str()); + cur->set_order(++count); + if (ri->first == NULL) + ri->first = cur; + else + ri->last->set_next(cur); + ri->last = cur; + } + } + } + } + + // Make sure we got something... + if (ri->first == NULL) { + TSError("Got no regular expressions from the maps"); + return TS_ERROR; + } + + return TS_SUCCESS; +} + + +void +TSRemapDeleteInstance(void* ih) +{ + RemapInstance* ri = static_cast<RemapInstance*>(ih); + RemapRegex* re; + RemapRegex* tmp; + + if (ri->profile) { + char now[64]; + const ink_time_t tim = time(NULL); + + if (ink_ctime_r(&tim, now)) + now[strlen(now) - 1] = '\0'; + else { + memcpy(now, "unknown time", 12); + *(now + 12) = '\0'; + } + + fprintf(stderr, "[%s]: Profiling information for regex_remap file `%s':\n", now, (ri->filename).c_str()); + fprintf(stderr, "[%s]:\tTotal hits (matches): %d\n", now, ri->hits); + fprintf(stderr, "[%s]:\tTotal missed (no regex matches): %d\n", now, ri->misses); + + if (ri->hits > 0) { // Avoid divide by zeros... 
+ int ix = 1; + + re = ri->first; + while (re) { + re->print(ix, ri->hits, now); + re = re->next(); + ++ix; + } + } + } + + re = ri->first; + while (re) { + tmp = re; + re = re->next(); + delete tmp; + } + + delete ri; +} + + +/////////////////////////////////////////////////////////////////////////////// +// This is the main "entry" point for the plugin, called for every request. +// +TSRemapStatus +TSRemapDoRemap(void* ih, TSHttpTxn txnp, TSRemapRequestInfo *rri) +{ + if (NULL == ih) { + TSDebug(PLUGIN_NAME, "Falling back to default URL on regex remap without rules"); + return TSREMAP_NO_REMAP; + } + + // Populate the request url + UrlComponents req_url; + req_url.populate(rri); + + RemapInstance* ri = (RemapInstance*)ih; + int ovector[OVECCOUNT]; + int lengths[OVECCOUNT/2 + 1]; + int dest_len; + TSRemapStatus retval = TSREMAP_DID_REMAP; + RemapRegex* re = ri->first; + int match_len = 0; + char *match_buf; + + match_buf = (char*)alloca(req_url.url_len + 32); + + if (ri->method) { // Prepend the URI path or URL with the HTTP method + TSMBuffer mBuf; + TSMLoc reqHttpHdrLoc; + const char *method; + + // Note that Method can not be longer than 16 bytes, or we'll simply truncate it + if (TS_SUCCESS == TSHttpTxnClientReqGet(static_cast<TSHttpTxn>(txnp), &mBuf, &reqHttpHdrLoc)) { + method = TSHttpHdrMethodGet(mBuf, reqHttpHdrLoc, &match_len); + if (method && (match_len > 0)) { + if (match_len > 16) + match_len = 16; + memcpy(match_buf, method, match_len); + } + } + } + + *(match_buf + match_len) = '/'; + if (req_url.path && req_url.path_len > 0) { + memcpy(match_buf + match_len + 1, req_url.path, req_url.path_len); + match_len += (req_url.path_len + 1); + } + + if (ri->matrix_params && req_url.matrix && req_url.matrix_len > 0) { + *(match_buf + match_len) = ';'; + memcpy(match_buf + match_len + 1 , req_url.matrix, req_url.matrix_len); + match_len += (req_url.matrix_len + 1); + } + + if (ri->query_string && req_url.query && req_url.query_len > 0) { + *(match_buf + 
match_len) = '?'; + memcpy(match_buf + match_len + 1 , req_url.query, req_url.query_len); + match_len += (req_url.query_len + 1); + } + match_buf[match_len] = '\0'; // NULL terminate the match string + TSDebug(PLUGIN_NAME, "Target match string is `%s'", match_buf); + + // Apply the regular expressions, in order. First one wins. + while (re) { + // Since we check substitutions on parse time, we don't need to reset ovector + if (re->is_simple() || (re->match(match_buf, match_len, ovector) != -1)) { + int new_len = re->get_lengths(ovector, lengths, rri, &req_url); + + // Set timeouts + if (re->active_timeout_option() > (-1)) { + TSDebug(PLUGIN_NAME, "Setting active timeout to %d", re->active_timeout_option()); + TSHttpTxnActiveTimeoutSet(txnp, re->active_timeout_option()); + } + if (re->no_activity_timeout_option() > (-1)) { + TSDebug(PLUGIN_NAME, "Setting no activity timeout to %d", re->no_activity_timeout_option()); + TSHttpTxnNoActivityTimeoutSet(txnp, re->no_activity_timeout_option()); + } + if (re->connect_timeout_option() > (-1)) { + TSDebug(PLUGIN_NAME, "Setting connect timeout to %d", re->connect_timeout_option()); + TSHttpTxnConnectTimeoutSet(txnp, re->connect_timeout_option()); + } + if (re->dns_timeout_option() > (-1)) { + TSDebug(PLUGIN_NAME, "Setting DNS timeout to %d", re->dns_timeout_option()); + TSHttpTxnDNSTimeoutSet(txnp, re->dns_timeout_option()); + } + + // Update profiling if requested + if (ri->profile) { + re->increment(); + ink_atomic_increment(&(ri->hits), 1); + } + + if (new_len > 0) { + char* dest; + struct sockaddr const* addr = TSHttpTxnClientAddrGet(txnp); + + dest = (char*)alloca(new_len+8); + dest_len = re->substitute(dest, match_buf, ovector, lengths, rri, &req_url, addr); + + TSDebug(PLUGIN_NAME, "New URL is estimated to be %d bytes long, or less", new_len); + TSDebug(PLUGIN_NAME, "New URL is %s (length %d)", dest, dest_len); + TSDebug(PLUGIN_NAME, " matched rule %d [%s]", re->order(), re->regex()); + + // Check for a quick response, 
if the status option is set + if (re->status_option() > 0) { + if (re->status_option() != TS_HTTP_STATUS_MOVED_PERMANENTLY && + re->status_option() != TS_HTTP_STATUS_MOVED_TEMPORARILY) { + // Don't set the URL / Location for this. + TSHttpTxnSetHttpRetStatus(txnp, re->status_option()); + break; + } + + TSDebug(PLUGIN_NAME, "Redirecting URL, status=%d", re->status_option()); + TSHttpTxnSetHttpRetStatus(txnp, re->status_option()); + rri->redirect = 1; + } + + // Now parse the new URL, which can also be the redirect URL + if (dest_len > 0) { + const char *start = dest; + + // Setup the new URL + if (TS_PARSE_ERROR == TSUrlParse(rri->requestBufp, rri->requestUrl, &start, start + dest_len)) { + TSHttpTxnSetHttpRetStatus(txnp, TS_HTTP_STATUS_INTERNAL_SERVER_ERROR); + TSError("can't parse substituted URL string"); + } + } + break; + } + } + + // Try the next regex + re = re->next(); + if (re == NULL) { + retval = TSREMAP_NO_REMAP; // No match + if (ri->profile) + ink_atomic_increment(&(ri->misses), 1); + } + } + + return retval; +} + + + +/* + local variables: + mode: C++ + indent-tabs-mode: nil + c-basic-offset: 2 + c-comment-only-line-offset: 0 + c-file-offsets: ((statement-block-intro . +) + (label . 0) + (statement-cont . +) + (innamespace . 0)) + end: + + Indent with: /usr/bin/indent -ncs -nut -npcs -l 120 logstats.cc +*/
