This is an automated email from the ASF dual-hosted git repository. cmcfarlen pushed a commit to branch 10.0.x in repository https://gitbox.apache.org/repos/asf/trafficserver.git
commit 954fb89da9bf6c8331470b0a651fda4d5640e1be Author: Leif Hedstrom <[email protected]> AuthorDate: Thu Mar 28 12:28:17 2024 -0600 This adds support for NOCASE on string compare / regex (#11152) (cherry picked from commit 0fef5a3450a4aaf21a465300d0e15c2d4d43f5fa) --- doc/admin-guide/plugins/header_rewrite.en.rst | 2 + plugins/header_rewrite/CMakeLists.txt | 1 + plugins/header_rewrite/condition.cc | 8 +++ plugins/header_rewrite/condition.h | 17 +++--- plugins/header_rewrite/conditions.cc | 43 ++++++++------- plugins/header_rewrite/matcher.cc | 77 +++++++++++++++++++++++++++ plugins/header_rewrite/matcher.h | 70 +++++++++++------------- plugins/header_rewrite/regex_helper.cc | 4 +- plugins/header_rewrite/regex_helper.h | 2 +- 9 files changed, 150 insertions(+), 74 deletions(-) diff --git a/doc/admin-guide/plugins/header_rewrite.en.rst b/doc/admin-guide/plugins/header_rewrite.en.rst index c45f14c536..4068140c83 100644 --- a/doc/admin-guide/plugins/header_rewrite.en.rst +++ b/doc/admin-guide/plugins/header_rewrite.en.rst @@ -605,6 +605,8 @@ AND Indicates that both the current condition and the next must be true. NOT Inverts the condition. OR Indicates that either the current condition or the next one must be true, as contrasted with the default behavior from ``[AND]``. +NOCASE Indicates that the string comparison, or regular expression, should be + case-insensitive. The default is to be case-sensitive. ====== ======================================================================== Operators diff --git a/plugins/header_rewrite/CMakeLists.txt b/plugins/header_rewrite/CMakeLists.txt index d5b53916ae..0e9f04ab8f 100644 --- a/plugins/header_rewrite/CMakeLists.txt +++ b/plugins/header_rewrite/CMakeLists.txt @@ -22,6 +22,7 @@ add_atsplugin( factory.cc header_rewrite.cc lulu.cc + matcher.cc operator.cc operators.cc parser.cc diff --git a/plugins/header_rewrite/condition.cc b/plugins/header_rewrite/condition.cc index 717742d8c3..b3e87a4d92 100644 --- a/plugins/header_rewrite/condition.cc +++ b/plugins/header_rewrite/condition.cc @@ -75,6 +75,14 @@ Condition::initialize(Parser &p) _mods = static_cast<CondModifiers>(_mods | COND_NOT); } + // The NOCASE / CASE modifier is a bit special, since it ripples down into the Matchers for + // strings and regexes. + if (p.mod_exist("NOCASE")) { + _mods = static_cast<CondModifiers>(_mods | COND_NOCASE); + } else if (p.mod_exist("CASE")) { + // Nothing to do, this is the default + } + if (p.mod_exist("L")) { _mods = static_cast<CondModifiers>(_mods | COND_LAST); } diff --git a/plugins/header_rewrite/condition.h b/plugins/header_rewrite/condition.h index 9758088cde..2b7185e4b6 100644 --- a/plugins/header_rewrite/condition.h +++ b/plugins/header_rewrite/condition.h @@ -30,17 +30,6 @@ #include "matcher.h" #include "parser.h" -// Condition modifiers -enum CondModifiers { - COND_NONE = 0, - COND_OR = 1, - COND_AND = 2, - COND_NOT = 4, - COND_NOCASE = 8, // Not implemented - COND_LAST = 16, - COND_CHAIN = 32 // Not implemented -}; - /////////////////////////////////////////////////////////////////////////////// // Base class for all Conditions (this is also the interface) // @@ -93,6 +82,12 @@ public: return _mods & COND_LAST; } + CondModifiers + mods() const + { + return _mods; + } + // Setters virtual void set_qualifier(const std::string &q) diff --git a/plugins/header_rewrite/conditions.cc b/plugins/header_rewrite/conditions.cc index 8973087a46..1b21805a83 100644 --- a/plugins/header_rewrite/conditions.cc +++ b/plugins/header_rewrite/conditions.cc @@ -41,7 +41,7 @@ ConditionStatus::initialize(Parser &p) Condition::initialize(p); MatcherType *match = new MatcherType(_cond_op); - match->set(static_cast<TSHttpStatus>(strtol(p.get_arg().c_str(), nullptr, 10))); + match->set(static_cast<TSHttpStatus>(strtol(p.get_arg().c_str(), nullptr, 10)), mods()); _matcher = match; require_resources(RSRC_SERVER_RESPONSE_HEADERS); @@ -77,7 +77,7 @@ ConditionMethod::initialize(Parser &p) Condition::initialize(p); MatcherType *match = new MatcherType(_cond_op); - match->set(p.get_arg()); + match->set(p.get_arg(), mods()); _matcher = match; require_resources(RSRC_CLIENT_REQUEST_HEADERS); @@ -123,7 +123,7 @@ ConditionRandom::initialize(Parser &p) _seed = getpid() * tv.tv_usec; _max = strtol(_qualifier.c_str(), nullptr, 10); - match->set(static_cast<unsigned int>(strtol(p.get_arg().c_str(), nullptr, 10))); + match->set(static_cast<unsigned int>(strtol(p.get_arg().c_str(), nullptr, 10)), mods()); _matcher = match; } @@ -195,7 +195,7 @@ ConditionHeader::initialize(Parser &p) Condition::initialize(p); MatcherType *match = new MatcherType(_cond_op); - match->set(p.get_arg()); + match->set(p.get_arg(), mods()); _matcher = match; require_resources(RSRC_CLIENT_REQUEST_HEADERS); @@ -259,7 +259,7 @@ ConditionUrl::initialize(Parser &p) Condition::initialize(p); MatcherType *match = new MatcherType(_cond_op); - match->set(p.get_arg()); + match->set(p.get_arg(), mods()); _matcher = match; } @@ -367,6 +367,7 @@ ConditionUrl::eval(const Resources &res) std::string s; append_value(s, res); + return static_cast<const Matchers<std::string> *>(_matcher)->test(s); } @@ -377,7 +378,7 @@ ConditionDBM::initialize(Parser &p) Condition::initialize(p); MatcherType *match = new MatcherType(_cond_op); - match->set(p.get_arg()); + match->set(p.get_arg(), mods()); _matcher = match; std::string::size_type pos = _qualifier.find_first_of(','); @@ -442,7 +443,7 @@ ConditionCookie::initialize(Parser &p) MatcherType *match = new MatcherType(_cond_op); - match->set(p.get_arg()); + match->set(p.get_arg(), mods()); _matcher = match; require_resources(RSRC_CLIENT_REQUEST_HEADERS); @@ -527,7 +528,7 @@ ConditionIp::initialize(Parser &p) } else { MatcherType *match = new MatcherType(_cond_op); - match->set(p.get_arg()); + match->set(p.get_arg(), mods()); _matcher = match; } } @@ -625,7 +626,7 @@ ConditionTransactCount::initialize(Parser &p) MatcherType *match = new MatcherType(_cond_op); std::string const &arg = p.get_arg(); - match->set(strtol(arg.c_str(), nullptr, 10)); + match->set(strtol(arg.c_str(), nullptr, 10), mods()); _matcher = match; } @@ -715,7 +716,7 @@ ConditionNow::initialize(Parser &p) MatcherType *match = new MatcherType(_cond_op); - match->set(static_cast<int64_t>(strtol(p.get_arg().c_str(), nullptr, 10))); + match->set(static_cast<int64_t>(strtol(p.get_arg().c_str(), nullptr, 10)), mods()); _matcher = match; } @@ -788,13 +789,13 @@ ConditionGeo::initialize(Parser &p) if (is_int_type()) { Matchers<int64_t> *match = new Matchers<int64_t>(_cond_op); - match->set(static_cast<int64_t>(strtol(p.get_arg().c_str(), nullptr, 10))); + match->set(static_cast<int64_t>(strtol(p.get_arg().c_str(), nullptr, 10)), mods()); _matcher = match; } else { // The default is to have a string matcher Matchers<std::string> *match = new Matchers<std::string>(_cond_op); - match->set(p.get_arg()); + match->set(p.get_arg(), mods()); _matcher = match; } } @@ -869,13 +870,13 @@ ConditionId::initialize(Parser &p) if (_id_qual == ID_QUAL_REQUEST) { Matchers<uint64_t> *match = new Matchers<uint64_t>(_cond_op); - match->set(static_cast<uint64_t>(strtol(p.get_arg().c_str(), nullptr, 10))); + match->set(static_cast<uint64_t>(strtol(p.get_arg().c_str(), nullptr, 10)), mods()); _matcher = match; } else { // The default is to have a string matcher Matchers<std::string> *match = new Matchers<std::string>(_cond_op); - match->set(p.get_arg()); + match->set(p.get_arg(), mods()); _matcher = match; } } @@ -952,7 +953,7 @@ ConditionCidr::initialize(Parser &p) MatcherType *match = new MatcherType(_cond_op); - match->set(p.get_arg()); + match->set(p.get_arg(), mods()); _matcher = match; } @@ -1061,7 +1062,7 @@ ConditionInbound::initialize(Parser &p) } else { MatcherType *match = new MatcherType(_cond_op); - match->set(p.get_arg()); + match->set(p.get_arg(), mods()); _matcher = match; } } @@ -1231,7 +1232,7 @@ ConditionSessionTransactCount::initialize(Parser &p) MatcherType *match = new MatcherType(_cond_op); std::string const &arg = p.get_arg(); - match->set(strtol(arg.c_str(), nullptr, 10)); + match->set(strtol(arg.c_str(), nullptr, 10), mods()); _matcher = match; } @@ -1265,7 +1266,7 @@ ConditionTcpInfo::initialize(Parser &p) MatcherType *match = new MatcherType(_cond_op); std::string const &arg = p.get_arg(); - match->set(strtol(arg.c_str(), nullptr, 10)); + match->set(strtol(arg.c_str(), nullptr, 10), mods()); _matcher = match; } @@ -1336,7 +1337,7 @@ ConditionCache::initialize(Parser &p) Condition::initialize(p); MatcherType *match = new MatcherType(_cond_op); - match->set(p.get_arg()); + match->set(p.get_arg(), mods()); _matcher = match; } @@ -1384,7 +1385,7 @@ ConditionNextHop::initialize(Parser &p) Condition::initialize(p); MatcherType *match = new MatcherType(_cond_op); - match->set(p.get_arg()); + match->set(p.get_arg(), mods()); _matcher = match; } @@ -1424,6 +1425,8 @@ bool ConditionNextHop::eval(const Resources &res) { std::string s; + append_value(s, res); + return static_cast<const Matchers<std::string> *>(_matcher)->test(s); } diff --git a/plugins/header_rewrite/matcher.cc b/plugins/header_rewrite/matcher.cc new file mode 100644 index 0000000000..450c331ab8 --- /dev/null +++ b/plugins/header_rewrite/matcher.cc @@ -0,0 +1,77 @@ +/* @file + + Implementation for creating all values. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include <string> +#include <algorithm> + +#include "matcher.h" + +// Special case for strings, to make the distinction between regexes and string matching +template <> +void +Matchers<std::string>::set(const std::string &d, CondModifiers mods) +{ + _data = d; + if (mods & COND_NOCASE) { + _nocase = true; + } + + if (_op == MATCH_REGULAR_EXPRESSION) { + if (!_reHelper.setRegexMatch(_data, _nocase)) { + std::stringstream ss; + + ss << _data; + TSError("[%s] Invalid regex: failed to precompile: %s", PLUGIN_NAME, ss.str().c_str()); + Dbg(pi_dbg_ctl, "Invalid regex: failed to precompile: %s", ss.str().c_str()); + throw std::runtime_error("Malformed regex"); + } else { + Dbg(pi_dbg_ctl, "Regex precompiled successfully"); + } + } +} + +// Special case for strings, to allow for insensitive case comparisons for std::string matchers. +template <> +bool +Matchers<std::string>::test_eq(const std::string &t) const +{ + bool r = false; + + if (_data.length() == t.length()) { + if (_nocase) { + // ToDo: in C++20, this would be nicer with std::range, e.g. + // r = std::ranges::equal(_data, t, [](char c1, char c2) { return std::tolower(c1) == std::tolower(c2); }); + r = std::equal(_data.begin(), _data.end(), t.begin(), [](char c1, char c2) { + return std::tolower(static_cast<unsigned char>(c1)) == std::tolower(static_cast<unsigned char>(c2)); + }); + } else { + r = (t == _data); + } + } + + if (pi_dbg_ctl.on()) { + debug_helper(t, " == ", r); + } + + return r; +} diff --git a/plugins/header_rewrite/matcher.h b/plugins/header_rewrite/matcher.h index 4b21592106..a40f2e6d13 100644 --- a/plugins/header_rewrite/matcher.h +++ b/plugins/header_rewrite/matcher.h @@ -41,6 +41,17 @@ enum MatcherOps { MATCH_IP_RANGES, }; +// Condition modifiers +enum CondModifiers { + COND_NONE = 0, + COND_OR = 1, + COND_AND = 2, + COND_NOT = 4, + COND_NOCASE = 8, + COND_LAST = 16, + COND_CHAIN = 32 // Not implemented +}; + /////////////////////////////////////////////////////////////////////////////// // Base class for all Matchers (this is also the interface) // @@ -77,36 +88,11 @@ public: }; void - setRegex(const std::string & /* data ATS_UNUSED */) - { - if (!reHelper.setRegexMatch(_data)) { - std::stringstream ss; - ss << _data; - TSError("[%s] Invalid regex: failed to precompile: %s", PLUGIN_NAME, ss.str().c_str()); - Dbg(pi_dbg_ctl, "Invalid regex: failed to precompile: %s", ss.str().c_str()); - throw std::runtime_error("Malformed regex"); - } else { - Dbg(pi_dbg_ctl, "Regex precompiled successfully"); - } - } - - void - setRegex(const unsigned int /* t ATS_UNUSED */) - { - return; - } - void - setRegex(const TSHttpStatus /* t ATS_UNUSED */) - { - return; - } - - void - set(const T &d) + set(const T &d, CondModifiers mods) { _data = d; - if (_op == MATCH_REGULAR_EXPRESSION) { - setRegex(d); + if (mods & COND_NOCASE) { + _nocase = true; } } @@ -158,6 +144,7 @@ private: if (pi_dbg_ctl.on()) { debug_helper(t, " == ", r); } + return r; } @@ -169,6 +156,7 @@ private: if (pi_dbg_ctl.on()) { debug_helper(t, " < ", r); } + return r; } @@ -180,6 +168,7 @@ private: if (pi_dbg_ctl.on()) { debug_helper(t, " > ", r); } + return r; } @@ -198,24 +187,25 @@ private: } bool - test_reg(const std::string &t) const + test_reg(const std::string &t, bool nocase = false) const { int ovector[OVECCOUNT]; - Dbg(pi_dbg_ctl, "Test regular expression %s : %s", _data.c_str(), t.c_str()); - if (reHelper.regexMatch(t.c_str(), t.length(), ovector) > 0) { + Dbg(pi_dbg_ctl, "Test regular expression %s : %s (NOCASE = %d)", _data.c_str(), t.c_str(), static_cast<int>(_nocase)); + if (_reHelper.regexMatch(t.c_str(), t.length(), ovector) > 0) { Dbg(pi_dbg_ctl, "Successfully found regular expression match"); return true; } + return false; } T _data; - regexHelper reHelper; + regexHelper _reHelper; + bool _nocase = false; }; // Specialized case matcher for the IP addresses matches. -// ToDo: we should specialize the regex matcher as well. template <> class Matchers<const sockaddr *> : public Matcher { public: @@ -236,16 +226,16 @@ public: bool test(const sockaddr *addr) const { - if (ipHelper.contains(swoc::IPAddr(addr))) { + if (_ipHelper.contains(swoc::IPAddr(addr))) { if (pi_dbg_ctl.on()) { char text[INET6_ADDRSTRLEN]; Dbg(pi_dbg_ctl, "Successfully found IP-range match on %s", getIP(addr, text)); } return true; - } else { - return false; } + + return false; } private: @@ -254,12 +244,12 @@ private: { while (text) { if (swoc::IPRange r; r.load(text.take_prefix_at(','))) { - ipHelper.mark(r); + _ipHelper.mark(r); } } - if (ipHelper.count() > 0) { - Dbg(pi_dbg_ctl, " Added %zu IP ranges while parsing", ipHelper.count()); + if (_ipHelper.count() > 0) { + Dbg(pi_dbg_ctl, " Added %zu IP ranges while parsing", _ipHelper.count()); return true; } else { Dbg(pi_dbg_ctl, " No IP ranges added, possibly bad input"); @@ -267,5 +257,5 @@ private: } } - swoc::IPRangeSet ipHelper; + swoc::IPRangeSet _ipHelper; }; diff --git a/plugins/header_rewrite/regex_helper.cc b/plugins/header_rewrite/regex_helper.cc index 8b30361123..ba9fac21c2 100644 --- a/plugins/header_rewrite/regex_helper.cc +++ b/plugins/header_rewrite/regex_helper.cc @@ -18,14 +18,14 @@ #include "regex_helper.h" bool -regexHelper::setRegexMatch(const std::string &s) +regexHelper::setRegexMatch(const std::string &s, bool nocase) { const char *errorComp = nullptr; const char *errorStudy = nullptr; int erroffset; regexString = s; - regex = pcre_compile(regexString.c_str(), 0, &errorComp, &erroffset, nullptr); + regex = pcre_compile(regexString.c_str(), nocase ? PCRE_CASELESS : 0, &errorComp, &erroffset, nullptr); if (regex == nullptr) { return false; diff --git a/plugins/header_rewrite/regex_helper.h b/plugins/header_rewrite/regex_helper.h index d156fee730..383ba9efa3 100644 --- a/plugins/header_rewrite/regex_helper.h +++ b/plugins/header_rewrite/regex_helper.h @@ -38,7 +38,7 @@ public: pcre_free(regexExtra); } - bool setRegexMatch(const std::string &s); + bool setRegexMatch(const std::string &s, bool nocase = false); int regexMatch(const char *, int, int ovector[]) const; private:
