This is an automated email from the ASF dual-hosted git repository. bcall pushed a commit to branch 7.0.x in repository https://git-dual.apache.org/repos/asf/trafficserver.git
commit e06af240e494fbace6a50324193a7c6790733eaa Author: Leif Hedstrom <[email protected]> AuthorDate: Fri Oct 21 19:18:43 2016 -0600 TS-4993: Disables escaping and quotes inside regexes In addition, this cleans up the unit tests a bit, to make it more useful from the command line when testing/debugging. New tests are also added for regular expression parse testing (cherry picked from commit c2dd8840e3fb96e95ba0b693ed5ef9d73f688fa7) --- plugins/header_rewrite/header_rewrite_test.cc | 305 ++++++++++++++++++-------- plugins/header_rewrite/parser.cc | 70 +++--- 2 files changed, 260 insertions(+), 115 deletions(-) diff --git a/plugins/header_rewrite/header_rewrite_test.cc b/plugins/header_rewrite/header_rewrite_test.cc index 6656107..94e9a89 100644 --- a/plugins/header_rewrite/header_rewrite_test.cc +++ b/plugins/header_rewrite/header_rewrite_test.cc @@ -22,6 +22,8 @@ #include <cstdio> #include <cstdarg> +#include <iostream> +#include <ostream> #include "parser.h" @@ -31,209 +33,307 @@ const char PLUGIN_NAME_DBG[] = "TEST_dbg_header_rewrite"; extern "C" void TSError(const char *fmt, ...) { - char buf[2048]; - int bytes = 0; - va_list args; - va_start(args, fmt); - if ((bytes = vsnprintf(buf, sizeof(buf), fmt, args)) > 0) { - fprintf(stderr, "TSError: %s: %.*s\n", PLUGIN_NAME, bytes, buf); - } - va_end(args); } -extern "C" void -TSDebug(const char *tag, const char *fmt, ...) -{ - char buf[2048]; - int bytes = 0; - va_list args; - va_start(args, fmt); - if ((bytes = vsnprintf(buf, sizeof(buf), fmt, args)) > 0) { - fprintf(stdout, "TSDebug: %s: %.*s\n", PLUGIN_NAME, bytes, buf); - } - va_end(args); -} - -#define CHECK_EQ(x, y) \ - do { \ - if ((x) != (y)) { \ - fprintf(stderr, "CHECK FAILED\n"); \ - return 1; \ - } \ - } while (false); - class ParserTest : public Parser { public: - ParserTest(std::string line) : Parser(line) {} + ParserTest(std::string line) : Parser(line), res(true) { std::cout << "Finished parser test: " << line << std::endl; } std::vector<std::string> getTokens() { return _tokens; } + + template <typename T, typename U> + void + do_parser_check(T x, U y, int line = 0) + { + if (x != y) { + std::cerr << "CHECK FAILED on line " << line << ": " << x << " != " << y << std::endl; + res = false; + } + } + + bool res; }; +#define CHECK_EQ(x, y) \ + do { \ + p.do_parser_check((x), (y), __LINE__); \ + } while (false); + +#define END_TEST(s) \ + do { \ + if (!p.res) { \ + ++errors; \ + } \ + } while (false); + int test_parsing() { + int errors = 0; + { ParserTest p("cond %{READ_REQUEST_HDR_HOOK}"); - CHECK_EQ(p.getTokens().size(), 2); + + CHECK_EQ(p.getTokens().size(), 2U); CHECK_EQ(p.getTokens()[0], "cond"); CHECK_EQ(p.getTokens()[1], "%{READ_REQUEST_HDR_HOOK}"); + + END_TEST(); } { ParserTest p("cond %{CLIENT-HEADER:Host} =a"); - CHECK_EQ(p.getTokens().size(), 4); + + CHECK_EQ(p.getTokens().size(), 4UL); CHECK_EQ(p.getTokens()[0], "cond"); CHECK_EQ(p.getTokens()[1], "%{CLIENT-HEADER:Host}"); CHECK_EQ(p.getTokens()[2], "="); CHECK_EQ(p.getTokens()[3], "a"); + + END_TEST(); } { ParserTest p(" # COMMENT!"); - CHECK_EQ(p.getTokens().size(), 0); + + CHECK_EQ(p.getTokens().size(), 0UL); CHECK_EQ(p.empty(), true); + + END_TEST(); } { ParserTest p("# COMMENT"); - CHECK_EQ(p.getTokens().size(), 0); + + CHECK_EQ(p.getTokens().size(), 0UL); CHECK_EQ(p.empty(), true); + + END_TEST(); } { ParserTest p("cond %{Client-HEADER:Foo} =b"); - CHECK_EQ(p.getTokens().size(), 4); + + CHECK_EQ(p.getTokens().size(), 4UL); CHECK_EQ(p.getTokens()[0], "cond"); CHECK_EQ(p.getTokens()[1], "%{Client-HEADER:Foo}"); CHECK_EQ(p.getTokens()[2], "="); CHECK_EQ(p.getTokens()[3], "b"); + + END_TEST(); } { ParserTest p("cond %{Client-HEADER:Blah} = x"); - CHECK_EQ(p.getTokens().size(), 4); + + CHECK_EQ(p.getTokens().size(), 4UL); CHECK_EQ(p.getTokens()[0], "cond"); CHECK_EQ(p.getTokens()[1], "%{Client-HEADER:Blah}"); CHECK_EQ(p.getTokens()[2], "="); CHECK_EQ(p.getTokens()[3], "x"); + + END_TEST(); } { - ParserTest p("cond %{CLIENT-HEADER:non_existent_header} = \"shouldnt_ exist _anyway\" [AND]"); - CHECK_EQ(p.getTokens().size(), 5); + ParserTest p(R"(cond %{CLIENT-HEADER:non_existent_header} = "shouldnt_ exist _anyway" [AND])"); + + CHECK_EQ(p.getTokens().size(), 5UL); CHECK_EQ(p.getTokens()[0], "cond"); CHECK_EQ(p.getTokens()[1], "%{CLIENT-HEADER:non_existent_header}"); CHECK_EQ(p.getTokens()[2], "="); CHECK_EQ(p.getTokens()[3], "shouldnt_ exist _anyway"); CHECK_EQ(p.getTokens()[4], "[AND]"); + + END_TEST(); } { - ParserTest p("cond %{CLIENT-HEADER:non_existent_header} = \"shouldnt_ = _anyway\" [AND]"); - CHECK_EQ(p.getTokens().size(), 5); + ParserTest p(R"(cond %{CLIENT-HEADER:non_existent_header} = "shouldnt_ = _anyway" [AND])"); + + CHECK_EQ(p.getTokens().size(), 5UL); CHECK_EQ(p.getTokens()[0], "cond"); CHECK_EQ(p.getTokens()[1], "%{CLIENT-HEADER:non_existent_header}"); CHECK_EQ(p.getTokens()[2], "="); CHECK_EQ(p.getTokens()[3], "shouldnt_ = _anyway"); CHECK_EQ(p.getTokens()[4], "[AND]"); + + END_TEST(); } { - ParserTest p("cond %{CLIENT-HEADER:non_existent_header} =\"=\" [AND]"); - CHECK_EQ(p.getTokens().size(), 5); + ParserTest p(R"(cond %{CLIENT-HEADER:non_existent_header} ="=" [AND])"); + + CHECK_EQ(p.getTokens().size(), 5UL); CHECK_EQ(p.getTokens()[0], "cond"); CHECK_EQ(p.getTokens()[1], "%{CLIENT-HEADER:non_existent_header}"); CHECK_EQ(p.getTokens()[2], "="); CHECK_EQ(p.getTokens()[3], "="); CHECK_EQ(p.getTokens()[4], "[AND]"); + + END_TEST(); } { - ParserTest p("cond %{CLIENT-HEADER:non_existent_header} =\"\" [AND]"); - CHECK_EQ(p.getTokens().size(), 5); + ParserTest p(R"(cond %{CLIENT-HEADER:non_existent_header} ="" [AND])"); + + CHECK_EQ(p.getTokens().size(), 5UL); CHECK_EQ(p.getTokens()[0], "cond"); CHECK_EQ(p.getTokens()[1], "%{CLIENT-HEADER:non_existent_header}"); CHECK_EQ(p.getTokens()[2], "="); CHECK_EQ(p.getTokens()[3], ""); CHECK_EQ(p.getTokens()[4], "[AND]"); + + END_TEST(); + } + + { + ParserTest p(R"(cond %{PATH} /\/foo\/bar/ [OR])"); + + CHECK_EQ(p.getTokens().size(), 4UL); + CHECK_EQ(p.getTokens()[0], "cond"); + CHECK_EQ(p.getTokens()[1], "%{PATH}"); + CHECK_EQ(p.getTokens()[2], R"(/\/foo\/bar/)"); + CHECK_EQ(p.getTokens()[3], "[OR]"); + + END_TEST(); } { ParserTest p("add-header X-HeaderRewriteApplied true"); - CHECK_EQ(p.getTokens().size(), 3); + + CHECK_EQ(p.getTokens().size(), 3UL); CHECK_EQ(p.getTokens()[0], "add-header"); CHECK_EQ(p.getTokens()[1], "X-HeaderRewriteApplied"); CHECK_EQ(p.getTokens()[2], "true"); + + END_TEST(); } /* backslash-escape */ { - ParserTest p("add-header foo \\ \\=\\<\\>\\\"\\#\\\\"); - CHECK_EQ(p.getTokens().size(), 3); + ParserTest p(R"(add-header foo \ \=\<\>\"\#\\)"); + + CHECK_EQ(p.getTokens().size(), 3UL); CHECK_EQ(p.getTokens()[0], "add-header"); CHECK_EQ(p.getTokens()[1], "foo"); - CHECK_EQ(p.getTokens()[2], " =<>\"#\\"); + CHECK_EQ(p.getTokens()[2], R"( =<>"#\)"); + + END_TEST(); } { - ParserTest p("add-header foo \\<bar\\>"); - CHECK_EQ(p.getTokens().size(), 3); + ParserTest p(R"(add-header foo \<bar\>)"); + + CHECK_EQ(p.getTokens().size(), 3UL); CHECK_EQ(p.getTokens()[0], "add-header"); CHECK_EQ(p.getTokens()[1], "foo"); CHECK_EQ(p.getTokens()[2], "<bar>"); + + END_TEST(); } { - ParserTest p("add-header foo \\bar\\"); - CHECK_EQ(p.getTokens().size(), 3); + ParserTest p(R"(add-header foo \bar\)"); + + CHECK_EQ(p.getTokens().size(), 3UL); CHECK_EQ(p.getTokens()[0], "add-header"); CHECK_EQ(p.getTokens()[1], "foo"); CHECK_EQ(p.getTokens()[2], "bar"); + + END_TEST(); } { - ParserTest p("add-header foo \"bar\""); - CHECK_EQ(p.getTokens().size(), 3); + ParserTest p(R"(add-header foo "bar")"); + + CHECK_EQ(p.getTokens().size(), 3UL); CHECK_EQ(p.getTokens()[0], "add-header"); CHECK_EQ(p.getTokens()[1], "foo"); CHECK_EQ(p.getTokens()[2], "bar"); + + END_TEST(); } { - ParserTest p("add-header foo \"\\\"bar\\\"\""); - CHECK_EQ(p.getTokens().size(), 3); + ParserTest p(R"(add-header foo "\"bar\"")"); + + CHECK_EQ(p.getTokens().size(), 3UL); CHECK_EQ(p.getTokens()[0], "add-header"); CHECK_EQ(p.getTokens()[1], "foo"); - CHECK_EQ(p.getTokens()[2], "\"bar\""); + CHECK_EQ(p.getTokens()[2], R"("bar")"); + + END_TEST(); } { - ParserTest p("add-header foo \"\\\"\\\\\\\"bar\\\\\\\"\\\"\""); - CHECK_EQ(p.getTokens().size(), 3); + ParserTest p(R"(add-header foo "\"\\\"bar\\\"\"")"); + + CHECK_EQ(p.getTokens().size(), 3UL); CHECK_EQ(p.getTokens()[0], "add-header"); CHECK_EQ(p.getTokens()[1], "foo"); - CHECK_EQ(p.getTokens()[2], "\"\\\"bar\\\"\""); + CHECK_EQ(p.getTokens()[2], R"("\"bar\"")"); + + END_TEST(); } { - ParserTest p("add-header Public-Key-Pins \"max-age=3000; pin-sha256=\\\"d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM=\\\"\""); - CHECK_EQ(p.getTokens().size(), 3); + ParserTest p(R"(add-header Public-Key-Pins "max-age=3000; pin-sha256=\"d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM=\"")"); + + CHECK_EQ(p.getTokens().size(), 3UL); CHECK_EQ(p.getTokens()[0], "add-header"); CHECK_EQ(p.getTokens()[1], "Public-Key-Pins"); - CHECK_EQ(p.getTokens()[2], "max-age=3000; pin-sha256=\"d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM=\""); + CHECK_EQ(p.getTokens()[2], R"(max-age=3000; pin-sha256="d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM=")"); + + END_TEST(); } { - ParserTest p( - "add-header Public-Key-Pins max-age\\=3000;\\ pin-sha256\\=\\\"d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM\\=\\\""); - CHECK_EQ(p.getTokens().size(), 3); + ParserTest p(R"(add-header Public-Key-Pins max-age\=3000;\ pin-sha256\=\"d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM\=\")"); + + CHECK_EQ(p.getTokens().size(), 3UL); CHECK_EQ(p.getTokens()[0], "add-header"); CHECK_EQ(p.getTokens()[1], "Public-Key-Pins"); - CHECK_EQ(p.getTokens()[2], "max-age=3000; pin-sha256=\"d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM=\""); + CHECK_EQ(p.getTokens()[2], R"(max-age=3000; pin-sha256="d6qzRu9zOECb90Uez27xWltNsj0e1Md7GkYYkVoZWmM=")"); + + END_TEST(); + } + + { + ParserTest p(R"(add-header Client-IP "%<chi>")"); + + CHECK_EQ(p.getTokens().size(), 3UL); + CHECK_EQ(p.getTokens()[0], "add-header"); + CHECK_EQ(p.getTokens()[1], "Client-IP"); + CHECK_EQ(p.getTokens()[2], R"(%<chi>)"); + + END_TEST(); + } + + { + ParserTest p(R"(add-header X-Url "http://trafficserver.apache.org/")"); + + CHECK_EQ(p.getTokens().size(), 3UL); + CHECK_EQ(p.getTokens()[0], "add-header"); + CHECK_EQ(p.getTokens()[1], "X-Url"); + CHECK_EQ(p.getTokens()[2], "http://trafficserver.apache.org/"); + + END_TEST(); + } + + { + ParserTest p(R"(add-header X-Url http://trafficserver.apache.org/)"); + + CHECK_EQ(p.getTokens().size(), 3UL); + CHECK_EQ(p.getTokens()[0], "add-header"); + CHECK_EQ(p.getTokens()[1], "X-Url"); + CHECK_EQ(p.getTokens()[2], "http://trafficserver.apache.org/"); + + END_TEST(); } /* @@ -241,54 +341,91 @@ test_parsing() */ { /* unterminated quote */ - ParserTest p("cond %{CLIENT-HEADER:non_existent_header} =\" [AND]"); - CHECK_EQ(p.getTokens().size(), 0); + ParserTest p(R"(cond %{CLIENT-HEADER:non_existent_header} =" [AND])"); + + CHECK_EQ(p.getTokens().size(), 0UL); + + END_TEST(); } { /* quote in a token */ - ParserTest p("cond %{CLIENT-HEADER:non_existent_header} =a\"b [AND]"); - CHECK_EQ(p.getTokens().size(), 0); + ParserTest p(R"(cond %{CLIENT-HEADER:non_existent_header} =a"b [AND])"); + + CHECK_EQ(p.getTokens().size(), 0UL); + + END_TEST(); } - return 0; + return errors; } int test_processing() { + int errors = 0; /* * These tests are designed to verify that the processing of the parsed input is correct. */ { - ParserTest p("cond %{CLIENT-HEADER:non_existent_header} =\"=\" [AND]"); - CHECK_EQ(p.getTokens().size(), 5); + ParserTest p(R"(cond %{CLIENT-HEADER:non_existent_header} ="=" [AND])"); + + CHECK_EQ(p.getTokens().size(), 5UL); CHECK_EQ(p.get_op(), "CLIENT-HEADER:non_existent_header"); CHECK_EQ(p.get_arg(), "=="); CHECK_EQ(p.is_cond(), true); + + END_TEST(); } { - ParserTest p("cond %{CLIENT-HEADER:non_existent_header} = \"shouldnt_ = _anyway\" [AND]"); - CHECK_EQ(p.getTokens().size(), 5); + ParserTest p(R"(cond %{CLIENT-HEADER:non_existent_header} = "shouldnt_ = _anyway" [AND])"); + + CHECK_EQ(p.getTokens().size(), 5UL); CHECK_EQ(p.get_op(), "CLIENT-HEADER:non_existent_header"); CHECK_EQ(p.get_arg(), "=shouldnt_ = _anyway"); CHECK_EQ(p.is_cond(), true); + + END_TEST(); + } + + { + ParserTest p(R"(cond %{PATH} /\.html|\.txt/)"); + + CHECK_EQ(p.getTokens().size(), 3UL); + CHECK_EQ(p.get_op(), "PATH"); + CHECK_EQ(p.get_arg(), R"(/\.html|\.txt/)"); + CHECK_EQ(p.is_cond(), true); + + END_TEST(); + } + + { + ParserTest p(R"(cond %{PATH} /\/foo\/bar/)"); + + CHECK_EQ(p.getTokens().size(), 3UL); + CHECK_EQ(p.get_op(), "PATH"); + CHECK_EQ(p.get_arg(), R"(/\/foo\/bar/)"); + CHECK_EQ(p.is_cond(), true); + + END_TEST(); } { ParserTest p("add-header X-HeaderRewriteApplied true"); - CHECK_EQ(p.getTokens().size(), 3); + + CHECK_EQ(p.getTokens().size(), 3UL); CHECK_EQ(p.get_op(), "add-header"); CHECK_EQ(p.get_arg(), "X-HeaderRewriteApplied"); - CHECK_EQ(p.get_value(), "true") + CHECK_EQ(p.get_value(), "true"); CHECK_EQ(p.is_cond(), false); + + END_TEST(); } - return 0; + return errors; } - int -tests() +main() { if (test_parsing() || test_processing()) { return 1; @@ -296,9 +433,3 @@ tests() return 0; } - -int -main() -{ - return tests(); -} diff --git a/plugins/header_rewrite/parser.cc b/plugins/header_rewrite/parser.cc index ba51c9e..28a78a2 100644 --- a/plugins/header_rewrite/parser.cc +++ b/plugins/header_rewrite/parser.cc @@ -28,50 +28,63 @@ #include "parser.h" +enum ParserState { PARSER_DEFAULT, PARSER_IN_QUOTE, PARSER_IN_REGEX }; + Parser::Parser(const std::string &original_line) : _cond(false), _empty(false) { - TSDebug(PLUGIN_NAME_DBG, "Calling CTOR for Parser"); std::string line = original_line; - bool inquote = false; + ParserState state = PARSER_DEFAULT; bool extracting_token = false; off_t cur_token_start = 0; size_t cur_token_length = 0; for (size_t i = 0; i < line.size(); ++i) { - if (!inquote && (std::isspace(line[i]) || (line[i] == '=' || line[i] == '>' || line[i] == '<'))) { + if ((state == PARSER_DEFAULT) && (std::isspace(line[i]) || ((line[i] == '=') || (line[i] == '>') || (line[i] == '<')))) { if (extracting_token) { cur_token_length = i - cur_token_start; - - if (cur_token_length) { + if (cur_token_length > 0) { _tokens.push_back(line.substr(cur_token_start, cur_token_length)); } - extracting_token = false; + state = PARSER_DEFAULT; } else if (!std::isspace(line[i])) { - /* we got a standalone =, > or < */ + // we got a standalone =, > or < _tokens.push_back(std::string(1, line[i])); } - continue; /* always eat whitespace */ - } else if (line[i] == '\\') { + } else if ((state != PARSER_IN_QUOTE) && (line[i] == '/')) { + // Deal with regexes, nothing gets escaped / quoted in here + if ((state != PARSER_IN_REGEX) && !extracting_token) { + state = PARSER_IN_REGEX; + extracting_token = true; + cur_token_start = i; + } else if ((state == PARSER_IN_REGEX) && extracting_token && (line[i - 1] != '\\')) { + cur_token_length = i - cur_token_start + 1; + _tokens.push_back(line.substr(cur_token_start, cur_token_length)); + state = PARSER_DEFAULT; + extracting_token = false; + } else if (!extracting_token) { + // /'s elsewhere are just consumed, but we should not end up here if we're not extracting a token + TSError("[%s] malformed regex \"%s\" ignoring...", PLUGIN_NAME, line.c_str()); + } + } else if ((state != PARSER_IN_REGEX) && (line[i] == '\\')) { + // Escaping if (!extracting_token) { extracting_token = true; cur_token_start = i; } line.erase(i, 1); - continue; - } else if (line[i] == '"') { - if (!inquote && !extracting_token) { - inquote = true; + } else if ((state != PARSER_IN_REGEX) && (line[i] == '"')) { + if ((state != PARSER_IN_QUOTE) && !extracting_token) { + state = PARSER_IN_QUOTE; extracting_token = true; - cur_token_start = i + 1; /* eat the leading quote */ - continue; - } else if (inquote && extracting_token) { + cur_token_start = i + 1; // Eat the leading quote + } else if ((state == PARSER_IN_QUOTE) && extracting_token) { cur_token_length = i - cur_token_start; _tokens.push_back(line.substr(cur_token_start, cur_token_length)); - inquote = false; + state = PARSER_DEFAULT; extracting_token = false; } else { - /* malformed */ + // Malformed expression / operation, ignore ... TSError("[%s] malformed line \"%s\" ignoring...", PLUGIN_NAME, line.c_str()); _tokens.clear(); _empty = true; @@ -84,8 +97,8 @@ Parser::Parser(const std::string &original_line) : _cond(false), _empty(false) break; } - if (line[i] == '=' || line[i] == '>' || line[i] == '<') { - /* these are always a seperate token */ + if ((line[i] == '=') || (line[i] == '>') || (line[i] == '<')) { + // These are always a seperate token _tokens.push_back(std::string(1, line[i])); continue; } @@ -96,15 +109,15 @@ Parser::Parser(const std::string &original_line) : _cond(false), _empty(false) } if (extracting_token) { - if (inquote) { + if (state != PARSER_IN_QUOTE) { + /* we hit the end of the line while parsing a token, let's add it */ + _tokens.push_back(line.substr(cur_token_start)); + } else { // unterminated quote, error case. TSError("[%s] malformed line, unterminated quotation: \"%s\" ignoring...", PLUGIN_NAME, line.c_str()); _tokens.clear(); _empty = true; return; - } else { - /* we hit the end of the line while parsing a token, let's add it */ - _tokens.push_back(line.substr(cur_token_start)); } } @@ -133,14 +146,15 @@ Parser::preprocess(std::vector<std::string> tokens) std::string s = tokens[0].substr(2, tokens[0].size() - 3); _op = s; - if (tokens.size() > 2 && (tokens[1][0] == '=' || tokens[1][0] == '>' || tokens[1][0] == '<')) { // cond + (=/</>) + argument + if (tokens.size() > 2 && (tokens[1][0] == '=' || tokens[1][0] == '>' || tokens[1][0] == '<')) { + // cond + [=<>] + argument _arg = tokens[1] + tokens[2]; } else if (tokens.size() > 1) { + // This is for the regular expression, which for some reason has its own handling?? ToDo: Why ? _arg = tokens[1]; } else { - { - _arg = ""; - } + // This would be for hook conditions, which has no argument. + _arg = ""; } } else { TSError("[%s] conditions must be embraced in %%{}", PLUGIN_NAME); -- To stop receiving notification emails like this one, please contact "[email protected]" <[email protected]>.
