This is an automated email from the ASF dual-hosted git repository.
amc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git
The following commit(s) were added to refs/heads/master by this push:
new 0be4e73 MIMEScanner: Make MIMEScanner a class, not a POD with free
functions.
0be4e73 is described below
commit 0be4e7326b89670dea1ffe4c7f21c36bfd2bdf59
Author: Alan M. Carroll <[email protected]>
AuthorDate: Wed Feb 6 11:10:22 2019 -0600
MIMEScanner: Make MIMEScanner a class, not a POD with free functions.
---
proxy/hdrs/HTTP.cc | 27 +--
proxy/hdrs/HdrTest.cc | 79 ---------
proxy/hdrs/HdrTest.h | 1 -
proxy/hdrs/MIME.cc | 303 ++++++++++++--------------------
proxy/hdrs/MIME.h | 82 +++++++--
proxy/hdrs/Makefile.am | 1 +
proxy/hdrs/unit_tests/test_Hdrs.cc | 86 +++++++++
proxy/hdrs/unit_tests/unit_test_main.cc | 21 ++-
8 files changed, 305 insertions(+), 295 deletions(-)
diff --git a/proxy/hdrs/HTTP.cc b/proxy/hdrs/HTTP.cc
index 41bf41b..eebf4ad 100644
--- a/proxy/hdrs/HTTP.cc
+++ b/proxy/hdrs/HTTP.cc
@@ -894,17 +894,21 @@ http_parser_parse_req(HTTPParser *parser, HdrHeap *heap,
HTTPHdrImpl *hh, const
const char *version_start;
const char *version_end;
+ ts::TextView text, parsed;
+
real_end = end;
start:
hh->m_polarity = HTTP_TYPE_REQUEST;
// Make sure the line is not longer than 64K
- if (scanner->m_line_length >= UINT16_MAX) {
+ if (scanner->get_buffered_line_size() >= UINT16_MAX) {
return PARSE_RESULT_ERROR;
}
- err = mime_scanner_get(scanner, start, real_end, &line_start, &end,
&line_is_real, eof, MIME_SCANNER_TYPE_LINE);
+ text.assign(*start, real_end);
+ err = scanner->get(text, parsed, line_is_real, eof, MIMEScanner::LINE);
+ *start = text.data();
if (err < 0) {
return err;
}
@@ -917,9 +921,9 @@ http_parser_parse_req(HTTPParser *parser, HdrHeap *heap,
HTTPHdrImpl *hh, const
return err;
}
- cur = line_start;
- ink_assert((end - cur) >= 0);
- ink_assert((end - cur) < UINT16_MAX);
+ ink_assert(parsed.size() < UINT16_MAX);
+ line_start = cur = parsed.data();
+ end = parsed.data_end();
must_copy_strings = (must_copy_strings || (!line_is_real));
@@ -1244,11 +1248,14 @@ http_parser_parse_resp(HTTPParser *parser, HdrHeap
*heap, HTTPHdrImpl *hh, const
hh->m_polarity = HTTP_TYPE_RESPONSE;
// Make sure the line is not longer than 64K
- if (scanner->m_line_length >= UINT16_MAX) {
+ if (scanner->get_buffered_line_size() >= UINT16_MAX) {
return PARSE_RESULT_ERROR;
}
- err = mime_scanner_get(scanner, start, real_end, &line_start, &end,
&line_is_real, eof, MIME_SCANNER_TYPE_LINE);
+ ts::TextView text{*start, real_end};
+ ts::TextView parsed;
+ err = scanner->get(text, parsed, line_is_real, eof, MIMEScanner::LINE);
+ *start = text.data();
if (err < 0) {
return err;
}
@@ -1256,9 +1263,9 @@ http_parser_parse_resp(HTTPParser *parser, HdrHeap *heap,
HTTPHdrImpl *hh, const
return err;
}
- cur = line_start;
- ink_assert((end - cur) >= 0);
- ink_assert((end - cur) < UINT16_MAX);
+ ink_assert(parsed.size() < UINT16_MAX);
+ line_start = cur = parsed.data();
+ end = parsed.data_end();
must_copy_strings = (must_copy_strings || (!line_is_real));
diff --git a/proxy/hdrs/HdrTest.cc b/proxy/hdrs/HdrTest.cc
index ee3f93d..875e4e1 100644
--- a/proxy/hdrs/HdrTest.cc
+++ b/proxy/hdrs/HdrTest.cc
@@ -74,7 +74,6 @@ HdrTest::go(RegressionTest *t, int /* atype ATS_UNUSED */)
status = status & test_url();
status = status & test_arena();
status = status & test_regex();
- status = status & test_http_parser_eos_boundary_cases();
status = status & test_http_mutation();
status = status & test_mime();
status = status & test_http();
@@ -521,84 +520,6 @@ HdrTest::test_mime()
-------------------------------------------------------------------------*/
int
-HdrTest::test_http_parser_eos_boundary_cases()
-{
- struct {
- const char *msg;
- int expected_result;
- int expected_bytes_consumed;
- } tests[] = {
- {"GET /index.html HTTP/1.0\r\n", PARSE_RESULT_DONE, 26},
- {"GET /index.html HTTP/1.0\r\n\r\n***BODY****", PARSE_RESULT_DONE, 28},
- {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n\r\n***BODY****",
PARSE_RESULT_DONE, 48},
- {"GET", PARSE_RESULT_ERROR, 3},
- {"GET /index.html", PARSE_RESULT_ERROR, 15},
- {"GET /index.html\r\n", PARSE_RESULT_ERROR, 17},
- {"GET /index.html HTTP/1.0", PARSE_RESULT_ERROR, 24},
- {"GET /index.html HTTP/1.0\r", PARSE_RESULT_ERROR, 25},
- {"GET /index.html HTTP/1.0\n", PARSE_RESULT_DONE, 25},
- {"GET /index.html HTTP/1.0\n\n", PARSE_RESULT_DONE, 26},
- {"GET /index.html HTTP/1.0\r\n\r\n", PARSE_RESULT_DONE, 28},
- {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar", PARSE_RESULT_ERROR, 44},
- {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\n", PARSE_RESULT_DONE,
45},
- {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n", PARSE_RESULT_DONE,
46},
- {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n\r\n",
PARSE_RESULT_DONE, 48},
- {"GET /index.html HTTP/1.0\nUser-Agent: foobar\n", PARSE_RESULT_DONE, 44},
- {"GET /index.html HTTP/1.0\nUser-Agent: foobar\nBoo: foo\n",
PARSE_RESULT_DONE, 53},
- {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n", PARSE_RESULT_DONE,
46},
- {"GET /index.html HTTP/1.0\r\n", PARSE_RESULT_DONE, 26},
- {"", PARSE_RESULT_ERROR, 0},
- {nullptr, 0, 0},
- };
-
- int i, ret, bytes_consumed;
- const char *orig_start;
- const char *start;
- const char *end;
- HTTPParser parser;
-
- int failures = 0;
-
- bri_box("test_http_parser_eos_boundary_cases");
-
- http_parser_init(&parser);
-
- for (i = 0; tests[i].msg != nullptr; i++) {
- HTTPHdr req_hdr;
-
- start = tests[i].msg;
- end = start + strlen(start); // 1 character past end of string
-
- req_hdr.create(HTTP_TYPE_REQUEST);
-
- http_parser_clear(&parser);
-
- orig_start = start;
- ret = req_hdr.parse_req(&parser, &start, end, true);
- bytes_consumed = (int)(start - orig_start);
-
- printf("======== test %d (length=%d, consumed=%d)\n", i,
(int)strlen(tests[i].msg), bytes_consumed);
- printf("[%s]\n", tests[i].msg);
- printf("\n[");
- req_hdr.print(nullptr, 0, nullptr, nullptr);
- printf("]\n\n");
-
- if ((ret != tests[i].expected_result) || (bytes_consumed !=
tests[i].expected_bytes_consumed)) {
- ++failures;
- printf("FAILED: test %d: retval <expected %d, got %d>, eaten <expected
%d, got %d>\n\n", i, tests[i].expected_result, ret,
- tests[i].expected_bytes_consumed, bytes_consumed);
- }
-
- req_hdr.destroy();
- }
-
- return (failures_to_status("test_http_parser_eos_boundary_cases", failures));
-}
-
-/*-------------------------------------------------------------------------
- -------------------------------------------------------------------------*/
-
-int
HdrTest::test_http_aux(const char *request, const char *response)
{
int err;
diff --git a/proxy/hdrs/HdrTest.h b/proxy/hdrs/HdrTest.h
index 91a979c..7f8dc83 100644
--- a/proxy/hdrs/HdrTest.h
+++ b/proxy/hdrs/HdrTest.h
@@ -51,7 +51,6 @@ private:
int test_parse_date();
int test_format_date();
int test_url();
- int test_http_parser_eos_boundary_cases();
int test_arena();
int test_regex();
int test_accept_language_match();
diff --git a/proxy/hdrs/MIME.cc b/proxy/hdrs/MIME.cc
index 81c4622..d130b20 100644
--- a/proxy/hdrs/MIME.cc
+++ b/proxy/hdrs/MIME.cc
@@ -34,6 +34,8 @@
#include "HdrUtils.h"
#include "HttpCompat.h"
+using ts::TextView;
+
/***********************************************************************
* *
* C O M P I L E O P T I O N S *
@@ -2302,215 +2304,161 @@ MIMEHdr::get_host_port_values(const char **host_ptr,
///< Pointer to host.
* P A R S E R *
* *
***********************************************************************/
-void
-_mime_scanner_init(MIMEScanner *scanner)
-{
- scanner->m_line = nullptr;
- scanner->m_line_size = 0;
- scanner->m_line_length = 0;
- scanner->m_state = MIME_PARSE_BEFORE;
-}
-//////////////////////////////////////////////////////
-// init first time structure setup //
-// clear resets an already-initialized structure //
-//////////////////////////////////////////////////////
void
-mime_scanner_init(MIMEScanner *scanner)
+MIMEScanner::init()
{
- _mime_scanner_init(scanner);
+ m_state = INITIAL_PARSE_STATE;
+ // Ugly, but required because of how proxy allocation works - that leaves
the instance in a
+ // random state, so even assigning to it can crash. Because this method
substitutes for a real
+ // constructor in the proxy allocation system, call the CTOR here. Any
memory that gets allocated
+ // is supposed to be cleaned up by calling @c clear on this object.
+ new (&m_line) std::string;
}
-// clear is to reset an already initialized structure
-void
-mime_scanner_clear(MIMEScanner *scanner)
+MIMEScanner &
+MIMEScanner::append(TextView text)
{
- ats_free(scanner->m_line);
- _mime_scanner_init(scanner);
-}
-
-void
-mime_scanner_append(MIMEScanner *scanner, const char *data, int data_size)
-{
- int free_size = scanner->m_line_size - scanner->m_line_length;
-
- //////////////////////////////////////////////////////
- // if not enough space, allocate or grow the buffer //
- //////////////////////////////////////////////////////
- if (data_size > free_size) { // need to allocate/grow the buffer
- if (scanner->m_line_size == 0) { // buffer should be at least 128 bytes
- scanner->m_line_size = 128;
- }
-
- while (free_size < data_size) { // grow buffer by powers of 2
- scanner->m_line_size *= 2;
- free_size = scanner->m_line_size - scanner->m_line_length;
- }
-
- if (scanner->m_line == nullptr) { // if no buffer yet, allocate one
- scanner->m_line = (char *)ats_malloc(scanner->m_line_size);
- } else {
- scanner->m_line = (char *)ats_realloc(scanner->m_line,
scanner->m_line_size);
- }
- }
- ////////////////////////////////////////////////
- // append new data onto the end of the buffer //
- ////////////////////////////////////////////////
-
- memcpy(&(scanner->m_line[scanner->m_line_length]), data, data_size);
- scanner->m_line_length += data_size;
+ m_line += text;
+ return *this;
}
ParseResult
-mime_scanner_get(MIMEScanner *S, const char **raw_input_s, const char
*raw_input_e, const char **output_s, const char **output_e,
- bool *output_shares_raw_input,
- bool raw_input_eof, ///< All data has been received for this
header.
- int raw_input_scan_type)
+MIMEScanner::get(TextView &input, TextView &output, bool &output_shares_input,
bool eof_p, ScanType scan_type)
{
- const char *raw_input_c, *lf_ptr;
ParseResult zret = PARSE_RESULT_CONT;
// Need this for handling dangling CR.
- static const char RAW_CR = ParseRules::CHAR_CR;
-
- ink_assert((raw_input_s != nullptr) && (*raw_input_s != nullptr));
- ink_assert(raw_input_e != nullptr);
+ static const char RAW_CR{ParseRules::CHAR_CR};
- raw_input_c = *raw_input_s;
-
- while (PARSE_RESULT_CONT == zret && raw_input_c < raw_input_e) {
- ptrdiff_t runway = raw_input_e - raw_input_c; // remaining input.
- switch (S->m_state) {
+ auto text = input;
+ while (PARSE_RESULT_CONT == zret && !text.empty()) {
+ switch (m_state) {
case MIME_PARSE_BEFORE: // waiting to find a field.
- if (ParseRules::is_cr(*raw_input_c)) {
- ++raw_input_c;
- if (runway >= 2 && ParseRules::is_lf(*raw_input_c)) {
+ if (ParseRules::is_cr(*text)) {
+ ++text;
+ if (!text.empty() && ParseRules::is_lf(*text)) {
// optimize a bit - this happens >99% of the time after a CR.
- ++raw_input_c;
+ ++text;
zret = PARSE_RESULT_DONE;
} else {
- S->m_state = MIME_PARSE_FOUND_CR;
+ m_state = MIME_PARSE_FOUND_CR;
}
- } else if (ParseRules::is_lf(*raw_input_c)) {
- ++raw_input_c;
+ } else if (ParseRules::is_lf(*text)) {
+ ++text;
zret = PARSE_RESULT_DONE; // Required by regression test.
} else {
// consume this character in the next state.
- S->m_state = MIME_PARSE_INSIDE;
+ m_state = MIME_PARSE_INSIDE;
}
break;
case MIME_PARSE_FOUND_CR:
- // Looking for a field and found a CR, which should mean terminating
- // the header. Note that we've left the CR in the input so we have
- // to skip over it.
- if (ParseRules::is_lf(*raw_input_c)) {
- // Header terminated.
- ++raw_input_c;
+ // Looking for a field and found a CR, which should mean terminating the
header.
+ if (ParseRules::is_lf(*text)) {
+ ++text;
zret = PARSE_RESULT_DONE;
} else {
- // This really should be an error (spec doesn't permit lone CR)
- // but the regression tests require it.
- mime_scanner_append(S, &RAW_CR, 1);
- S->m_state = MIME_PARSE_INSIDE;
+ // This really should be an error (spec doesn't permit lone CR) but
the regression tests
+ // require it.
+ this->append({&RAW_CR, 1});
+ m_state = MIME_PARSE_INSIDE;
}
break;
- case MIME_PARSE_INSIDE:
- lf_ptr = static_cast<const char *>(memchr(raw_input_c,
ParseRules::CHAR_LF, runway));
- if (lf_ptr) {
- raw_input_c = lf_ptr + 1;
- if (MIME_SCANNER_TYPE_LINE == raw_input_scan_type) {
- zret = PARSE_RESULT_OK;
- S->m_state = MIME_PARSE_BEFORE;
+ case MIME_PARSE_INSIDE: {
+ auto lf_off = text.find(ParseRules::CHAR_LF);
+ if (lf_off != TextView::npos) {
+ text.remove_prefix(lf_off + 1); // drop up to and including LF
+ if (LINE == scan_type) {
+ zret = PARSE_RESULT_OK;
+ m_state = MIME_PARSE_BEFORE;
} else {
- S->m_state = MIME_PARSE_AFTER;
+ m_state = MIME_PARSE_AFTER; // looking for line folding.
}
- } else {
- raw_input_c = raw_input_e; // grab all that's available.
+ } else { // no EOL, consume all text without changing state.
+ text.remove_prefix(text.size());
}
- break;
+ } break;
case MIME_PARSE_AFTER:
- // After a LF. Might be the end or a continuation.
- if (ParseRules::is_ws(*raw_input_c)) {
- char *unfold = const_cast<char *>(raw_input_c - 1);
-
- *unfold-- = ' ';
+ // After a LF, the next line might be a continuation / folded line.
That's indicated by a
+ // starting whitespace. If that's the case, back up over the preceding
CR/LF with space and
+ // pretend it's the same line.
+ if (ParseRules::is_ws(*text)) { // folded line.
+ char *unfold = const_cast<char *>(text.data() - 1);
+ *unfold-- = ' ';
if (ParseRules::is_cr(*unfold)) {
*unfold = ' ';
}
- S->m_state = MIME_PARSE_INSIDE; // back inside the field.
+ m_state = MIME_PARSE_INSIDE; // back inside the field.
} else {
- S->m_state = MIME_PARSE_BEFORE; // field terminated.
- zret = PARSE_RESULT_OK;
+ m_state = MIME_PARSE_BEFORE; // field terminated.
+ zret = PARSE_RESULT_OK;
}
break;
}
}
- ptrdiff_t data_size = raw_input_c - *raw_input_s;
+ TextView parsed_text{input.data(), text.data()};
+ bool save_parsed_text_p = !parsed_text.empty();
if (PARSE_RESULT_CONT == zret) {
- // data ran out before we got a clear final result.
- // There a number of things we need to check and possibly adjust
- // that result. It's less complex to do this cleanup than handle
- // in the parser state machine.
- if (raw_input_eof) {
+ // data ran out before we got a clear final result. There a number of
things we need to check
+ // and possibly adjust that result. It's less complex to do this cleanup
than handle all of
+ // these checks in the parser state machine.
+ if (eof_p) {
// Should never return PARSE_CONT if we've hit EOF.
- if (0 == data_size) {
+ if (parsed_text.empty()) {
// all input previously consumed. If we're between fields, that's cool.
- if (MIME_PARSE_INSIDE != S->m_state) {
- S->m_state = MIME_PARSE_BEFORE; // probably not needed...
- zret = PARSE_RESULT_DONE;
+ if (MIME_PARSE_INSIDE != m_state) {
+ m_state = MIME_PARSE_BEFORE; // probably not needed...
+ zret = PARSE_RESULT_DONE;
} else {
zret = PARSE_RESULT_ERROR; // unterminated field.
}
- } else if (MIME_PARSE_AFTER == S->m_state) {
- // Special case it seems - need to accept the final field
- // even if there's no header terminating CR LF. We check for
- // absolute end of input because otherwise this might be
- // a multiline field where we haven't seen the next leading space.
- S->m_state = MIME_PARSE_BEFORE;
- zret = PARSE_RESULT_OK;
+ } else if (MIME_PARSE_AFTER == m_state) {
+ // Special case it seems - need to accept the final field even if
there's no header
+ // terminating CR LF. This is only reasonable after absolute end of
input (EOF) because
+ // otherwise this might be a multiline field where we haven't seen the
next leading space.
+ m_state = MIME_PARSE_BEFORE;
+ zret = PARSE_RESULT_OK;
} else {
// Partial input, no field / line CR LF
zret = PARSE_RESULT_ERROR; // Unterminated field.
}
- } else if (data_size) {
- if (MIME_PARSE_INSIDE == S->m_state) {
+ } else if (!parsed_text.empty()) {
+ if (MIME_PARSE_INSIDE == m_state) {
// Inside a field but more data is expected. Save what we've got.
- mime_scanner_append(S, *raw_input_s, data_size);
- data_size = 0; // Don't append again.
- } else if (MIME_PARSE_AFTER == S->m_state) {
+ this->append(parsed_text); // Do this here to force appending.
+ save_parsed_text_p = false; // don't double append.
+ } else if (MIME_PARSE_AFTER == m_state) {
// After a field but we still have data. Need to parse it too.
- S->m_state = MIME_PARSE_BEFORE;
- zret = PARSE_RESULT_OK;
+ m_state = MIME_PARSE_BEFORE;
+ zret = PARSE_RESULT_OK;
}
}
}
- if (data_size && S->m_line_length) {
+ if (save_parsed_text_p && !m_line.empty()) {
// If we're already accumulating, continue to do so if we have data.
- mime_scanner_append(S, *raw_input_s, data_size);
+ this->append(parsed_text);
}
- // No sharing if we've accumulated data (really, force this to make compiler
shut up).
- *output_shares_raw_input = 0 == S->m_line_length;
// adjust out arguments.
if (PARSE_RESULT_CONT != zret) {
- if (0 != S->m_line_length) {
- *output_s = S->m_line;
- *output_e = *output_s + S->m_line_length;
- S->m_line_length = 0;
+ if (!m_line.empty()) {
+ output = m_line;
+ m_line.resize(0); // depending resize(0) not deallocating internal
string memory.
+ output_shares_input = false;
} else {
- *output_s = *raw_input_s;
- *output_e = raw_input_c;
+ output = parsed_text;
+ output_shares_input = true;
}
}
- // Make sure there are no '\0' in the input scanned so far
- if (zret != PARSE_RESULT_ERROR && memchr(*raw_input_s, '\0', raw_input_c -
*raw_input_s) != nullptr) {
+ // Make sure there are no null characters in the input scanned so far
+ if (zret != PARSE_RESULT_ERROR && TextView::npos != parsed_text.find('\0')) {
zret = PARSE_RESULT_ERROR;
}
- *raw_input_s = raw_input_c; // mark input consumed.
+ input.remove_prefix(parsed_text.size());
return zret;
}
@@ -2528,14 +2476,14 @@ _mime_parser_init(MIMEParser *parser)
void
mime_parser_init(MIMEParser *parser)
{
- mime_scanner_init(&parser->m_scanner);
+ parser->m_scanner.init();
_mime_parser_init(parser);
}
void
mime_parser_clear(MIMEParser *parser)
{
- mime_scanner_clear(&parser->m_scanner);
+ parser->m_scanner.clear();
_mime_parser_init(parser);
}
@@ -2545,17 +2493,6 @@ mime_parser_parse(MIMEParser *parser, HdrHeap *heap,
MIMEHdrImpl *mh, const char
{
ParseResult err;
bool line_is_real;
- const char *colon;
- const char *line_c;
- const char *line_s = nullptr;
- const char *line_e = nullptr;
- const char *field_name_first;
- const char *field_name_last;
- const char *field_value_first;
- const char *field_value_last;
- const char *field_line_first;
- const char *field_line_last;
- int field_name_length, field_value_length;
MIMEScanner *scanner = &parser->m_scanner;
@@ -2564,22 +2501,23 @@ mime_parser_parse(MIMEParser *parser, HdrHeap *heap,
MIMEHdrImpl *mh, const char
// get a name:value line, with all continuation lines glued into one line
//
////////////////////////////////////////////////////////////////////////////
- err = mime_scanner_get(scanner, real_s, real_e, &line_s, &line_e,
&line_is_real, eof, MIME_SCANNER_TYPE_FIELD);
+ TextView text{*real_s, real_e};
+ TextView parsed;
+ err = scanner->get(text, parsed, line_is_real, eof,
MIMEScanner::FIELD);
+ *real_s = text.data();
if (err != PARSE_RESULT_OK) {
return err;
}
- line_c = line_s;
-
//////////////////////////////////////////////////
// if got a LF or CR on its own, end the header //
//////////////////////////////////////////////////
- if ((line_e - line_c >= 2) && (line_c[0] == ParseRules::CHAR_CR) &&
(line_c[1] == ParseRules::CHAR_LF)) {
+ if ((parsed.size() >= 2) && (parsed[0] == ParseRules::CHAR_CR) &&
(parsed[1] == ParseRules::CHAR_LF)) {
return PARSE_RESULT_DONE;
}
- if ((line_e - line_c >= 1) && (line_c[0] == ParseRules::CHAR_LF)) {
+ if ((parsed.size() >= 1) && (parsed[0] == ParseRules::CHAR_LF)) {
return PARSE_RESULT_DONE;
}
@@ -2587,27 +2525,23 @@ mime_parser_parse(MIMEParser *parser, HdrHeap *heap,
MIMEHdrImpl *mh, const char
// find pointers into the name:value field //
/////////////////////////////////////////////
- field_line_first = line_c;
- field_line_last = line_e - 1;
-
- // find name first
- field_name_first = line_c;
/**
* Fix for INKqa09141. The is_token function fails for '@' character.
* Header names starting with '@' signs are valid headers. Hence we
* have to add one more check to see if the first parameter is '@'
* character then, the header name is valid.
**/
- if ((!ParseRules::is_token(*field_name_first)) && (*field_name_first !=
'@')) {
+ if ((!ParseRules::is_token(*parsed)) && (*parsed != '@')) {
continue; // toss away garbage line
}
// find name last
- colon = (char *)memchr(line_c, ':', (line_e - line_c));
- if (!colon) {
+ auto field_value = parsed; // need parsed as is later on.
+ auto field_name = field_value.split_prefix_at(':');
+ if (field_name.empty()) {
continue; // toss away garbage line
}
- field_name_last = colon - 1;
+
// RFC7230 section 3.2.4:
// No whitespace is allowed between the header field-name and colon. In
// the past, differences in the handling of such whitespace have led to
@@ -2615,57 +2549,44 @@ mime_parser_parse(MIMEParser *parser, HdrHeap *heap,
MIMEHdrImpl *mh, const char
// server MUST reject any received request message that contains
// whitespace between a header field-name and colon with a response code
// of 400 (Bad Request).
- if ((field_name_last >= field_name_first) && is_ws(*field_name_last)) {
+ if (is_ws(field_name.back())) {
return PARSE_RESULT_ERROR;
}
// find value first
- field_value_first = colon + 1;
- while ((field_value_first < line_e) && is_ws(*field_value_first)) {
- ++field_value_first;
- }
-
- // find_value_last
- field_value_last = line_e - 1;
- while ((field_value_last >= field_value_first) &&
ParseRules::is_wslfcr(*field_value_last)) {
- --field_value_last;
- }
-
- field_name_length = (int)(field_name_last - field_name_first + 1);
- field_value_length = (int)(field_value_last - field_value_first + 1);
+ field_value.ltrim_if(&ParseRules::is_ws);
+ field_value.rtrim_if(&ParseRules::is_wslfcr);
// Make sure the name or value is not longer than 64K
- if (field_name_length >= UINT16_MAX || field_value_length >= UINT16_MAX) {
+ if (field_name.size() >= UINT16_MAX || field_value.size() >= UINT16_MAX) {
return PARSE_RESULT_ERROR;
}
- int total_line_length = (int)(field_line_last - field_line_first + 1);
+ // int total_line_length = (int)(field_line_last - field_line_first +
1);
//////////////////////////////////////////////////////////////////////
// if we can't leave the name & value in the real buffer, copy them //
//////////////////////////////////////////////////////////////////////
if (must_copy_strings || (!line_is_real)) {
- int length = total_line_length;
- char *dup = heap->duplicate_str(field_name_first, length);
- intptr_t delta = dup - field_name_first;
-
- field_name_first += delta;
- field_value_first += delta;
+ char *dup = heap->duplicate_str(parsed.data(), parsed.size());
+ intptr_t delta = dup - parsed.data();
+ field_name.assign(field_name.data() + delta, field_name.size());
+ field_value.assign(field_value.data() + delta, field_value.size());
}
///////////////////////
// tokenize the name //
///////////////////////
- int field_name_wks_idx = hdrtoken_tokenize(field_name_first,
field_name_length);
+ int field_name_wks_idx = hdrtoken_tokenize(field_name.data(),
field_name.size());
///////////////////////////////////////////
// build and insert the new field object //
///////////////////////////////////////////
MIMEField *field = mime_field_create(heap, mh);
- mime_field_name_value_set(heap, mh, field, field_name_wks_idx,
field_name_first, field_name_length, field_value_first,
- field_value_length, true, total_line_length,
false);
+ mime_field_name_value_set(heap, mh, field, field_name_wks_idx,
field_name.data(), field_name.size(), field_value.data(),
+ field_value.size(), true, parsed.size(), false);
mime_hdr_field_attach(mh, field, 1, nullptr);
}
}
diff --git a/proxy/hdrs/MIME.h b/proxy/hdrs/MIME.h
index 5e78834..972af09 100644
--- a/proxy/hdrs/MIME.h
+++ b/proxy/hdrs/MIME.h
@@ -25,6 +25,7 @@
#include <sys/time.h>
#include <string_view>
+#include <string>
#include "tscore/ink_assert.h"
#include "tscore/ink_apidefs.h"
@@ -33,6 +34,8 @@
#include "HdrHeap.h"
#include "HdrToken.h"
+#include "tscpp/util/TextView.h"
+
/***********************************************************************
* *
* Defines *
@@ -58,9 +61,6 @@ enum MimeParseState {
MIME_PARSE_AFTER, ///< After a field.
};
-#define MIME_SCANNER_TYPE_LINE 0
-#define MIME_SCANNER_TYPE_FIELD 1
-
/***********************************************************************
* *
* Assertions *
@@ -283,13 +283,75 @@ struct MIMEHdrImpl : public HdrHeapObjImpl {
* *
***********************************************************************/
+/** A pre-parser used to extract MIME "lines" from raw input for further
parsing.
+ *
+ * This maintains an internal line buffer which is used to keeping content
between calls
+ * when the parse has not yet completed.
+ *
+ */
struct MIMEScanner {
- char *m_line; // buffered line being built up
- int m_line_length; // size of real live data in buffer
- int m_line_size; // total allocated size of buffer
- MimeParseState m_state; ///< Parsing machine state.
+ using self_type = MIMEScanner; ///< Self reference type.
+public:
+ /// Type of input scanning.
+ enum ScanType {
+ LINE = 0, ///< Scan a single line.
+ FIELD = 1, ///< Scan with line folding enabled.
+ };
+
+ void init(); ///< Pseudo-constructor required by proxy allocation.
+ void clear(); ///< Pseudo-destructor required by proxy allocation.
+
+ /// @return The size of the internal line buffer.
+ size_t get_buffered_line_size() const;
+
+ /** Scan @a input for MIME data delimited by CR/LF end of line markers.
+ *
+ * @param input [in,out] Text to scan.
+ * @param output [out] Parsed text from @a input, if any.
+ * @param output_shares_input [out] Whether @a output is in @a input.
+ * @param eof_p [in] The source for @a input is done, no more data will ever
be available.
+ * @param scan_type [in] Whether to check for line folding.
+ * @return The result of scanning.
+ *
+ * @a input is updated to remove text that was scanned. @a output is updated
to be a view of the
+ * scanned @a input. This is separate because @a output may be a view of @a
input or a view of the
+ * internal line buffer. Which of these cases obtains is returned in @a
output_shares_input. This
+ * is @c true if @a output is a view of @a input, and @c false if @a output
is a view of the
+ * internal buffer, but is only set if the result is not @c
PARSE_RESULT_CONT (that is, it is not
+ * set until scanning has completed). If @a scan_type is @c FIELD then
folded lines are
+ * accumulated in to a single line stored in the internal buffer. Otherwise
the scanning
+ * terminates at the first CR/LF.
+ */
+ ParseResult get(ts::TextView &input, ts::TextView &output, bool
&output_shares_input, bool eof_p, ScanType scan_type);
+
+protected:
+ /** Append @a text to the internal buffer.
+ *
+ * @param text Text to append.
+ * @return @a this
+ *
+ * A copy of @a text is appended to the internal line buffer.
+ */
+ self_type &append(ts::TextView text);
+
+ static constexpr MimeParseState INITIAL_PARSE_STATE = MIME_PARSE_BEFORE;
+ std::string m_line; ///< Internally buffered line
data for field coalescence.
+ MimeParseState m_state{INITIAL_PARSE_STATE}; ///< Parsing machine state.
};
+inline size_t
+MIMEScanner::get_buffered_line_size() const
+{
+ return m_line.size();
+}
+
+inline void
+MIMEScanner::clear()
+{
+ m_line.clear();
+ m_state = INITIAL_PARSE_STATE;
+}
+
struct MIMEParser {
MIMEScanner m_scanner;
int32_t m_field;
@@ -692,12 +754,6 @@ void mime_field_name_value_set(HdrHeap *heap, MIMEHdrImpl
*mh, MIMEField *field,
void mime_field_value_append(HdrHeap *heap, MIMEHdrImpl *mh, MIMEField *field,
const char *value, int length, bool prepend_comma,
const char separator);
-void mime_scanner_init(MIMEScanner *scanner);
-void mime_scanner_clear(MIMEScanner *scanner);
-void mime_scanner_append(MIMEScanner *scanner, const char *data, int
data_size);
-ParseResult mime_scanner_get(MIMEScanner *S, const char **raw_input_s, const
char *raw_input_e, const char **output_s,
- const char **output_e, bool
*output_shares_raw_input, bool raw_input_eof, int raw_input_scan_type);
-
void mime_parser_init(MIMEParser *parser);
void mime_parser_clear(MIMEParser *parser);
ParseResult mime_parser_parse(MIMEParser *parser, HdrHeap *heap, MIMEHdrImpl
*mh, const char **real_s, const char *real_e,
diff --git a/proxy/hdrs/Makefile.am b/proxy/hdrs/Makefile.am
index 172c0d8..75c3386 100644
--- a/proxy/hdrs/Makefile.am
+++ b/proxy/hdrs/Makefile.am
@@ -82,6 +82,7 @@ test_proxy_hdrs_CPPFLAGS = $(AM_CPPFLAGS)\
test_proxy_hdrs_SOURCES = \
unit_tests/unit_test_main.cc \
+ unit_tests/test_Hdrs.cc \
unit_tests/test_HdrUtils.cc
test_proxy_hdrs_LDADD = \
diff --git a/proxy/hdrs/unit_tests/test_Hdrs.cc
b/proxy/hdrs/unit_tests/test_Hdrs.cc
new file mode 100644
index 0000000..a43503a
--- /dev/null
+++ b/proxy/hdrs/unit_tests/test_Hdrs.cc
@@ -0,0 +1,86 @@
+/** @file
+
+ Catch-based unit tests for various header logic.
+ This replaces the old regression tests in HdrTest.cc.
+
+ @section license License
+
+ Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements.
+ See the NOTICE file distributed with this work for additional information
regarding copyright
+ ownership. The ASF licenses this file to you under the Apache License,
Version 2.0 (the
+ "License"); you may not use this file except in compliance with the
License. You may obtain a
+ copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
distributed under the License
+ is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express
+ or implied. See the License for the specific language governing permissions
and limitations under
+ the License.
+ */
+
+#include <string>
+#include <cstring>
+#include <cctype>
+#include <bitset>
+#include <initializer_list>
+#include <array>
+#include <new>
+
+#include "catch.hpp"
+
+#include "HTTP.h"
+
+// replaces test_http_parser_eos_boundary_cases
+TEST_CASE("HdrTest", "[proxy][hdrtest]")
+{
+ struct Test {
+ ts::TextView msg;
+ int expected_result;
+ int expected_bytes_consumed;
+ };
+ static const std::array<Test, 20> tests = {{
+ {"GET /index.html HTTP/1.0\r\n", PARSE_RESULT_DONE, 26},
+ {"GET /index.html HTTP/1.0\r\n\r\n***BODY****", PARSE_RESULT_DONE, 28},
+ {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n\r\n***BODY****",
PARSE_RESULT_DONE, 48},
+ {"GET", PARSE_RESULT_ERROR, 3},
+ {"GET /index.html", PARSE_RESULT_ERROR, 15},
+ {"GET /index.html\r\n", PARSE_RESULT_ERROR, 17},
+ {"GET /index.html HTTP/1.0", PARSE_RESULT_ERROR, 24},
+ {"GET /index.html HTTP/1.0\r", PARSE_RESULT_ERROR, 25},
+ {"GET /index.html HTTP/1.0\n", PARSE_RESULT_DONE, 25},
+ {"GET /index.html HTTP/1.0\n\n", PARSE_RESULT_DONE, 26},
+ {"GET /index.html HTTP/1.0\r\n\r\n", PARSE_RESULT_DONE, 28},
+ {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar", PARSE_RESULT_ERROR, 44},
+ {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\n", PARSE_RESULT_DONE,
45},
+ {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n", PARSE_RESULT_DONE,
46},
+ {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n\r\n",
PARSE_RESULT_DONE, 48},
+ {"GET /index.html HTTP/1.0\nUser-Agent: foobar\n", PARSE_RESULT_DONE, 44},
+ {"GET /index.html HTTP/1.0\nUser-Agent: foobar\nBoo: foo\n",
PARSE_RESULT_DONE, 53},
+ {"GET /index.html HTTP/1.0\r\nUser-Agent: foobar\r\n", PARSE_RESULT_DONE,
46},
+ {"GET /index.html HTTP/1.0\r\n", PARSE_RESULT_DONE, 26},
+ {"", PARSE_RESULT_ERROR, 0},
+ }};
+
+ HTTPParser parser;
+
+ http_parser_init(&parser);
+
+ for (auto const &test : tests) {
+ HTTPHdr req_hdr;
+ HdrHeap *heap = new_HdrHeap(HDR_HEAP_DEFAULT_SIZE + 64); // extra to
prevent proxy allocation.
+
+ req_hdr.create(HTTP_TYPE_REQUEST, heap);
+
+ http_parser_clear(&parser);
+
+ auto start = test.msg.data();
+ auto ret = req_hdr.parse_req(&parser, &start,
test.msg.data_end(), true);
+ auto bytes_consumed = start - test.msg.data();
+
+ REQUIRE(bytes_consumed == test.expected_bytes_consumed);
+ REQUIRE(ret == test.expected_result);
+
+ req_hdr.destroy();
+ }
+}
diff --git a/proxy/hdrs/unit_tests/unit_test_main.cc
b/proxy/hdrs/unit_tests/unit_test_main.cc
index 6aed3a6..636c5cc 100644
--- a/proxy/hdrs/unit_tests/unit_test_main.cc
+++ b/proxy/hdrs/unit_tests/unit_test_main.cc
@@ -21,5 +21,24 @@
limitations under the License.
*/
-#define CATCH_CONFIG_MAIN
+#include "HTTP.h"
+
+#define CATCH_CONFIG_RUNNER
#include "catch.hpp"
+
+extern int cmd_disable_pfreelist;
+
+int
+main(int argc, char *argv[])
+{
+ // No thread setup, forbid use of thread local allocators.
+ cmd_disable_pfreelist = true;
+ // Get all of the HTTP WKS items populated.
+ http_init();
+
+ int result = Catch::Session().run(argc, argv);
+
+ // global clean-up...
+
+ return result;
+}