Compliance: Ignore unused chunk-extensions to correctly handle large ones.
Chunk parser did not advance until it got a complete chunk-extension.
HttpStateData::maybeReadVirginBody() does not grow the buffer if there
is no space available for the [chunked] body so the transaction with a
large chunk-extension would stall. The default HttpStateData input
buffer size is just 2KB so it does not take a "very large" extension to
stall the transaction.
Somewhat ironically, we are not even interested in the HTTP
chunk-extension itself. After the change, Squid skips the
chunk-extension data as soon as it gets it (except for the last-chunk,
see below). Incrementally ignoring data requires handling quoted strings
correctly, to avoid mis-detecting a quoted CRLF. Thus, we now preserve
the quoted string parsing state in ChunkedCodingParser.
Last-chunk chunk-extension is useful for ICAP. We parse it instead of
ignoring. This parsing is done as before and may still lead to
connection hanging, but a proper fix is outside this patch scope.
Similarly, other stalling reasons are outside this patch scope.
Co-Advisor test case: test_case/rfc2616/chunked-1p0-longValExt-16385-toClt
Compliance: Ignore unused chunk-extensions to correctly handle large ones.
Chunk parser did not advance until it got a complete chunk-extension.
HttpStateData::maybeReadVirginBody() does not grow the buffer if there is no
space available for the [chunked] body so the transaction with a large
chunk-extension would stall. The default HttpStateData input buffer size is
just 2KB so it does not take a "very large" extension to stall the
transaction.
Somewhat ironically, we are not even interested in the HTTP chunk-extension
itself. After the change, Squid skips the chunk-extension data as soon as it
gets it (except for the last-chunk, see below). Incrementally ignoring data
requires handling quoted strings correctly, to avoid mis-detecting a quoted
CRLF. Thus, we now preserve the quoted string parsing state in
ChunkedCodingParser.
Last-chunk chunk-extension is useful for ICAP. We parse it instead of
ignoring. This parsing is done as before and may still lead to connection
hanging, but a proper fix is outside this patch scope. Similarly, other
stalling reasons are outside this patch scope.
Co-Advisor test case: test_case/rfc2616/chunked-1p0-longValExt-16385-toClt
=== modified file 'src/ChunkedCodingParser.cc'
--- src/ChunkedCodingParser.cc 2010-06-15 07:21:57 +0000
+++ src/ChunkedCodingParser.cc 2010-08-29 23:55:59 +0000
@@ -1,150 +1,161 @@
#include "squid.h"
#include "base/TextException.h"
#include "Parsing.h"
#include "ChunkedCodingParser.h"
#include "MemBuf.h"
-ChunkedCodingParser::Step ChunkedCodingParser::psChunkBeg = &ChunkedCodingParser::parseChunkBeg;
+ChunkedCodingParser::Step ChunkedCodingParser::psChunkSize = &ChunkedCodingParser::parseChunkSize;
+ChunkedCodingParser::Step ChunkedCodingParser::psUnusedChunkExtension = &ChunkedCodingParser::parseUnusedChunkExtension;
+ChunkedCodingParser::Step ChunkedCodingParser::psLastChunkExtension = &ChunkedCodingParser::parseLastChunkExtension;
ChunkedCodingParser::Step ChunkedCodingParser::psChunkBody = &ChunkedCodingParser::parseChunkBody;
ChunkedCodingParser::Step ChunkedCodingParser::psChunkEnd = &ChunkedCodingParser::parseChunkEnd;
ChunkedCodingParser::Step ChunkedCodingParser::psTrailer = &ChunkedCodingParser::parseTrailer;
ChunkedCodingParser::Step ChunkedCodingParser::psMessageEnd = &ChunkedCodingParser::parseMessageEnd;
ChunkedCodingParser::ChunkedCodingParser()
{
reset();
}
void ChunkedCodingParser::reset()
{
- theStep = psChunkBeg;
+ theStep = psChunkSize;
theChunkSize = theLeftBodySize = 0;
doNeedMoreData = false;
theIn = theOut = NULL;
useOriginBody = -1;
+ inQuoted = inSlashed = false;
}
bool ChunkedCodingParser::parse(MemBuf *rawData, MemBuf *parsedContent)
{
Must(rawData && parsedContent);
theIn = rawData;
theOut = parsedContent;
// we must reset this all the time so that mayContinue() lets us
// output more content if we stopped due to needsMoreSpace() before
doNeedMoreData = !theIn->hasContent();
while (mayContinue()) {
(this->*theStep)();
}
return theStep == psMessageEnd;
}
bool ChunkedCodingParser::needsMoreData() const
{
return doNeedMoreData;
}
bool ChunkedCodingParser::needsMoreSpace() const
{
assert(theOut);
return theStep == psChunkBody && !theOut->hasPotentialSpace();
}
bool ChunkedCodingParser::mayContinue() const
{
return !needsMoreData() && !needsMoreSpace() && theStep != psMessageEnd;
}
-void ChunkedCodingParser::parseChunkBeg()
+void ChunkedCodingParser::parseChunkSize()
{
Must(theChunkSize <= 0); // Should(), really
- size_t crlfBeg = 0;
- size_t crlfEnd = 0;
-
- if (findCrlf(crlfBeg, crlfEnd)) {
- debugs(94,7, "found chunk-size end: " << crlfBeg << "-" << crlfEnd);
- int64_t size = -1;
- const char *p = 0;
-
- if (StringToInt64(theIn->content(), size, &p, 16)) {
- if (size < 0) {
- throw TexcHere("negative chunk size");
- return;
- }
+ const char *p = theIn->content();
+ while (p < theIn->space() && xisxdigit(*p)) ++p;
+ if (p >= theIn->space()) {
+ doNeedMoreData = true;
+ return;
+ }
- // to allow chunk extensions in any chunk, remove this size check
- if (size == 0) // is this the last-chunk?
- parseChunkExtension(p, theIn->content() + crlfBeg);
-
- theIn->consume(crlfEnd);
- theChunkSize = theLeftBodySize = size;
- debugs(94,7, "found chunk: " << theChunkSize);
- theStep = theChunkSize == 0 ? psTrailer : psChunkBody;
- return;
+ int64_t size = -1;
+ if (StringToInt64(theIn->content(), size, &p, 16)) {
+ if (size < 0)
+ throw TexcHere("negative chunk size");
+
+ theChunkSize = theLeftBodySize = size;
+ debugs(94,7, "found chunk: " << theChunkSize);
+ // parse chunk extensions only in the last-chunk
+ if (theChunkSize)
+ theStep = psUnusedChunkExtension;
+ else {
+ theIn->consume(p - theIn->content());
+ theStep = psLastChunkExtension;
}
-
+ } else
throw TexcHere("corrupted chunk size");
- }
+}
- doNeedMoreData = true;
+void ChunkedCodingParser::parseUnusedChunkExtension()
+{
+ size_t crlfBeg = 0;
+ size_t crlfEnd = 0;
+ if (findCrlf(crlfBeg, crlfEnd, inQuoted, inSlashed)) {
+ inQuoted = inSlashed = false;
+ theIn->consume(crlfEnd);
+ theStep = theChunkSize ? psChunkBody : psTrailer;
+ } else {
+ theIn->consume(theIn->contentSize());
+ doNeedMoreData = true;
+ }
}
void ChunkedCodingParser::parseChunkBody()
{
Must(theLeftBodySize > 0); // Should, really
const size_t availSize = min(theLeftBodySize, (uint64_t)theIn->contentSize());
const size_t safeSize = min(availSize, (size_t)theOut->potentialSpaceSize());
doNeedMoreData = availSize < theLeftBodySize;
// and we may also need more space
theOut->append(theIn->content(), safeSize);
theIn->consume(safeSize);
theLeftBodySize -= safeSize;
if (theLeftBodySize == 0)
theStep = psChunkEnd;
else
Must(needsMoreData() || needsMoreSpace());
}
void ChunkedCodingParser::parseChunkEnd()
{
Must(theLeftBodySize == 0); // Should(), really
size_t crlfBeg = 0;
size_t crlfEnd = 0;
if (findCrlf(crlfBeg, crlfEnd)) {
if (crlfBeg != 0) {
throw TexcHere("found data bewteen chunk end and CRLF");
return;
}
theIn->consume(crlfEnd);
theChunkSize = 0; // done with the current chunk
- theStep = psChunkBeg;
+ theStep = psChunkSize;
return;
}
doNeedMoreData = true;
}
void ChunkedCodingParser::parseTrailer()
{
Must(theChunkSize == 0); // Should(), really
while (mayContinue())
parseTrailerHeader();
}
void ChunkedCodingParser::parseTrailerHeader()
{
size_t crlfBeg = 0;
size_t crlfEnd = 0;
if (findCrlf(crlfBeg, crlfEnd)) {
@@ -152,55 +163,62 @@ void ChunkedCodingParser::parseTrailerHe
; //theTrailer.append(theIn->content(), crlfEnd);
theIn->consume(crlfEnd);
if (crlfBeg == 0)
theStep = psMessageEnd;
return;
}
doNeedMoreData = true;
}
void ChunkedCodingParser::parseMessageEnd()
{
// termination step, should not be called
Must(false); // Should(), really
}
-// finds next CRLF
+/// Finds next CRLF. Does not store parsing state.
bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd)
{
+ bool quoted = false;
+ bool slashed = false;
+ return findCrlf(crlfBeg, crlfEnd, quoted, slashed);
+}
+
+/// Finds next CRLF. Parsing state stored in quoted and slashed
+/// parameters. Incremental: can resume when more data is available.
+bool ChunkedCodingParser::findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool "ed, bool &slashed)
+{
// XXX: This code was copied, with permission, from another software.
// There is a similar and probably better code inside httpHeaderParse
// but it seems difficult to isolate due to parsing-unrelated bloat.
// Such isolation should probably be done before this class is used
// for handling of traffic "more external" than ICAP.
const char *buf = theIn->content();
size_t size = theIn->contentSize();
ssize_t crOff = -1;
- bool quoted = false;
- bool slashed = false;
for (size_t i = 0; i < size; ++i) {
if (slashed) {
slashed = false;
continue;
}
const char c = buf[i];
// handle quoted strings
if (quoted) {
if (c == '\\')
slashed = true;
else if (c == '"')
quoted = false;
continue;
} else if (c == '"') {
quoted = true;
crOff = -1;
@@ -217,51 +235,65 @@ bool ChunkedCodingParser::findCrlf(size_
if (c == '\r')
crOff = i;
} else { // skipping CRs, looking for the first LF
if (c == '\n') {
crlfBeg = crOff;
crlfEnd = ++i;
return true;
}
if (c != '\r')
crOff = -1;
}
}
return false;
}
// chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
-void ChunkedCodingParser::parseChunkExtension(const char *startExt, const char *endExt)
+void ChunkedCodingParser::parseLastChunkExtension()
{
+ size_t crlfBeg = 0;
+ size_t crlfEnd = 0;
+
+ if (!findCrlf(crlfBeg, crlfEnd)) {
+ doNeedMoreData = true;
+ return;
+ }
+
+ const char *const startExt = theIn->content();
+ const char *const endExt = theIn->content() + crlfBeg;
+
// chunk-extension starts at startExt and ends with LF at endEx
for (const char *p = startExt; p < endExt;) {
while (*p == ' ' || *p == '\t') ++p; // skip spaces before ';'
if (*p++ != ';') // each ext name=value pair is preceded with ';'
- return;
+ break;
while (*p == ' ' || *p == '\t') ++p; // skip spaces before name
if (p >= endExt)
- return; // malformed extension: ';' without ext name=value pair
+ break; // malformed extension: ';' without ext name=value pair
const int extSize = endExt - p;
// TODO: we need debugData() stream manipulator to dump data
debugs(94,7, "Found chunk extension; size=" << extSize);
// TODO: support implied *LWS around '='
if (extSize > 18 && strncmp(p, "use-original-body=", 18) == 0) {
(void)StringToInt64(p+18, useOriginBody, &p, 10);
debugs(94, 3, HERE << "use-original-body=" << useOriginBody);
- return; // remove to support more than just use-original-body
+ break; // remove to support more than just use-original-body
} else {
debugs(94, 5, HERE << "skipping unknown chunk extension");
// TODO: support quoted-string chunk-ext-val
while (p < endExt && *p != ';') ++p; // skip until the next ';'
}
}
+
+ theIn->consume(crlfEnd);
+ theStep = theChunkSize ? psChunkBody : psTrailer;
}
=== modified file 'src/ChunkedCodingParser.h'
--- src/ChunkedCodingParser.h 2010-06-14 20:01:59 +0000
+++ src/ChunkedCodingParser.h 2010-08-29 23:56:17 +0000
@@ -56,50 +56,57 @@ public:
ChunkedCodingParser();
void reset();
/**
\retval true complete success
\retval false needs more data
\throws ?? error.
*/
bool parse(MemBuf *rawData, MemBuf *parsedContent);
bool needsMoreData() const;
bool needsMoreSpace() const;
private:
typedef void (ChunkedCodingParser::*Step)();
private:
bool mayContinue() const;
+ void parseChunkSize();
+ void parseUnusedChunkExtension();
+ void parseLastChunkExtension();
void parseChunkBeg();
void parseChunkBody();
void parseChunkEnd();
void parseTrailer();
void parseTrailerHeader();
void parseMessageEnd();
- void parseChunkExtension(const char *, const char * );
bool findCrlf(size_t &crlfBeg, size_t &crlfEnd);
+ bool findCrlf(size_t &crlfBeg, size_t &crlfEnd, bool "ed, bool &slashed);
private:
- static Step psChunkBeg;
+ static Step psChunkSize;
+ static Step psUnusedChunkExtension;
+ static Step psLastChunkExtension;
static Step psChunkBody;
static Step psChunkEnd;
static Step psTrailer;
static Step psMessageEnd;
MemBuf *theIn;
MemBuf *theOut;
Step theStep;
uint64_t theChunkSize;
uint64_t theLeftBodySize;
bool doNeedMoreData;
+ bool inQuoted; ///< stores parsing state for incremental findCrlf
+ bool inSlashed; ///< stores parsing state for incremental findCrlf
public:
int64_t useOriginBody;
};
#endif /* SQUID_CHUNKEDCODINGPARSER_H */