Diff
Modified: trunk/LayoutTests/ChangeLog (89868 => 89869)
--- trunk/LayoutTests/ChangeLog 2011-06-27 22:17:45 UTC (rev 89868)
+++ trunk/LayoutTests/ChangeLog 2011-06-27 22:18:15 UTC (rev 89869)
@@ -1,3 +1,13 @@
+2011-06-27 Jay Civelli <[email protected]>
+
+ Reviewed by Darin Fisher.
+
+ Adding binary part support to MHTML.
+ https://bugs.webkit.org/show_bug.cgi?id=63310
+
+ * mhtml/multi_frames_binary.mht: Added.
+ * platform/chromium/mhtml/multi_frames_binary-expected.txt: Added.
+
2011-06-27 Levi Weintraub <[email protected]>
Reviewed by Eric Seidel.
Added: trunk/LayoutTests/mhtml/multi_frames_binary.mht (0 => 89869)
--- trunk/LayoutTests/mhtml/multi_frames_binary.mht (rev 0)
+++ trunk/LayoutTests/mhtml/multi_frames_binary.mht 2011-06-27 22:18:15 UTC (rev 89869)
@@ -0,0 +1,92 @@
+From: <Saved by WebKit>
+Subject: A page that contains multiple nested frames
+Date: Mon, 26 Jun 2011 09:09:01 -0800
+MIME-Version: 1.0
+Content-Type: multipart/related;
+ type="text/html";
+ boundary="----=_NextPart_000_8D20_8CDAF17B.8E5ECA45"
+
+------=_NextPart_000_8D20_8CDAF17B.8E5ECA45
+Content-Type: text/html
+Content-Transfer-Encoding: binary
+Content-Location: http://localhost/multi_frames.html
+
+<html><head><meta charset="ISO-8859-1">
+
+
+
+<title>A page that contains multiple nested frames</title>
+
+<script>
+if (window.layoutTestController) {
+ layoutTestController.dumpAsText();
+ layoutTestController.dumpChildFramesAsText();
+}
+</script>
+
+</head><body>
+This page contains several frames.<br>
+<iframe src=""
+<iframe src=""
+<iframe src=""
+And a red square:<br>
+<img src="" _onerror_="document.getElementById('error').innerHTML+='Failed to load image!'">
+<div id="error"></div>
+
+</body></html>------=_NextPart_000_8D20_8CDAF17B.8E5ECA45
+Content-Type: image/png
+Content-Transfer-Encoding: binary
+Content-Location: http://localhost/resources/red_square.png
+
+\x89PNG
+
+
+IHDR ;0\xAE\xA2 sRGB \xAE\xCE\xE9 gAMA \xB1\x8F\xFCa pHYs \xC4 \xC4\x95+ \x84IDATHK\xED\x961\x800E\xC1ر\x93\xA3\x8E\x9E\xC1x\xFF\xCDx=\xA0{\xFF.\xA5\xF3\xCF\xE9+i\xE0{Y\x8D\x82\x8E\x89\xA9\x97\xD2\x86 &\xACM0T\xE4H\xD5^&aO\xAA\x86\x8A\xBC\xA9\xDA\xCB$\xECI\xD5P\x91W\xA0?\xD5|Ms\xD8\xEAC\xA2d\xA64\xD4ڼG\xEE\㾽`#9\xCE\xC1_\x83\xD8\xDFp卽>&ؓ\xAA\xA1"\xAF\xC0XB\xE9\xDC\xFBwW IEND\xAEB`\x82------=_NextPart_000_8D20_8CDAF17B.8E5ECA45
+Content-Type: text/html
+Content-Transfer-Encoding: binary
+Content-Location: http://localhost/resources/frame_0.html
+
+<html><head><meta charset="ISO-8859-1"></head><body>
+The first frame!
+
+
+
+</body></html>------=_NextPart_000_8D20_8CDAF17B.8E5ECA45
+Content-Type: text/html
+Content-Transfer-Encoding: binary
+Content-Location: http://localhost/resources/frame_1.html
+
+<html><head><meta charset="ISO-8859-1"></head><body>
+The second frame!
+
+
+
+</body></html>------=_NextPart_000_8D20_8CDAF17B.8E5ECA45
+Content-Type: text/html
+Content-Transfer-Encoding: binary
+Content-Location: http://localhost/resources/frame_2.html
+
+<html><head><meta charset="ISO-8859-1"></head><body>
+The third frame!<br>
+This one contains yet another frame. What a twist!<br>
+<iframe src=""
+
+
+
+
+
+
+</body></html>------=_NextPart_000_8D20_8CDAF17B.8E5ECA45
+Content-Type: text/html
+Content-Transfer-Encoding: binary
+Content-Location: http://localhost/resources/frame_4.html
+
+<html><head><meta charset="ISO-8859-1"></head><body>
+This is frame 4!<br>
+<img src="" _onerror_="document.getElementById('error').innerHTML+='Failed to load image!'">
+<div id="error"></div>
+
+
+
+
+</body></html>------=_NextPart_000_8D20_8CDAF17B.8E5ECA45--
Added: trunk/LayoutTests/platform/chromium/mhtml/multi_frames_binary-expected.txt (0 => 89869)
--- trunk/LayoutTests/platform/chromium/mhtml/multi_frames_binary-expected.txt (rev 0)
+++ trunk/LayoutTests/platform/chromium/mhtml/multi_frames_binary-expected.txt 2011-06-27 22:18:15 UTC (rev 89869)
@@ -0,0 +1,31 @@
+This page contains several frames.
+
+
+
+And a red square:
+
+
+
+--------
+Frame: '<!--framePath //<!--frame0-->-->'
+--------
+The first frame!
+
+--------
+Frame: '<!--framePath //<!--frame1-->-->'
+--------
+The second frame!
+
+--------
+Frame: '<!--framePath //<!--frame2-->-->'
+--------
+The third frame!
+This one contains yet another frame. What a twist!
+
+
+--------
+Frame: '<!--framePath //<!--frame2-->/<!--frame0-->-->'
+--------
+This is frame 4!
+
+
Modified: trunk/Source/WebCore/ChangeLog (89868 => 89869)
--- trunk/Source/WebCore/ChangeLog 2011-06-27 22:17:45 UTC (rev 89868)
+++ trunk/Source/WebCore/ChangeLog 2011-06-27 22:18:15 UTC (rev 89869)
@@ -1,3 +1,23 @@
+2011-06-27 Jay Civelli <[email protected]>
+
+ Reviewed by Darin Fisher.
+
+ Adding binary part support to MHTML.
+ https://bugs.webkit.org/show_bug.cgi?id=63310
+
+ * loader/archive/mhtml/MHTMLArchive.cpp:
+ (WebCore::MHTMLArchive::generateMHTMLData):
+ (WebCore::MHTMLArchive::generateMHTMLDataUsingBinaryEncoding):
+ * loader/archive/mhtml/MHTMLArchive.h:
+ * loader/archive/mhtml/MHTMLParser.cpp:
+ (WebCore::MHTMLParser::parseNextPart):
+ * platform/SharedBuffer.cpp:
+ (WebCore::SharedBuffer::append):
+ * platform/SharedBuffer.h:
+ * platform/network/MIMEHeader.cpp:
+ (WebCore::MIMEHeader::parseContentTransferEncoding):
+ * platform/network/MIMEHeader.h:
+
2011-06-27 Levi Weintraub <[email protected]>
Reviewed by Eric Seidel.
Modified: trunk/Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp (89868 => 89869)
--- trunk/Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp 2011-06-27 22:17:45 UTC (rev 89868)
+++ trunk/Source/WebCore/loader/archive/mhtml/MHTMLArchive.cpp 2011-06-27 22:18:15 UTC (rev 89869)
@@ -57,6 +57,7 @@
const char* const quotedPrintable = "quoted-printable";
const char* const base64 = "base64";
+const char* const binary = "binary";
static String generateRandomBoundary()
{
@@ -124,6 +125,16 @@
PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page)
{
+ return generateMHTMLData(page, false); // useBinaryEncoding = false: parts are written base64-encoded.
+}
+
+PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLDataUsingBinaryEncoding(Page* page)
+{
+ return generateMHTMLData(page, true); // useBinaryEncoding = true: parts are written raw with "binary" Content-Transfer-Encoding.
+}
+
+PassRefPtr<SharedBuffer> MHTMLArchive::generateMHTMLData(Page* page, bool useBinaryEncoding)
+{
Vector<PageSerializer::Resource> resources;
PageSerializer pageSerializer(&resources);
pageSerializer.serialize(page);
@@ -167,7 +178,7 @@
stringBuilder.append("Content-Type: ");
stringBuilder.append(resource.mimeType);
- const char* contentEncoding = MIMETypeRegistry::isSupportedJavaScriptMIMEType(resource.mimeType) || MIMETypeRegistry::isSupportedNonImageMIMEType(resource.mimeType) ? quotedPrintable : base64;
+ const char* contentEncoding = useBinaryEncoding ? binary : base64;
stringBuilder.append("\r\nContent-Transfer-Encoding: ");
stringBuilder.append(contentEncoding);
stringBuilder.append("\r\nContent-Location: ");
@@ -177,27 +188,36 @@
asciiString = stringBuilder.toString().utf8();
mhtmlData->append(asciiString.data(), asciiString.length());
- // FIXME: ideally we would encode the content as a stream without having to fetch it all.
- const char* data = resource.data->data();
- size_t dataLength = resource.data->size();
- Vector<char> encodedData;
- if (!strcmp(contentEncoding, quotedPrintable)) {
- quotedPrintableEncode(data, dataLength, encodedData);
- mhtmlData->append(encodedData.data(), encodedData.size());
- mhtmlData->append("\r\n", 2);
+ if (!strcmp(contentEncoding, binary)) {
+ const char* data;
+ size_t position = 0;
+ while (size_t length = resource.data->getSomeData(data, position)) {
+ mhtmlData->append(data, length);
+ position += length;
+ }
} else {
- ASSERT(!strcmp(contentEncoding, base64));
- // We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs.
- base64Encode(data, dataLength, encodedData);
- const size_t maximumLineLength = 76;
- size_t index = 0;
- size_t encodedDataLength = encodedData.size();
- do {
- size_t lineLength = std::min(encodedDataLength - index, maximumLineLength);
- mhtmlData->append(encodedData.data() + index, lineLength);
+ // FIXME: ideally we would encode the content as a stream without having to fetch it all.
+ const char* data = resource.data->data();
+ size_t dataLength = resource.data->size();
+ Vector<char> encodedData;
+ if (!strcmp(contentEncoding, quotedPrintable)) {
+ quotedPrintableEncode(data, dataLength, encodedData);
+ mhtmlData->append(encodedData.data(), encodedData.size());
mhtmlData->append("\r\n", 2);
- index += maximumLineLength;
- } while (index < encodedDataLength);
+ } else {
+ ASSERT(!strcmp(contentEncoding, base64));
+ // We are not specifying insertLFs = true below as it would cut the lines with LFs and MHTML requires CRLFs.
+ base64Encode(data, dataLength, encodedData);
+ const size_t maximumLineLength = 76;
+ size_t index = 0;
+ size_t encodedDataLength = encodedData.size();
+ do {
+ size_t lineLength = std::min(encodedDataLength - index, maximumLineLength);
+ mhtmlData->append(encodedData.data() + index, lineLength);
+ mhtmlData->append("\r\n", 2);
+ index += maximumLineLength;
+ } while (index < encodedDataLength);
+ }
}
}
Modified: trunk/Source/WebCore/loader/archive/mhtml/MHTMLArchive.h (89868 => 89869)
--- trunk/Source/WebCore/loader/archive/mhtml/MHTMLArchive.h 2011-06-27 22:17:45 UTC (rev 89868)
+++ trunk/Source/WebCore/loader/archive/mhtml/MHTMLArchive.h 2011-06-27 22:18:15 UTC (rev 89869)
@@ -49,8 +49,12 @@
static PassRefPtr<MHTMLArchive> create(const KURL&, SharedBuffer*);
static PassRefPtr<SharedBuffer> generateMHTMLData(Page*);
+ // Binary encoding results in smaller MHTML files but they might not work in other browsers.
+ static PassRefPtr<SharedBuffer> generateMHTMLDataUsingBinaryEncoding(Page*);
private:
+ static PassRefPtr<SharedBuffer> generateMHTMLData(Page*, bool useBinaryEncoding);
+
friend class MHTMLParser;
MHTMLArchive();
};
Modified: trunk/Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp (89868 => 89869)
--- trunk/Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp 2011-06-27 22:17:45 UTC (rev 89868)
+++ trunk/Source/WebCore/loader/archive/mhtml/MHTMLParser.cpp 2011-06-27 22:18:15 UTC (rev 89869)
@@ -146,19 +146,49 @@
RefPtr<SharedBuffer> content = SharedBuffer::create();
const bool checkBoundary = !endOfPartBoundary.isEmpty();
bool endOfPartReached = false;
- String line;
- while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {
- if (checkBoundary && (line == endOfPartBoundary || line == endOfDocumentBoundary)) {
+ if (mimeHeader.contentTransferEncoding() == MIMEHeader::Binary) {
+ if (!checkBoundary) {
+ LOG_ERROR("Binary contents requires end of part");
+ return 0;
+ }
+ m_lineReader.setSeparator(endOfPartBoundary.utf8().data());
+ Vector<char> part;
+ if (!m_lineReader.nextChunk(part)) {
+ LOG_ERROR("Binary contents requires end of part");
+ return 0;
+ }
+ content->append(part);
+ m_lineReader.setSeparator("\r\n");
+ Vector<char> nextChars;
+ if (m_lineReader.peek(nextChars, 2) != 2) {
+ LOG_ERROR("Invalid seperator.");
+ return 0;
+ }
+ endOfPartReached = true;
+ ASSERT(nextChars.size() == 2);
+ endOfArchiveReached = (nextChars[0] == '-' && nextChars[1] == '-');
+ if (!endOfArchiveReached) {
+ String line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback();
+ if (!line.isEmpty()) {
+ LOG_ERROR("No CRLF at end of binary section.");
+ return 0;
+ }
+ }
+ } else {
+ String line;
+ while (!(line = m_lineReader.nextChunkAsUTF8StringWithLatin1Fallback()).isNull()) {
endOfArchiveReached = (line == endOfDocumentBoundary);
- endOfPartReached = true;
- break;
+ if (checkBoundary && (line == endOfPartBoundary || endOfArchiveReached)) {
+ endOfPartReached = true;
+ break;
+ }
+ // Note that we use line.utf8() and not line.ascii() as ascii turns special characters (such as tab, line-feed...) into '?'.
+ content->append(line.utf8().data(), line.length());
+ if (mimeHeader.contentTransferEncoding() == MIMEHeader::QuotedPrintable) {
+ // The line reader removes the \r\n, but we need them for the content in this case as the QuotedPrintable decoder expects CR-LF terminated lines.
+ content->append("\r\n", 2);
+ }
}
- // Note that we use line.utf8() and not line.ascii() as ascii turns special characters (such as tab, line-feed...) into '?'.
- content->append(line.utf8().data(), line.length());
- if (mimeHeader.contentTransferEncoding() == MIMEHeader::QuotedPrintable) {
- // The line reader removes the \r\n, but we need them for the content in this case as the QuotedPrintable decoder expects CR-LF terminated lines.
- content->append("\r\n", 2);
- }
}
if (!endOfPartReached && checkBoundary) {
LOG_ERROR("No bounday found for MHTML part.");
@@ -177,6 +207,7 @@
quotedPrintableDecode(content->data(), content->size(), data);
break;
case MIMEHeader::SevenBit:
+ case MIMEHeader::Binary:
data.append(content->data(), content->size());
break;
default:
Modified: trunk/Source/WebCore/platform/SharedBuffer.cpp (89868 => 89869)
--- trunk/Source/WebCore/platform/SharedBuffer.cpp 2011-06-27 22:17:45 UTC (rev 89868)
+++ trunk/Source/WebCore/platform/SharedBuffer.cpp 2011-06-27 22:18:15 UTC (rev 89869)
@@ -165,6 +165,11 @@
}
}
+void SharedBuffer::append(const Vector<char>& data)
+{
+ append(data.data(), data.size()); // Convenience overload: forwards the vector's bytes to the pointer-and-length append.
+}
+
void SharedBuffer::clear()
{
clearPlatformData();
Modified: trunk/Source/WebCore/platform/SharedBuffer.h (89868 => 89869)
--- trunk/Source/WebCore/platform/SharedBuffer.h 2011-06-27 22:17:45 UTC (rev 89868)
+++ trunk/Source/WebCore/platform/SharedBuffer.h 2011-06-27 22:18:15 UTC (rev 89869)
@@ -86,6 +86,8 @@
void append(SharedBuffer*);
void append(const char*, unsigned);
+ void append(const Vector<char>&);
+
void clear();
const char* platformData() const;
unsigned platformDataSize() const;
Modified: trunk/Source/WebCore/platform/network/MIMEHeader.cpp (89868 => 89869)
--- trunk/Source/WebCore/platform/network/MIMEHeader.cpp 2011-06-27 22:17:45 UTC (rev 89868)
+++ trunk/Source/WebCore/platform/network/MIMEHeader.cpp 2011-06-27 22:18:15 UTC (rev 89869)
@@ -123,6 +123,8 @@
return QuotedPrintable;
if (encoding == "7bit")
return SevenBit;
+ if (encoding == "binary")
+ return Binary;
LOG_ERROR("Unknown encoding '%s' found in MIME header.", text.ascii().data());
return Unknown;
}
Modified: trunk/Source/WebCore/platform/network/MIMEHeader.h (89868 => 89869)
--- trunk/Source/WebCore/platform/network/MIMEHeader.h 2011-06-27 22:17:45 UTC (rev 89868)
+++ trunk/Source/WebCore/platform/network/MIMEHeader.h 2011-06-27 22:18:15 UTC (rev 89869)
@@ -46,6 +46,7 @@
QuotedPrintable,
Base64,
SevenBit,
+ Binary,
Unknown
};
Modified: trunk/Source/WebKit/chromium/ChangeLog (89868 => 89869)
--- trunk/Source/WebKit/chromium/ChangeLog 2011-06-27 22:17:45 UTC (rev 89868)
+++ trunk/Source/WebKit/chromium/ChangeLog 2011-06-27 22:18:15 UTC (rev 89869)
@@ -1,3 +1,15 @@
+2011-06-27 Jay Civelli <[email protected]>
+
+ Reviewed by Darin Fisher.
+
+ Adding binary part support to MHTML.
+ https://bugs.webkit.org/show_bug.cgi?id=63310
+
+ * public/WebPageSerializer.h:
+ * src/WebPageSerializer.cpp:
+ (WebKit::WebPageSerializer::serializeToMHTML):
+ (WebKit::WebPageSerializer::serializeToMHTMLUsingBinaryEncoding):
+
2011-05-17 Nat Duca <[email protected]>
Reviewed by James Robinson.
Modified: trunk/Source/WebKit/chromium/public/WebPageSerializer.h (89868 => 89869)
--- trunk/Source/WebKit/chromium/public/WebPageSerializer.h 2011-06-27 22:17:45 UTC (rev 89868)
+++ trunk/Source/WebKit/chromium/public/WebPageSerializer.h 2011-06-27 22:18:15 UTC (rev 89869)
@@ -61,6 +61,10 @@
// Serializes the WebView contents to a MHTML representation.
WEBKIT_API static WebCString serializeToMHTML(WebView*);
+ // Similar to serializeToMHTML but uses binary encoding for the MHTML parts.
+ // This results in a smaller MHTML file but it might not be supported by other browsers.
+ WEBKIT_API static WebCString serializeToMHTMLUsingBinaryEncoding(WebView*);
+
// IMPORTANT:
// The API below is an older implementation of a pageserialization that
// will be removed soon.
Modified: trunk/Source/WebKit/chromium/src/WebPageSerializer.cpp (89868 => 89869)
--- trunk/Source/WebKit/chromium/src/WebPageSerializer.cpp 2011-06-27 22:17:45 UTC (rev 89868)
+++ trunk/Source/WebKit/chromium/src/WebPageSerializer.cpp 2011-06-27 22:18:15 UTC (rev 89869)
@@ -208,6 +208,13 @@
return WebCString(mhtml->data(), mhtml->size());
}
+WebCString WebPageSerializer::serializeToMHTMLUsingBinaryEncoding(WebView* view)
+{
+ RefPtr<SharedBuffer> mhtml = MHTMLArchive::generateMHTMLDataUsingBinaryEncoding(static_cast<WebViewImpl*>(view)->page());
+ // FIXME: we are copying all the data here. Ideally we would have a WebSharedData().
+ return WebCString(mhtml->data(), mhtml->size());
+}
+
bool WebPageSerializer::serialize(WebFrame* frame,
bool recursive,
WebPageSerializerClient* client,