Title: [144498] trunk/Source
Revision
144498
Author
[email protected]
Date
2013-03-01 13:53:26 -0800 (Fri, 01 Mar 2013)

Log Message

Threaded HTML Parser has an extra copy of every byte from the network
https://bugs.webkit.org/show_bug.cgi?id=111135

Reviewed by Adam Barth.

Source/WebCore:

Every LayoutTest executes this code in threaded parsing mode.

* dom/DecodedDataDocumentParser.cpp:
(WebCore::DecodedDataDocumentParser::appendBytes):
 - Pass ownership of the decoded string to the parser.
(WebCore::DecodedDataDocumentParser::flush):
 - Same.
* dom/DecodedDataDocumentParser.h:
(DecodedDataDocumentParser):
* dom/Document.cpp:
(WebCore::Document::setContent):
* dom/DocumentParser.h:
(DocumentParser):
* dom/RawDataDocumentParser.h:
(WebCore::RawDataDocumentParser::append):
* html/FTPDirectoryDocument.cpp:
(FTPDirectoryDocumentParser):
(WebCore::FTPDirectoryDocumentParser::append):
* html/parser/HTMLDocumentParser.cpp:
(WebCore::HTMLDocumentParser::append):
* html/parser/HTMLDocumentParser.h:
(HTMLDocumentParser):
* html/parser/HTMLViewSourceParser.cpp:
(WebCore::HTMLViewSourceParser::append):
* html/parser/HTMLViewSourceParser.h:
(HTMLViewSourceParser):
* html/parser/TextDocumentParser.cpp:
(WebCore::TextDocumentParser::append):
* html/parser/TextDocumentParser.h:
(TextDocumentParser):
* loader/DocumentWriter.cpp:
(WebCore::DocumentWriter::replaceDocument):
* xml/parser/XMLDocumentParser.cpp:
(WebCore::XMLDocumentParser::append):
* xml/parser/XMLDocumentParser.h:
(XMLDocumentParser):
* xml/parser/XMLDocumentParserLibxml2.cpp:
(WebCore::XMLDocumentParser::resumeParsing):

Source/WTF:

The threaded html parser needs to accept ownership
of a string buffer.  The easiest way to do this seemed
to be to use a PassRefPtr<StringImpl>, but there was no way
to generated one from a String (easily), so I added one.

* wtf/text/WTFString.h:
(WTF::String::releaseImpl):

Modified Paths

Diff

Modified: trunk/Source/WTF/ChangeLog (144497 => 144498)


--- trunk/Source/WTF/ChangeLog	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WTF/ChangeLog	2013-03-01 21:53:26 UTC (rev 144498)
@@ -1,3 +1,18 @@
+2013-03-01  Eric Seidel  <[email protected]>
+
+        Threaded HTML Parser has an extra copy of every byte from the network
+        https://bugs.webkit.org/show_bug.cgi?id=111135
+
+        Reviewed by Adam Barth.
+
+        The threaded html parser needs to accept ownership
+        of a string buffer.  The easiest way to do this seemed
+        to be to use a PassRefPtr<StringImpl>, but there was no way
+        to generated one from a String (easily), so I added one.
+
+        * wtf/text/WTFString.h:
+        (WTF::String::releaseImpl):
+
 2013-02-28  Oliver Hunt  <[email protected]>
 
         Crash in JSC::MarkedBlock::FreeList JSC::MarkedBlock::sweepHelper

Modified: trunk/Source/WTF/wtf/text/WTFString.h (144497 => 144498)


--- trunk/Source/WTF/wtf/text/WTFString.h	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WTF/wtf/text/WTFString.h	2013-03-01 21:53:26 UTC (rev 144498)
@@ -167,6 +167,7 @@
     bool isEmpty() const { return !m_impl || !m_impl->length(); }
 
     StringImpl* impl() const { return m_impl.get(); }
+    PassRefPtr<StringImpl> releaseImpl() { return m_impl.release(); }
 
     unsigned length() const
     {

Modified: trunk/Source/WebCore/ChangeLog (144497 => 144498)


--- trunk/Source/WebCore/ChangeLog	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/ChangeLog	2013-03-01 21:53:26 UTC (rev 144498)
@@ -1,3 +1,49 @@
+2013-03-01  Eric Seidel  <[email protected]>
+
+        Threaded HTML Parser has an extra copy of every byte from the network
+        https://bugs.webkit.org/show_bug.cgi?id=111135
+
+        Reviewed by Adam Barth.
+
+        Every LayoutTest executes this code in threaded parsing mode.
+
+        * dom/DecodedDataDocumentParser.cpp:
+        (WebCore::DecodedDataDocumentParser::appendBytes):
+         - Pass ownership of the decoded string to the parser.
+        (WebCore::DecodedDataDocumentParser::flush):
+         - Same.
+        * dom/DecodedDataDocumentParser.h:
+        (DecodedDataDocumentParser):
+        * dom/Document.cpp:
+        (WebCore::Document::setContent):
+        * dom/DocumentParser.h:
+        (DocumentParser):
+        * dom/RawDataDocumentParser.h:
+        (WebCore::RawDataDocumentParser::append):
+        * html/FTPDirectoryDocument.cpp:
+        (FTPDirectoryDocumentParser):
+        (WebCore::FTPDirectoryDocumentParser::append):
+        * html/parser/HTMLDocumentParser.cpp:
+        (WebCore::HTMLDocumentParser::append):
+        * html/parser/HTMLDocumentParser.h:
+        (HTMLDocumentParser):
+        * html/parser/HTMLViewSourceParser.cpp:
+        (WebCore::HTMLViewSourceParser::append):
+        * html/parser/HTMLViewSourceParser.h:
+        (HTMLViewSourceParser):
+        * html/parser/TextDocumentParser.cpp:
+        (WebCore::TextDocumentParser::append):
+        * html/parser/TextDocumentParser.h:
+        (TextDocumentParser):
+        * loader/DocumentWriter.cpp:
+        (WebCore::DocumentWriter::replaceDocument):
+        * xml/parser/XMLDocumentParser.cpp:
+        (WebCore::XMLDocumentParser::append):
+        * xml/parser/XMLDocumentParser.h:
+        (XMLDocumentParser):
+        * xml/parser/XMLDocumentParserLibxml2.cpp:
+        (WebCore::XMLDocumentParser::resumeParsing):
+
 2013-03-01  David Hyatt  <[email protected]>
 
         [New Multicolumn] Change inRenderFlowThread to follow containing block chain

Modified: trunk/Source/WebCore/dom/DecodedDataDocumentParser.cpp (144497 => 144498)


--- trunk/Source/WebCore/dom/DecodedDataDocumentParser.cpp	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/dom/DecodedDataDocumentParser.cpp	2013-03-01 21:53:26 UTC (rev 144498)
@@ -47,7 +47,7 @@
         return;
 
     writer->reportDataReceived();
-    append(decoded);
+    append(decoded.releaseImpl());
 }
 
 void DecodedDataDocumentParser::flush(DocumentWriter* writer)
@@ -57,7 +57,7 @@
         return;
 
     writer->reportDataReceived();
-    append(remainingData);
+    append(remainingData.releaseImpl());
 }
 
 };

Modified: trunk/Source/WebCore/dom/DecodedDataDocumentParser.h (144497 => 144498)


--- trunk/Source/WebCore/dom/DecodedDataDocumentParser.h	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/dom/DecodedDataDocumentParser.h	2013-03-01 21:53:26 UTC (rev 144498)
@@ -41,7 +41,7 @@
 
 private:
     // append is used by DocumentWriter::replaceDocument.
-    virtual void append(const SegmentedString&) = 0;
+    virtual void append(PassRefPtr<StringImpl>) = 0;
 
     // appendBytes and flush are used by DocumentWriter (the loader).
     virtual void appendBytes(DocumentWriter*, const char* bytes, size_t length);

Modified: trunk/Source/WebCore/dom/Document.cpp (144497 => 144498)


--- trunk/Source/WebCore/dom/Document.cpp	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/dom/Document.cpp	2013-03-01 21:53:26 UTC (rev 144498)
@@ -1358,7 +1358,12 @@
 void Document::setContent(const String& content)
 {
     open();
-    m_parser->append(content);
+    // FIXME: This should probably use insert(), but that's (intentionally)
+    // not implemented for the XML parser as it's normally synonymous with
+    // document.write(). append() will end up yielding, but close() will
+    // pump the tokenizer syncrhonously and finish the parse.
+    m_parser->pinToMainThread();
+    m_parser->append(content.impl());
     close();
 }
 

Modified: trunk/Source/WebCore/dom/DocumentParser.h (144497 => 144498)


--- trunk/Source/WebCore/dom/DocumentParser.h	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/dom/DocumentParser.h	2013-03-01 21:53:26 UTC (rev 144498)
@@ -24,6 +24,7 @@
 #ifndef DocumentParser_h
 #define DocumentParser_h
 
+#include <wtf/Forward.h>
 #include <wtf/RefCounted.h>
 
 namespace WebCore {
@@ -51,9 +52,10 @@
 
     virtual void pinToMainThread() { }
 
-    // FIXME: append() should be private, but DocumentWriter::replaceDocument
-    // uses it for now.
-    virtual void append(const SegmentedString&) = 0;
+    // FIXME: append() should be private, but DocumentWriter::replaceDocument uses it for now.
+    // FIXME: This really should take a PassOwnPtr to signify that it expects to take
+    // ownership of the buffer. The parser expects the PassRefPtr to hold the only ref of the StringImpl.
+    virtual void append(PassRefPtr<StringImpl>) = 0;
 
     virtual void finish() = 0;
 

Modified: trunk/Source/WebCore/dom/RawDataDocumentParser.h (144497 => 144498)


--- trunk/Source/WebCore/dom/RawDataDocumentParser.h	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/dom/RawDataDocumentParser.h	2013-03-01 21:53:26 UTC (rev 144498)
@@ -56,7 +56,7 @@
         ASSERT_NOT_REACHED();
     }
 
-    virtual void append(const SegmentedString&)
+    virtual void append(PassRefPtr<StringImpl>)
     {
         ASSERT_NOT_REACHED();
     }

Modified: trunk/Source/WebCore/html/FTPDirectoryDocument.cpp (144497 => 144498)


--- trunk/Source/WebCore/html/FTPDirectoryDocument.cpp	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/html/FTPDirectoryDocument.cpp	2013-03-01 21:53:26 UTC (rev 144498)
@@ -57,7 +57,7 @@
         return adoptRef(new FTPDirectoryDocumentParser(document));
     }
 
-    virtual void append(const SegmentedString&);
+    virtual void append(PassRefPtr<StringImpl>);
     virtual void finish();
 
     virtual bool isWaitingForScripts() const { return false; }
@@ -346,8 +346,10 @@
     bodyElement->appendChild(m_tableElement, IGNORE_EXCEPTION);
 }
 
-void FTPDirectoryDocumentParser::append(const SegmentedString& source)
+void FTPDirectoryDocumentParser::append(PassRefPtr<StringImpl> inputSource)
 {
+    String source(inputSource);
+
     // Make sure we have the table element to append to by loading the template set in the pref, or
     // creating a very basic document with the appropriate table
     if (!m_tableElement) {

Modified: trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp (144497 => 144498)


--- trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp	2013-03-01 21:53:26 UTC (rev 144498)
@@ -654,7 +654,7 @@
 
 #endif
 
-void HTMLDocumentParser::append(const SegmentedString& source)
+void HTMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
 {
     if (isStopped())
         return;
@@ -664,8 +664,12 @@
         if (!m_haveBackgroundParser)
             startBackgroundParser();
 
-        HTMLParserThread::shared()->postTask(bind(
-            &BackgroundHTMLParser::append, m_backgroundParser, source.toString().isolatedCopy()));
+        ASSERT(inputSource->hasOneRef());
+        Closure closure = bind(&BackgroundHTMLParser::append, m_backgroundParser, String(inputSource));
+        // NOTE: Important that the String temporary is destroyed before we post the task
+        // otherwise the String could call deref() on a StringImpl now owned by the background parser.
+        // We would like to ASSERT(closure.arg3()->hasOneRef()) but sadly the args are private.
+        HTMLParserThread::shared()->postTask(closure);
         return;
     }
 #endif
@@ -673,6 +677,7 @@
     // pumpTokenizer can cause this parser to be detached from the Document,
     // but we need to ensure it isn't deleted yet.
     RefPtr<HTMLDocumentParser> protect(this);
+    String source(inputSource);
 
     if (m_preloadScanner) {
         if (m_input.current().isEmpty() && !isWaitingForScripts()) {

Modified: trunk/Source/WebCore/html/parser/HTMLDocumentParser.h (144497 => 144498)


--- trunk/Source/WebCore/html/parser/HTMLDocumentParser.h	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/html/parser/HTMLDocumentParser.h	2013-03-01 21:53:26 UTC (rev 144498)
@@ -98,7 +98,7 @@
 
 protected:
     virtual void insert(const SegmentedString&) OVERRIDE;
-    virtual void append(const SegmentedString&) OVERRIDE;
+    virtual void append(PassRefPtr<StringImpl>) OVERRIDE;
     virtual void finish() OVERRIDE;
 
     HTMLDocumentParser(HTMLDocument*, bool reportErrors);

Modified: trunk/Source/WebCore/html/parser/HTMLViewSourceParser.cpp (144497 => 144498)


--- trunk/Source/WebCore/html/parser/HTMLViewSourceParser.cpp	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/html/parser/HTMLViewSourceParser.cpp	2013-03-01 21:53:26 UTC (rev 144498)
@@ -62,9 +62,9 @@
     }
 }
 
-void HTMLViewSourceParser::append(const SegmentedString& input)
+void HTMLViewSourceParser::append(PassRefPtr<StringImpl> input)
 {
-    m_input.appendToEnd(input);
+    m_input.appendToEnd(String(input));
     pumpTokenizer();
 }
 

Modified: trunk/Source/WebCore/html/parser/HTMLViewSourceParser.h (144497 => 144498)


--- trunk/Source/WebCore/html/parser/HTMLViewSourceParser.h	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/html/parser/HTMLViewSourceParser.h	2013-03-01 21:53:26 UTC (rev 144498)
@@ -59,7 +59,7 @@
 private:
     // DocumentParser
     virtual void insert(const SegmentedString&);
-    virtual void append(const SegmentedString&);
+    virtual void append(PassRefPtr<StringImpl>);
     virtual void finish();
 
     HTMLViewSourceDocument* document() const { return static_cast<HTMLViewSourceDocument*>(DecodedDataDocumentParser::document()); }

Modified: trunk/Source/WebCore/html/parser/TextDocumentParser.cpp (144497 => 144498)


--- trunk/Source/WebCore/html/parser/TextDocumentParser.cpp	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/html/parser/TextDocumentParser.cpp	2013-03-01 21:53:26 UTC (rev 144498)
@@ -44,7 +44,7 @@
 {
 }
 
-void TextDocumentParser::append(const SegmentedString& text)
+void TextDocumentParser::append(PassRefPtr<StringImpl> text)
 {
     if (!m_haveInsertedFakePreElement)
         insertFakePreElement();

Modified: trunk/Source/WebCore/html/parser/TextDocumentParser.h (144497 => 144498)


--- trunk/Source/WebCore/html/parser/TextDocumentParser.h	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/html/parser/TextDocumentParser.h	2013-03-01 21:53:26 UTC (rev 144498)
@@ -41,7 +41,7 @@
 private:
     explicit TextDocumentParser(HTMLDocument*);
 
-    virtual void append(const SegmentedString&);
+    virtual void append(PassRefPtr<StringImpl>);
     void insertFakePreElement();
 
     bool m_haveInsertedFakePreElement;

Modified: trunk/Source/WebCore/loader/DocumentWriter.cpp (144497 => 144498)


--- trunk/Source/WebCore/loader/DocumentWriter.cpp	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/loader/DocumentWriter.cpp	2013-03-01 21:53:26 UTC (rev 144498)
@@ -80,7 +80,9 @@
         // to support RawDataDocumentParsers.
         if (DocumentParser* parser = m_frame->document()->parser()) {
             parser->pinToMainThread();
-            parser->append(source);
+            // Because we're pinned to the main thread we don't need to worry about
+            // passing ownership of the source string.
+            parser->append(source.impl());
         }
     }
 

Modified: trunk/Source/WebCore/xml/parser/XMLDocumentParser.cpp (144497 => 144498)


--- trunk/Source/WebCore/xml/parser/XMLDocumentParser.cpp	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/xml/parser/XMLDocumentParser.cpp	2013-03-01 21:53:26 UTC (rev 144498)
@@ -112,8 +112,9 @@
     ASSERT_NOT_REACHED();
 }
 
-void XMLDocumentParser::append(const SegmentedString& source)
+void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
 {
+    SegmentedString source(inputSource);
     if (m_sawXSLTransform || !m_sawFirstElement)
         m_originalSourceForTransform.append(source);
 

Modified: trunk/Source/WebCore/xml/parser/XMLDocumentParser.h (144497 => 144498)


--- trunk/Source/WebCore/xml/parser/XMLDocumentParser.h	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/xml/parser/XMLDocumentParser.h	2013-03-01 21:53:26 UTC (rev 144498)
@@ -107,7 +107,7 @@
 
         // From DocumentParser
         virtual void insert(const SegmentedString&);
-        virtual void append(const SegmentedString&);
+        virtual void append(PassRefPtr<StringImpl>);
         virtual void finish();
         virtual bool isWaitingForScripts() const;
         virtual void stopParsing();

Modified: trunk/Source/WebCore/xml/parser/XMLDocumentParserLibxml2.cpp (144497 => 144498)


--- trunk/Source/WebCore/xml/parser/XMLDocumentParserLibxml2.cpp	2013-03-01 21:44:06 UTC (rev 144497)
+++ trunk/Source/WebCore/xml/parser/XMLDocumentParserLibxml2.cpp	2013-03-01 21:53:26 UTC (rev 144498)
@@ -1444,7 +1444,10 @@
     // Then, write any pending data
     SegmentedString rest = m_pendingSrc;
     m_pendingSrc.clear();
-    append(rest);
+    // There is normally only one string left, so toString() shouldn't copy.
+    // In any case, the XML parser runs on the main thread and it's OK if
+    // the passed string has more than one reference.
+    append(rest.toString().impl());
 
     // Finally, if finish() has been called and write() didn't result
     // in any further callbacks being queued, call end()
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to