Skip to site navigation (Press enter)

[webkit-changes] [142363] trunk/Source/WebCore

eric Sat, 09 Feb 2013 09:50:32 -0800

Title: [142363] trunk/Source/WebCore

Revision: 142363
Author: [email protected]
Date: 2013-02-09 09:52:19 -0800 (Sat, 09 Feb 2013)

Log Message

Fix TextDocumentParser to play nice with threading
https://bugs.webkit.org/show_bug.cgi?id=109240


Reviewed by Adam Barth.

Before the HTML5 parser re-write, the text document parser
was completely custom.  With the HTML5 parser, we just made
the TextDocumentParser use the HTMLDocumentParser with an
artificial script tag.

However, our solution was slightly over-engineered to avoid
lying about the column numbers of the first line of the text document
during parsing. :)

This change makes us use a simpler (and threading-compatible)
solution by just inserting a real "<pre>" tag into the
input stream instead of hacking one together with the treebuilder
and manually setting the Tokenizer state.

fast/parser/empty-text-resource.html covers this case.

* html/parser/TextDocumentParser.cpp:
(WebCore::TextDocumentParser::TextDocumentParser):
(WebCore::TextDocumentParser::insertFakePreElement):

Modified Paths

trunk/Source/WebCore/ChangeLog
trunk/Source/WebCore/html/parser/BackgroundHTMLParser.cpp
trunk/Source/WebCore/html/parser/BackgroundHTMLParser.h
trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp
trunk/Source/WebCore/html/parser/HTMLDocumentParser.h
trunk/Source/WebCore/html/parser/TextDocumentParser.cpp

Diff

Modified: trunk/Source/WebCore/ChangeLog (142362 => 142363)


--- trunk/Source/WebCore/ChangeLog	2013-02-09 17:28:41 UTC (rev 142362)
+++ trunk/Source/WebCore/ChangeLog	2013-02-09 17:52:19 UTC (rev 142363)
@@ -1,3 +1,30 @@
+2013-02-09  Eric Seidel  <[email protected]>
+
+        Fix TextDocumentParser to play nice with threading
+        https://bugs.webkit.org/show_bug.cgi?id=109240
+
+        Reviewed by Adam Barth.
+
+        Before the HTML5 parser re-write, the text document parser
+        was completely custom.  With the HTML5 parser, we just made
+        the TextDocumentParser use the HTMLDocumentParser with an
+        artificial script tag.
+
+        However, our solution was slightly over-engineered to avoid
+        lying about the column numbers of the first line of the text document
+        during parsing. :)
+
+        This change makes us use a simpler (and threading-compatible)
+        solution by just inserting a real "<pre>" tag into the
+        input stream instead of hacking one together with the treebuilder
+        and manually setting the Tokenizer state.
+
+        fast/parser/empty-text-resource.html covers this case.
+
+        * html/parser/TextDocumentParser.cpp:
+        (WebCore::TextDocumentParser::TextDocumentParser):
+        (WebCore::TextDocumentParser::insertFakePreElement):
+
 2013-02-09  Kent Tamura  <[email protected]>
 
         Add missing copyright header

Modified: trunk/Source/WebCore/html/parser/BackgroundHTMLParser.cpp (142362 => 142363)


--- trunk/Source/WebCore/html/parser/BackgroundHTMLParser.cpp	2013-02-09 17:28:41 UTC (rev 142362)
+++ trunk/Source/WebCore/html/parser/BackgroundHTMLParser.cpp	2013-02-09 17:52:19 UTC (rev 142363)
@@ -96,6 +96,14 @@
     delete this;
 }
 
+void BackgroundHTMLParser::forcePlaintextForTextDocument()
+{
+    // This is only used by the TextDocumentParser (a subclass of HTMLDocumentParser)
+    // to force us into the PLAINTEXT state w/o using a <plaintext> tag.
+    // The TextDocumentParser uses a <pre> tag for historical/compatibility reasons.
+    m_tokenizer->setState(HTMLTokenizerState::PLAINTEXTState);
+}
+
 void BackgroundHTMLParser::markEndOfFile()
 {
     // FIXME: This should use InputStreamPreprocessor::endOfFileMarker

Modified: trunk/Source/WebCore/html/parser/BackgroundHTMLParser.h (142362 => 142363)


--- trunk/Source/WebCore/html/parser/BackgroundHTMLParser.h	2013-02-09 17:28:41 UTC (rev 142362)
+++ trunk/Source/WebCore/html/parser/BackgroundHTMLParser.h	2013-02-09 17:52:19 UTC (rev 142363)
@@ -58,6 +58,8 @@
     void finish();
     void stop();
 
+    void forcePlaintextForTextDocument();
+
 private:
     BackgroundHTMLParser(PassRefPtr<WeakReference<BackgroundHTMLParser> >, const HTMLParserOptions&, const WeakPtr<HTMLDocumentParser>&, PassOwnPtr<XSSAuditor>);

Modified: trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp (142362 => 142363)


--- trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp	2013-02-09 17:28:41 UTC (rev 142362)
+++ trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp	2013-02-09 17:52:19 UTC (rev 142363)
@@ -355,6 +355,21 @@
 
 #endif // ENABLE(THREADED_HTML_PARSER)
 
+void HTMLDocumentParser::forcePlaintextForTextDocument()
+{
+#if ENABLE(THREADED_HTML_PARSER)
+    if (shouldUseThreading()) {
+        // This method is called before any data is appended, so we have to start
+        // the background parser ourselves.
+        if (!m_haveBackgroundParser)
+            startBackgroundParser();
+
+        HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::forcePlaintextForTextDocument, m_backgroundParser));
+    } else
+#endif
+        m_tokenizer->setState(HTMLTokenizerState::PLAINTEXTState);
+}
+
 void HTMLDocumentParser::pumpTokenizer(SynchronousMode mode)
 {
     ASSERT(!isStopped());

Modified: trunk/Source/WebCore/html/parser/HTMLDocumentParser.h (142362 => 142363)


--- trunk/Source/WebCore/html/parser/HTMLDocumentParser.h	2013-02-09 17:28:41 UTC (rev 142362)
+++ trunk/Source/WebCore/html/parser/HTMLDocumentParser.h	2013-02-09 17:52:19 UTC (rev 142363)
@@ -101,6 +101,8 @@
 
     HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); }
 
+    void forcePlaintextForTextDocument();
+
 private:
     static PassRefPtr<HTMLDocumentParser> create(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission permission)
     {

Modified: trunk/Source/WebCore/html/parser/TextDocumentParser.cpp (142362 => 142363)


--- trunk/Source/WebCore/html/parser/TextDocumentParser.cpp	2013-02-09 17:28:41 UTC (rev 142362)
+++ trunk/Source/WebCore/html/parser/TextDocumentParser.cpp	2013-02-09 17:52:19 UTC (rev 142363)
@@ -38,9 +38,6 @@
     : HTMLDocumentParser(document, false)
     , m_haveInsertedFakePreElement(false)
 {
-    // FIXME: If we're using threading, we need to tell the BackgroundHTMLParser to use PLAINTEXTState.
-    if (tokenizer())
-        tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
 }
 
 TextDocumentParser::~TextDocumentParser()
@@ -61,16 +58,19 @@
     // We create a fake token and give it to the tree builder rather than
     // sending fake bytes through the front-end of the parser to avoid
     // disturbing the line/column number calculations.
-
     Vector<Attribute> attributes;
     attributes.append(Attribute(styleAttr, "word-wrap: break-word; white-space: pre-wrap;"));
     RefPtr<AtomicHTMLToken> fakePre = AtomicHTMLToken::create(HTMLTokenTypes::StartTag, preTag.localName(), attributes);
+    treeBuilder()->constructTree(fakePre.get());
 
-    treeBuilder()->constructTree(fakePre.get());
     // Normally we would skip the first \n after a <pre> element, but we don't
     // want to skip the first \n for text documents!
     treeBuilder()->setShouldSkipLeadingNewline(false);
 
+    // Although Text Documents expose a "pre" element in their DOM, they
+    // act like a <plaintext> tag, so we have to force plaintext mode.
+    forcePlaintextForTextDocument();
+
     m_haveInsertedFakePreElement = true;
 }

_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes