Title: [141328] trunk/Source/WebCore
Revision
141328
Author
[email protected]
Date
2013-01-30 15:16:03 -0800 (Wed, 30 Jan 2013)

Log Message

HTMLDocumentParser::insert should be aware of threaded parsing
https://bugs.webkit.org/show_bug.cgi?id=107764

Reviewed by Eric Seidel.

This patch is an incremental step towards recovering from
document.write invalidating our speculative parsing buffer. The
approach I've taken is to make it possible to transfer the
HTMLDocumentParser's HTMLTokenizer and HTMLToken to the background
thread. To make that possible, I've taught the HTMLDocumentParser how
to operate without a tokenizer or a token.

Not having a tokenizer or a token while parsing in the background also
helps us avoid accidentally feeding input to the main thread's
tokenizer when we're supposed to feed it to the background thread.

This patch shouldn't have any behavior change (other than possibly
fixing a crash in fast/parser when threaded parsing is enabled).

* html/parser/HTMLDocumentParser.cpp:
(WebCore::HTMLDocumentParser::HTMLDocumentParser):
(WebCore::HTMLDocumentParser::didFailSpeculation):
(WebCore):
(WebCore::HTMLDocumentParser::insert):
(WebCore::HTMLDocumentParser::finish):
(WebCore::HTMLDocumentParser::resumeParsingAfterScriptExecution):
* html/parser/HTMLDocumentParser.h:
(HTMLDocumentParser):
* html/parser/HTMLTreeBuilder.cpp:
(WebCore::HTMLTreeBuilder::constructTree):
(WebCore::HTMLTreeBuilder::processStartTagForInBody):
(WebCore::HTMLTreeBuilder::processEndTag):
(WebCore::HTMLTreeBuilder::processGenericRCDATAStartTag):
(WebCore::HTMLTreeBuilder::processGenericRawTextStartTag):
(WebCore::HTMLTreeBuilder::processScriptStartTag):
* html/parser/TextDocumentParser.cpp:
(WebCore::TextDocumentParser::TextDocumentParser):

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (141327 => 141328)


--- trunk/Source/WebCore/ChangeLog	2013-01-30 23:11:52 UTC (rev 141327)
+++ trunk/Source/WebCore/ChangeLog	2013-01-30 23:16:03 UTC (rev 141328)
@@ -1,3 +1,43 @@
+2013-01-30  Adam Barth  <[email protected]>
+
+        HTMLDocumentParser::insert should be aware of threaded parsing
+        https://bugs.webkit.org/show_bug.cgi?id=107764
+
+        Reviewed by Eric Seidel.
+
+        This patch is an incremental step towards recovering from
+        document.write invalidating our speculative parsing buffer. The
+        approach I've taken is to make it possible to transfer the
+        HTMLDocumentParser's HTMLTokenizer and HTMLToken to the background
+        thread. To make that possible, I've taught the HTMLDocumentParser how
+        to operate without a tokenizer or a token.
+
+        Not having a tokenizer or a token while parsing in the background also
+        helps us avoid accidentally feeding input to the main thread's
+        tokenizer when we're supposed to feed it to the background thread.
+
+        This patch shouldn't have any behavior change (other than possibly
+        fixing a crash in fast/parser when threaded parsing is enabled).
+
+        * html/parser/HTMLDocumentParser.cpp:
+        (WebCore::HTMLDocumentParser::HTMLDocumentParser):
+        (WebCore::HTMLDocumentParser::didFailSpeculation):
+        (WebCore):
+        (WebCore::HTMLDocumentParser::insert):
+        (WebCore::HTMLDocumentParser::finish):
+        (WebCore::HTMLDocumentParser::resumeParsingAfterScriptExecution):
+        * html/parser/HTMLDocumentParser.h:
+        (HTMLDocumentParser):
+        * html/parser/HTMLTreeBuilder.cpp:
+        (WebCore::HTMLTreeBuilder::constructTree):
+        (WebCore::HTMLTreeBuilder::processStartTagForInBody):
+        (WebCore::HTMLTreeBuilder::processEndTag):
+        (WebCore::HTMLTreeBuilder::processGenericRCDATAStartTag):
+        (WebCore::HTMLTreeBuilder::processGenericRawTextStartTag):
+        (WebCore::HTMLTreeBuilder::processScriptStartTag):
+        * html/parser/TextDocumentParser.cpp:
+        (WebCore::TextDocumentParser::TextDocumentParser):
+
 2013-01-30  Rafael Weinstein  <[email protected]>
 
         [HTMLTemplateElement] prevent the parser from removing nodes from the content when the foster agency is processing formatting elements

Modified: trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp (141327 => 141328)


--- trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp	2013-01-30 23:11:52 UTC (rev 141327)
+++ trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp	2013-01-30 23:16:03 UTC (rev 141328)
@@ -77,8 +77,8 @@
 HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors)
     : ScriptableDocumentParser(document)
     , m_options(document)
-    , m_token(adoptPtr(new HTMLToken))
-    , m_tokenizer(HTMLTokenizer::create(m_options))
+    , m_token(m_options.useThreading ? nullptr : adoptPtr(new HTMLToken))
+    , m_tokenizer(m_options.useThreading ? nullptr : HTMLTokenizer::create(m_options))
     , m_scriptRunner(HTMLScriptRunner::create(document, this))
     , m_treeBuilder(HTMLTreeBuilder::create(this, document, reportErrors, m_options))
     , m_parserScheduler(HTMLParserScheduler::create(this))
@@ -90,6 +90,7 @@
     , m_haveBackgroundParser(false)
     , m_pumpSessionNestingLevel(0)
 {
+    ASSERT(shouldUseThreading() || (m_token && m_tokenizer));
 }
 
 // FIXME: Member variables should be grouped into self-initializing structs to
@@ -108,6 +109,7 @@
     , m_haveBackgroundParser(false)
     , m_pumpSessionNestingLevel(0)
 {
+    ASSERT(!shouldUseThreading());
     bool reportErrors = false; // For now document fragment parsing never reports errors.
     m_tokenizer->setState(tokenizerStateForContextElement(contextElement, reportErrors, m_options));
 }
@@ -284,6 +286,11 @@
     processTokensFromBackgroundParser(tokens);
 }
 
+void HTMLDocumentParser::didFailSpeculation(PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>)
+{
+    // FIXME: Tell the background parser to resume parsing with this token and tokenizer.
+}
+
 void HTMLDocumentParser::processTokensFromBackgroundParser(PassOwnPtr<CompactHTMLTokenStream> tokens)
 {
     ASSERT(shouldUseThreading());
@@ -446,11 +453,29 @@
     // but we need to ensure it isn't deleted yet.
     RefPtr<HTMLDocumentParser> protect(this);
 
+#if ENABLE(THREADED_HTML_PARSER)
+    if (!m_tokenizer) {
+        ASSERT(!inPumpSession());
+        ASSERT(m_haveBackgroundParser || wasCreatedByScript());
+        m_token = adoptPtr(new HTMLToken);
+        m_tokenizer = HTMLTokenizer::create(m_options);
+    }
+#endif
+
     SegmentedString excludedLineNumberSource(source);
     excludedLineNumberSource.setExcludeLineNumbers();
     m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource);
     pumpTokenizerIfPossible(ForceSynchronous);
-    
+
+#if ENABLE(THREADED_HTML_PARSER)
+    if (!inPumpSession() && m_haveBackgroundParser) {
+        // FIXME: If the tokenizer is in the same state as when we started this function,
+        // then we haven't necessarily failed our speculation.
+        didFailSpeculation(m_token.release(), m_tokenizer.release());
+        return;
+    }
+#endif
+
     if (isWaitingForScripts()) {
         // Check the document.write() output with a separate preload scanner as
         // the main scanner can't deal with insertions.
@@ -604,6 +629,14 @@
         HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::finishPartial, ParserMap::identifierForParser(this)));
         return;
     }
+    if (shouldUseThreading() && !wasCreatedByScript()) {
+        ASSERT(!m_tokenizer && !m_token);
+        // We're finishing before receiving any data. Rather than booting up
+        // the background parser just to spin it down, we finish parsing
+        // synchronously.
+        m_token = adoptPtr(new HTMLToken);
+        m_tokenizer = HTMLTokenizer::create(m_options);
+    }
 #endif
 
     attemptToEnd();
@@ -670,7 +703,7 @@
     if (shouldUseThreading()) {
         while (!m_pendingTokens.isEmpty()) {
             processTokensFromBackgroundParser(m_pendingTokens.takeFirst());
-            if (isWaitingForScripts())
+            if (isWaitingForScripts() || isStopped())
                 return;
         }
         return;

Modified: trunk/Source/WebCore/html/parser/HTMLDocumentParser.h (141327 => 141328)


--- trunk/Source/WebCore/html/parser/HTMLDocumentParser.h	2013-01-30 23:11:52 UTC (rev 141327)
+++ trunk/Source/WebCore/html/parser/HTMLDocumentParser.h	2013-01-30 23:16:03 UTC (rev 141328)
@@ -124,6 +124,7 @@
 #if ENABLE(THREADED_HTML_PARSER)
     void startBackgroundParser();
     void stopBackgroundParser();
+    void didFailSpeculation(PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>);
     void processTokensFromBackgroundParser(PassOwnPtr<CompactHTMLTokenStream>);
 #endif
 

Modified: trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp (141327 => 141328)


--- trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp	2013-01-30 23:11:52 UTC (rev 141327)
+++ trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp	2013-01-30 23:16:03 UTC (rev 141328)
@@ -367,13 +367,15 @@
     else
         processToken(token);
 
-    bool inForeignContent = !m_tree.isEmpty()
-        && !m_tree.currentStackItem()->isInHTMLNamespace()
-        && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentStackItem())
-        && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentStackItem());
+    if (m_parser->tokenizer()) {
+        bool inForeignContent = !m_tree.isEmpty()
+            && !m_tree.currentStackItem()->isInHTMLNamespace()
+            && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentStackItem())
+            && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentStackItem());
 
-    m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
-    m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
+        m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
+        m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
+    }
 
     m_tree.executeQueuedTasks();
     // We might be detached now.
@@ -740,7 +742,8 @@
     if (token->name() == plaintextTag) {
         processFakePEndTagIfPInButtonScope();
         m_tree.insertHTMLElement(token);
-        m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
+        if (m_parser->tokenizer())
+            m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
         return;
     }
     if (token->name() == buttonTag) {
@@ -850,7 +853,8 @@
     if (token->name() == textareaTag) {
         m_tree.insertHTMLElement(token);
         m_shouldSkipLeadingNewline = true;
-        m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
+        if (m_parser->tokenizer())
+            m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
         m_originalInsertionMode = m_insertionMode;
         m_framesetOk = false;
         setInsertionMode(TextMode);
@@ -2163,12 +2167,14 @@
                 m_scriptToProcess->removeChildren();
             setInsertionMode(m_originalInsertionMode);
 
-            // This token will not have been created by the tokenizer if a
-            // self-closing script tag was encountered and pre-HTML5 parser
-            // quirks are enabled. We must set the tokenizer's state to
-            // DataState explicitly if the tokenizer didn't have a chance to.
-            ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_options.usePreHTML5ParserQuirks || m_options.useThreading);
-            m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
+            if (m_parser->tokenizer()) {
+                // This token will not have been created by the tokenizer if a
+                // self-closing script tag was encountered and pre-HTML5 parser
+                // quirks are enabled. We must set the tokenizer's state to
+                // DataState explicitly if the tokenizer didn't have a chance to.
+                ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_options.usePreHTML5ParserQuirks || m_options.useThreading);
+                m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
+            }
             return;
         }
         m_tree.openElements()->pop();
@@ -2706,7 +2712,8 @@
 {
     ASSERT(token->type() == HTMLTokenTypes::StartTag);
     m_tree.insertHTMLElement(token);
-    m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
+    if (m_parser->tokenizer())
+        m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
     m_originalInsertionMode = m_insertionMode;
     setInsertionMode(TextMode);
 }
@@ -2715,7 +2722,8 @@
 {
     ASSERT(token->type() == HTMLTokenTypes::StartTag);
     m_tree.insertHTMLElement(token);
-    m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
+    if (m_parser->tokenizer())
+        m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
     m_originalInsertionMode = m_insertionMode;
     setInsertionMode(TextMode);
 }
@@ -2724,7 +2732,8 @@
 {
     ASSERT(token->type() == HTMLTokenTypes::StartTag);
     m_tree.insertScriptElement(token);
-    m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
+    if (m_parser->tokenizer())
+        m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
     m_originalInsertionMode = m_insertionMode;
 
     TextPosition position = m_parser->textPosition();

Modified: trunk/Source/WebCore/html/parser/TextDocumentParser.cpp (141327 => 141328)


--- trunk/Source/WebCore/html/parser/TextDocumentParser.cpp	2013-01-30 23:11:52 UTC (rev 141327)
+++ trunk/Source/WebCore/html/parser/TextDocumentParser.cpp	2013-01-30 23:16:03 UTC (rev 141328)
@@ -38,7 +38,9 @@
     : HTMLDocumentParser(document, false)
     , m_haveInsertedFakePreElement(false)
 {
-    tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
+    // FIXME: If we're using threading, we need to tell the BackgroundHTMLParser to use PLAINTEXTState.
+    if (tokenizer())
+        tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
 }
 
 TextDocumentParser::~TextDocumentParser()
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to