- Revision
- 141328
- Author
- [email protected]
- Date
- 2013-01-30 15:16:03 -0800 (Wed, 30 Jan 2013)
Log Message
HTMLDocumentParser::insert should be aware of threaded parsing
https://bugs.webkit.org/show_bug.cgi?id=107764
Reviewed by Eric Seidel.
This patch is an incremental step towards recovering from
document.write invalidating our speculative parsing buffer. The
approach I've taken is to make it possible to transfer the
HTMLDocumentParser's HTMLTokenizer and HTMLToken to the background
thread. To make that possible, I've taught the HTMLDocumentParser how
to operate without a tokenizer or a token.
Not having a tokenizer or a token while parsing in the background also
helps us avoid accidentally feeding input to the main thread's
tokenizer when we're supposed to feed it to the background thread.
This patch shouldn't have any behavior change (other than possibly
fixing a crash in fast/parser when threaded parsing is enabled).
* html/parser/HTMLDocumentParser.cpp:
(WebCore::HTMLDocumentParser::HTMLDocumentParser):
(WebCore::HTMLDocumentParser::didFailSpeculation):
(WebCore):
(WebCore::HTMLDocumentParser::insert):
(WebCore::HTMLDocumentParser::finish):
(WebCore::HTMLDocumentParser::resumeParsingAfterScriptExecution):
* html/parser/HTMLDocumentParser.h:
(HTMLDocumentParser):
* html/parser/HTMLTreeBuilder.cpp:
(WebCore::HTMLTreeBuilder::constructTree):
(WebCore::HTMLTreeBuilder::processStartTagForInBody):
(WebCore::HTMLTreeBuilder::processEndTag):
(WebCore::HTMLTreeBuilder::processGenericRCDATAStartTag):
(WebCore::HTMLTreeBuilder::processGenericRawTextStartTag):
(WebCore::HTMLTreeBuilder::processScriptStartTag):
* html/parser/TextDocumentParser.cpp:
(WebCore::TextDocumentParser::TextDocumentParser):
Modified Paths
Diff
Modified: trunk/Source/WebCore/ChangeLog (141327 => 141328)
--- trunk/Source/WebCore/ChangeLog 2013-01-30 23:11:52 UTC (rev 141327)
+++ trunk/Source/WebCore/ChangeLog 2013-01-30 23:16:03 UTC (rev 141328)
@@ -1,3 +1,43 @@
+2013-01-30 Adam Barth <[email protected]>
+
+ HTMLDocumentParser::insert should be aware of threaded parsing
+ https://bugs.webkit.org/show_bug.cgi?id=107764
+
+ Reviewed by Eric Seidel.
+
+ This patch is an incremental step towards recovering from
+ document.write invalidating our speculative parsing buffer. The
+ approach I've taken is to make it possible to transfer the
+ HTMLDocumentParser's HTMLTokenizer and HTMLToken to the background
+ thread. To make that possible, I've taught the HTMLDocumentParser how
+ to operate without a tokenizer or a token.
+
+ Not having a tokenizer or a token while parsing in the background also
+ helps us avoid accidentally feeding input to the main thread's
+ tokenizer when we're supposed to feed it to the background thread.
+
+ This patch shouldn't have any behavior change (other than possibly
+ fixing a crash in fast/parser when threaded parsing is enabled).
+
+ * html/parser/HTMLDocumentParser.cpp:
+ (WebCore::HTMLDocumentParser::HTMLDocumentParser):
+ (WebCore::HTMLDocumentParser::didFailSpeculation):
+ (WebCore):
+ (WebCore::HTMLDocumentParser::insert):
+ (WebCore::HTMLDocumentParser::finish):
+ (WebCore::HTMLDocumentParser::resumeParsingAfterScriptExecution):
+ * html/parser/HTMLDocumentParser.h:
+ (HTMLDocumentParser):
+ * html/parser/HTMLTreeBuilder.cpp:
+ (WebCore::HTMLTreeBuilder::constructTree):
+ (WebCore::HTMLTreeBuilder::processStartTagForInBody):
+ (WebCore::HTMLTreeBuilder::processEndTag):
+ (WebCore::HTMLTreeBuilder::processGenericRCDATAStartTag):
+ (WebCore::HTMLTreeBuilder::processGenericRawTextStartTag):
+ (WebCore::HTMLTreeBuilder::processScriptStartTag):
+ * html/parser/TextDocumentParser.cpp:
+ (WebCore::TextDocumentParser::TextDocumentParser):
+
2013-01-30 Rafael Weinstein <[email protected]>
[HTMLTemplateElement] prevent the parser from removing nodes from the content when the foster agency is processing formatting elements
Modified: trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp (141327 => 141328)
--- trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp 2013-01-30 23:11:52 UTC (rev 141327)
+++ trunk/Source/WebCore/html/parser/HTMLDocumentParser.cpp 2013-01-30 23:16:03 UTC (rev 141328)
@@ -77,8 +77,8 @@
HTMLDocumentParser::HTMLDocumentParser(HTMLDocument* document, bool reportErrors)
: ScriptableDocumentParser(document)
, m_options(document)
- , m_token(adoptPtr(new HTMLToken))
- , m_tokenizer(HTMLTokenizer::create(m_options))
+ , m_token(m_options.useThreading ? nullptr : adoptPtr(new HTMLToken))
+ , m_tokenizer(m_options.useThreading ? nullptr : HTMLTokenizer::create(m_options))
, m_scriptRunner(HTMLScriptRunner::create(document, this))
, m_treeBuilder(HTMLTreeBuilder::create(this, document, reportErrors, m_options))
, m_parserScheduler(HTMLParserScheduler::create(this))
@@ -90,6 +90,7 @@
, m_haveBackgroundParser(false)
, m_pumpSessionNestingLevel(0)
{
+ ASSERT(shouldUseThreading() || (m_token && m_tokenizer));
}
// FIXME: Member variables should be grouped into self-initializing structs to
@@ -108,6 +109,7 @@
, m_haveBackgroundParser(false)
, m_pumpSessionNestingLevel(0)
{
+ ASSERT(!shouldUseThreading());
bool reportErrors = false; // For now document fragment parsing never reports errors.
m_tokenizer->setState(tokenizerStateForContextElement(contextElement, reportErrors, m_options));
}
@@ -284,6 +286,11 @@
processTokensFromBackgroundParser(tokens);
}
+void HTMLDocumentParser::didFailSpeculation(PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>)
+{
+ // FIXME: Tell the background parser to resume parsing with this token and tokenizer.
+}
+
void HTMLDocumentParser::processTokensFromBackgroundParser(PassOwnPtr<CompactHTMLTokenStream> tokens)
{
ASSERT(shouldUseThreading());
@@ -446,11 +453,29 @@
// but we need to ensure it isn't deleted yet.
RefPtr<HTMLDocumentParser> protect(this);
+#if ENABLE(THREADED_HTML_PARSER)
+ if (!m_tokenizer) {
+ ASSERT(!inPumpSession());
+ ASSERT(m_haveBackgroundParser || wasCreatedByScript());
+ m_token = adoptPtr(new HTMLToken);
+ m_tokenizer = HTMLTokenizer::create(m_options);
+ }
+#endif
+
SegmentedString excludedLineNumberSource(source);
excludedLineNumberSource.setExcludeLineNumbers();
m_input.insertAtCurrentInsertionPoint(excludedLineNumberSource);
pumpTokenizerIfPossible(ForceSynchronous);
-
+
+#if ENABLE(THREADED_HTML_PARSER)
+ if (!inPumpSession() && m_haveBackgroundParser) {
+ // FIXME: If the tokenizer is in the same state as when we started this function,
+ // then we haven't necessarily failed our speculation.
+ didFailSpeculation(m_token.release(), m_tokenizer.release());
+ return;
+ }
+#endif
+
if (isWaitingForScripts()) {
// Check the document.write() output with a separate preload scanner as
// the main scanner can't deal with insertions.
@@ -604,6 +629,14 @@
HTMLParserThread::shared()->postTask(bind(&BackgroundHTMLParser::finishPartial, ParserMap::identifierForParser(this)));
return;
}
+ if (shouldUseThreading() && !wasCreatedByScript()) {
+ ASSERT(!m_tokenizer && !m_token);
+ // We're finishing before receiving any data. Rather than booting up
+ // the background parser just to spin it down, we finish parsing
+ // synchronously.
+ m_token = adoptPtr(new HTMLToken);
+ m_tokenizer = HTMLTokenizer::create(m_options);
+ }
#endif
attemptToEnd();
@@ -670,7 +703,7 @@
if (shouldUseThreading()) {
while (!m_pendingTokens.isEmpty()) {
processTokensFromBackgroundParser(m_pendingTokens.takeFirst());
- if (isWaitingForScripts())
+ if (isWaitingForScripts() || isStopped())
return;
}
return;
Modified: trunk/Source/WebCore/html/parser/HTMLDocumentParser.h (141327 => 141328)
--- trunk/Source/WebCore/html/parser/HTMLDocumentParser.h 2013-01-30 23:11:52 UTC (rev 141327)
+++ trunk/Source/WebCore/html/parser/HTMLDocumentParser.h 2013-01-30 23:16:03 UTC (rev 141328)
@@ -124,6 +124,7 @@
#if ENABLE(THREADED_HTML_PARSER)
void startBackgroundParser();
void stopBackgroundParser();
+ void didFailSpeculation(PassOwnPtr<HTMLToken>, PassOwnPtr<HTMLTokenizer>);
void processTokensFromBackgroundParser(PassOwnPtr<CompactHTMLTokenStream>);
#endif
Modified: trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp (141327 => 141328)
--- trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp 2013-01-30 23:11:52 UTC (rev 141327)
+++ trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp 2013-01-30 23:16:03 UTC (rev 141328)
@@ -367,13 +367,15 @@
else
processToken(token);
- bool inForeignContent = !m_tree.isEmpty()
- && !m_tree.currentStackItem()->isInHTMLNamespace()
- && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentStackItem())
- && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentStackItem());
+ if (m_parser->tokenizer()) {
+ bool inForeignContent = !m_tree.isEmpty()
+ && !m_tree.currentStackItem()->isInHTMLNamespace()
+ && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentStackItem())
+ && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentStackItem());
- m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
- m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
+ m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || inForeignContent);
+ m_parser->tokenizer()->setShouldAllowCDATA(inForeignContent);
+ }
m_tree.executeQueuedTasks();
// We might be detached now.
@@ -740,7 +742,8 @@
if (token->name() == plaintextTag) {
processFakePEndTagIfPInButtonScope();
m_tree.insertHTMLElement(token);
- m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
+ if (m_parser->tokenizer())
+ m_parser->tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
return;
}
if (token->name() == buttonTag) {
@@ -850,7 +853,8 @@
if (token->name() == textareaTag) {
m_tree.insertHTMLElement(token);
m_shouldSkipLeadingNewline = true;
- m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
+ if (m_parser->tokenizer())
+ m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
m_originalInsertionMode = m_insertionMode;
m_framesetOk = false;
setInsertionMode(TextMode);
@@ -2163,12 +2167,14 @@
m_scriptToProcess->removeChildren();
setInsertionMode(m_originalInsertionMode);
- // This token will not have been created by the tokenizer if a
- // self-closing script tag was encountered and pre-HTML5 parser
- // quirks are enabled. We must set the tokenizer's state to
- // DataState explicitly if the tokenizer didn't have a chance to.
- ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_options.usePreHTML5ParserQuirks || m_options.useThreading);
- m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
+ if (m_parser->tokenizer()) {
+ // This token will not have been created by the tokenizer if a
+ // self-closing script tag was encountered and pre-HTML5 parser
+ // quirks are enabled. We must set the tokenizer's state to
+ // DataState explicitly if the tokenizer didn't have a chance to.
+ ASSERT(m_parser->tokenizer()->state() == HTMLTokenizerState::DataState || m_options.usePreHTML5ParserQuirks || m_options.useThreading);
+ m_parser->tokenizer()->setState(HTMLTokenizerState::DataState);
+ }
return;
}
m_tree.openElements()->pop();
@@ -2706,7 +2712,8 @@
{
ASSERT(token->type() == HTMLTokenTypes::StartTag);
m_tree.insertHTMLElement(token);
- m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
+ if (m_parser->tokenizer())
+ m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
m_originalInsertionMode = m_insertionMode;
setInsertionMode(TextMode);
}
@@ -2715,7 +2722,8 @@
{
ASSERT(token->type() == HTMLTokenTypes::StartTag);
m_tree.insertHTMLElement(token);
- m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
+ if (m_parser->tokenizer())
+ m_parser->tokenizer()->setState(HTMLTokenizerState::RAWTEXTState);
m_originalInsertionMode = m_insertionMode;
setInsertionMode(TextMode);
}
@@ -2724,7 +2732,8 @@
{
ASSERT(token->type() == HTMLTokenTypes::StartTag);
m_tree.insertScriptElement(token);
- m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
+ if (m_parser->tokenizer())
+ m_parser->tokenizer()->setState(HTMLTokenizerState::ScriptDataState);
m_originalInsertionMode = m_insertionMode;
TextPosition position = m_parser->textPosition();
Modified: trunk/Source/WebCore/html/parser/TextDocumentParser.cpp (141327 => 141328)
--- trunk/Source/WebCore/html/parser/TextDocumentParser.cpp 2013-01-30 23:11:52 UTC (rev 141327)
+++ trunk/Source/WebCore/html/parser/TextDocumentParser.cpp 2013-01-30 23:16:03 UTC (rev 141328)
@@ -38,7 +38,9 @@
: HTMLDocumentParser(document, false)
, m_haveInsertedFakePreElement(false)
{
- tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
+ // FIXME: If we're using threading, we need to tell the BackgroundHTMLParser to use PLAINTEXTState.
+ if (tokenizer())
+ tokenizer()->setState(HTMLTokenizerState::PLAINTEXTState);
}
TextDocumentParser::~TextDocumentParser()