Diff
Modified: trunk/LayoutTests/ChangeLog (103101 => 103102)
--- trunk/LayoutTests/ChangeLog 2011-12-16 21:33:59 UTC (rev 103101)
+++ trunk/LayoutTests/ChangeLog 2011-12-16 21:44:45 UTC (rev 103102)
@@ -1,3 +1,14 @@
+2011-12-16 Adam Barth <[email protected]>
+
+ <!DOCTYPE html><pre>

A</pre> doesn't parse correctly
+ https://bugs.webkit.org/show_bug.cgi?id=74658
+
+ Reviewed by Darin Adler.
+
+ Shows test progression.
+
+ * html5lib/runner-expected.txt:
+
2011-12-16 Adrienne Walker <[email protected]>
[chromium] Further suppress media/video-transformed.html failures
Modified: trunk/LayoutTests/fast/forms/textarea-newline-expected.txt (103101 => 103102)
--- trunk/LayoutTests/fast/forms/textarea-newline-expected.txt 2011-12-16 21:33:59 UTC (rev 103101)
+++ trunk/LayoutTests/fast/forms/textarea-newline-expected.txt 2011-12-16 21:44:45 UTC (rev 103102)
@@ -3,8 +3,8 @@
PASS document.getElementById("no-line-feed").value is "Madoka"
PASS document.getElementById("one-line-feed").value is "Sayaka"
PASS document.getElementById("two-line-feeds").value is "\nMami"
-PASS document.getElementById("one-line-feed-escaped-char-and-one-line-feed").value is "\n\nKyoko"
-PASS document.getElementById("two-line-feed-escaped-chars").value is "\n\nHomura"
+PASS document.getElementById("one-line-feed-escaped-char-and-one-line-feed").value is "\nKyoko"
+PASS document.getElementById("two-line-feed-escaped-chars").value is "\nHomura"
PASS successfullyParsed is true
TEST COMPLETE
Modified: trunk/LayoutTests/fast/forms/textarea-newline.html (103101 => 103102)
--- trunk/LayoutTests/fast/forms/textarea-newline.html 2011-12-16 21:33:59 UTC (rev 103101)
+++ trunk/LayoutTests/fast/forms/textarea-newline.html 2011-12-16 21:44:45 UTC (rev 103102)
@@ -31,9 +31,9 @@
shouldBe('document.getElementById("two-line-feeds").value', '"\\nMami"');
-shouldBe('document.getElementById("one-line-feed-escaped-char-and-one-line-feed").value', '"\\n\\nKyoko"');
+shouldBe('document.getElementById("one-line-feed-escaped-char-and-one-line-feed").value', '"\\nKyoko"');
-shouldBe('document.getElementById("two-line-feed-escaped-chars").value', '"\\n\\nHomura"');
+shouldBe('document.getElementById("two-line-feed-escaped-chars").value', '"\\nHomura"');
</script>
<script src=""
Modified: trunk/LayoutTests/html5lib/runner-expected.txt (103101 => 103102)
--- trunk/LayoutTests/html5lib/runner-expected.txt 2011-12-16 21:33:59 UTC (rev 103101)
+++ trunk/LayoutTests/html5lib/runner-expected.txt 2011-12-16 21:44:45 UTC (rev 103102)
@@ -126,28 +126,8 @@
resources/tests2.dat: PASS
-resources/tests3.dat:
-12
+resources/tests3.dat: PASS
-Test 12 of 24 in resources/tests3.dat failed. Input:
-<!DOCTYPE html><pre>

A</pre>
-Got:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <pre>
-| "
-
-A"
-Expected:
-| <!DOCTYPE html>
-| <html>
-| <head>
-| <body>
-| <pre>
-| "
-A"
resources/tests4.dat: PASS
resources/tests5.dat: PASS
Modified: trunk/Source/WebCore/ChangeLog (103101 => 103102)
--- trunk/Source/WebCore/ChangeLog 2011-12-16 21:33:59 UTC (rev 103101)
+++ trunk/Source/WebCore/ChangeLog 2011-12-16 21:44:45 UTC (rev 103102)
@@ -1,3 +1,32 @@
+2011-12-16 Adam Barth <[email protected]>
+
+ <!DOCTYPE html><pre>

A</pre> doesn't parse correctly
+ https://bugs.webkit.org/show_bug.cgi?id=74658
+
+ Reviewed by Darin Adler.
+
+ Previously, we handled skipping newlines after <pre> in the tokenizer,
+ which isn't how the spec handles them. Instead, the spec skips them in
+ the tree builder. This isn't usually observable, except in the case of
+ an HTML entity. In that case, the tokenzier sees '&' (because the
+ entity hasn't been decoded yet), but the tree builder sees '\n' (the
+ decoded entity). This patch fixes the bug by more closely aligning our
+ implementation with the spec.
+
+ Test: html5lib/runner.html
+
+ * html/parser/HTMLTokenizer.cpp:
+ (WebCore::HTMLTokenizer::reset):
+ (WebCore::HTMLTokenizer::nextToken):
+ * html/parser/HTMLTokenizer.h:
+ * html/parser/HTMLTreeBuilder.cpp:
+ (WebCore::HTMLTreeBuilder::ExternalCharacterTokenBuffer::skipAtMostOneLeadingNewline):
+ (WebCore::HTMLTreeBuilder::HTMLTreeBuilder):
+ (WebCore::HTMLTreeBuilder::processStartTagForInBody):
+ (WebCore::HTMLTreeBuilder::processCharacterBuffer):
+ * html/parser/HTMLTreeBuilder.h:
+ * xml/parser/MarkupTokenizerBase.h:
+
2011-12-16 Joshua Bell <[email protected]>
IndexedDB: Implement IDBObjectStore.count() and IDBIndex.count()
Modified: trunk/Source/WebCore/html/parser/HTMLTokenizer.cpp (103101 => 103102)
--- trunk/Source/WebCore/html/parser/HTMLTokenizer.cpp 2011-12-16 21:33:59 UTC (rev 103101)
+++ trunk/Source/WebCore/html/parser/HTMLTokenizer.cpp 2011-12-16 21:44:45 UTC (rev 103102)
@@ -136,7 +136,6 @@
m_state = HTMLTokenizerState::DataState;
m_token = 0;
m_lineNumber = 0;
- m_skipLeadingNewLineForListing = false;
m_forceNullCharacterReplacement = false;
m_shouldAllowCDATA = false;
m_additionalAllowedCharacter = '\0';
@@ -211,30 +210,6 @@
return haveBufferedCharacterToken();
UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
- // Note that this logic is different than the generic \r\n collapsing
- // handled in the input stream preprocessor. This logic is here as an
- // "authoring convenience" so folks can write:
- //
- // <pre>
- // lorem ipsum
- // lorem ipsum
- // </pre>
- //
- // without getting an extra newline at the start of their <pre> element.
- if (m_skipLeadingNewLineForListing) {
- m_skipLeadingNewLineForListing = false;
- if (cc == '\n') {
- if (m_state == HTMLTokenizerState::DataState)
- HTML_ADVANCE_TO(DataState);
- if (m_state == HTMLTokenizerState::RCDATAState)
- HTML_ADVANCE_TO(RCDATAState);
- // When parsing text/plain documents, we run the tokenizer in the
- // PLAINTEXTState and ignore m_skipLeadingNewLineForListing.
- ASSERT(m_state == HTMLTokenizerState::PLAINTEXTState);
- }
- }
-
// Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
switch (m_state) {
HTML_BEGIN_STATE(DataState) {
Modified: trunk/Source/WebCore/html/parser/HTMLTokenizer.h (103101 => 103102)
--- trunk/Source/WebCore/html/parser/HTMLTokenizer.h 2011-12-16 21:33:59 UTC (rev 103101)
+++ trunk/Source/WebCore/html/parser/HTMLTokenizer.h 2011-12-16 21:44:45 UTC (rev 103102)
@@ -146,10 +146,6 @@
//
void updateStateFor(const AtomicString& tagName, Frame*);
- // Hack to skip leading newline in <pre>/<listing> for authoring ease.
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
- void setSkipLeadingNewLineForListing(bool value) { m_skipLeadingNewLineForListing = value; }
-
bool forceNullCharacterReplacement() const { return m_forceNullCharacterReplacement; }
void setForceNullCharacterReplacement(bool value) { m_forceNullCharacterReplacement = value; }
Modified: trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp (103101 => 103102)
--- trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp 2011-12-16 21:33:59 UTC (rev 103101)
+++ trunk/Source/WebCore/html/parser/HTMLTreeBuilder.cpp 2011-12-16 21:44:45 UTC (rev 103102)
@@ -268,6 +268,13 @@
bool isEmpty() const { return m_current == m_end; }
+ void skipAtMostOneLeadingNewline()
+ {
+ ASSERT(!isEmpty());
+ if (*m_current == '\n')
+ ++m_current;
+ }
+
void skipLeadingWhitespace()
{
skipLeading<isHTMLSpace>();
@@ -349,6 +356,7 @@
, m_isPaused(false)
, m_insertionMode(InitialMode)
, m_originalInsertionMode(InitialMode)
+ , m_shouldSkipLeadingNewline(false)
, m_parser(parser)
, m_scriptToProcessStartPosition(uninitializedPositionValue1())
, m_lastScriptElementStartPosition(TextPosition::belowRangePosition())
@@ -367,6 +375,7 @@
, m_isPaused(false)
, m_insertionMode(InitialMode)
, m_originalInsertionMode(InitialMode)
+ , m_shouldSkipLeadingNewline(false)
, m_parser(parser)
, m_scriptToProcessStartPosition(uninitializedPositionValue1())
, m_lastScriptElementStartPosition(TextPosition::belowRangePosition())
@@ -476,21 +485,26 @@
ASSERT_NOT_REACHED();
break;
case HTMLTokenTypes::DOCTYPE:
+ m_shouldSkipLeadingNewline = false;
processDoctypeToken(token);
break;
case HTMLTokenTypes::StartTag:
+ m_shouldSkipLeadingNewline = false;
processStartTag(token);
break;
case HTMLTokenTypes::EndTag:
+ m_shouldSkipLeadingNewline = false;
processEndTag(token);
break;
case HTMLTokenTypes::Comment:
+ m_shouldSkipLeadingNewline = false;
processComment(token);
return;
case HTMLTokenTypes::Character:
processCharacter(token);
break;
case HTMLTokenTypes::EndOfFile:
+ m_shouldSkipLeadingNewline = false;
processEndOfFile(token);
break;
}
@@ -810,7 +824,7 @@
if (token.name() == preTag || token.name() == listingTag) {
processFakePEndTagIfPInButtonScope();
m_tree.insertHTMLElement(token);
- m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
+ m_shouldSkipLeadingNewline = true;
m_framesetOk = false;
return;
}
@@ -937,7 +951,7 @@
}
if (token.name() == textareaTag) {
m_tree.insertHTMLElement(token);
- m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
+ m_shouldSkipLeadingNewline = true;
m_parser->tokenizer()->setState(HTMLTokenizerState::RCDATAState);
m_originalInsertionMode = m_insertionMode;
m_framesetOk = false;
@@ -2272,6 +2286,24 @@
void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
{
ReprocessBuffer:
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
+ // Note that this logic is different than the generic \r\n collapsing
+ // handled in the input stream preprocessor. This logic is here as an
+ // "authoring convenience" so folks can write:
+ //
+ // <pre>
+ // lorem ipsum
+ // lorem ipsum
+ // </pre>
+ //
+ // without getting an extra newline at the start of their <pre> element.
+ if (m_shouldSkipLeadingNewline) {
+ m_shouldSkipLeadingNewline = false;
+ buffer.skipAtMostOneLeadingNewline();
+ if (buffer.isEmpty())
+ return;
+ }
+
switch (insertionMode()) {
case InitialMode: {
ASSERT(insertionMode() == InitialMode);
Modified: trunk/Source/WebCore/html/parser/HTMLTreeBuilder.h (103101 => 103102)
--- trunk/Source/WebCore/html/parser/HTMLTreeBuilder.h 2011-12-16 21:33:59 UTC (rev 103101)
+++ trunk/Source/WebCore/html/parser/HTMLTreeBuilder.h 2011-12-16 21:44:45 UTC (rev 103102)
@@ -83,6 +83,8 @@
// Done, close any open tags, etc.
void finished();
+ void setShouldSkipLeadingNewline(bool shouldSkip) { m_shouldSkipLeadingNewline = shouldSkip; }
+
static bool scriptEnabled(Frame*);
static bool pluginsEnabled(Frame*);
@@ -239,6 +241,8 @@
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
StringBuilder m_pendingTableCharacters;
+ bool m_shouldSkipLeadingNewline;
+
// We access parser because HTML5 spec requires that we be able to change the state of the tokenizer
// from within parser actions. We also need it to track the current position.
HTMLDocumentParser* m_parser;
Modified: trunk/Source/WebCore/html/parser/TextDocumentParser.cpp (103101 => 103102)
--- trunk/Source/WebCore/html/parser/TextDocumentParser.cpp 2011-12-16 21:33:59 UTC (rev 103101)
+++ trunk/Source/WebCore/html/parser/TextDocumentParser.cpp 2011-12-16 21:44:45 UTC (rev 103102)
@@ -66,6 +66,10 @@
AtomicHTMLToken fakePre(HTMLTokenTypes::StartTag, preTag.localName(), attributes.release());
treeBuilder()->constructTreeFromAtomicToken(fakePre);
+ // Normally we would skip the first \n after a <pre> element, but we don't
+ // want to skip the first \n for text documents!
+ treeBuilder()->setShouldSkipLeadingNewline(false);
+
m_haveInsertedFakePreElement = true;
}
Modified: trunk/Source/WebCore/xml/parser/MarkupTokenizerBase.h (103101 => 103102)
--- trunk/Source/WebCore/xml/parser/MarkupTokenizerBase.h 2011-12-16 21:33:59 UTC (rev 103101)
+++ trunk/Source/WebCore/xml/parser/MarkupTokenizerBase.h 2011-12-16 21:44:45 UTC (rev 103102)
@@ -197,7 +197,6 @@
Token* m_token;
int m_lineNumber;
- bool m_skipLeadingNewLineForListing;
bool m_forceNullCharacterReplacement;
// http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-character