jenkins-bot has submitted this change and it was merged.
Change subject: Fixes PreHandler nested <pre> problem.
......................................................................
Fixes PreHandler nested <pre> problem.
When running the tests with just --html2html, a bunch of unexpected
passes show up. Upon investigation, it seems the tokenTransformers per
key array contained duplicates (ex. 2.95 occurred twice), causing extra
newlines in the output, and such.
The source of the duplicates is the PreHandler. It seems confused by
wikitext like `<pre>\n string\n</pre>` where a space before string would
normally indicate indentation and kick off pre wrapping. This is
undesirable because we're already inside a pre. It ends up emitting nested
<pre>s. Once that happens, the ParagraphWrapper gets all out of sorts
and tries to add transformers out of order. In the test suite, this starts
at "HTML-pre: 2: indented text".
The change asserts uniqueness in the array to avoid this situation again.
The blacklist was rewritten with a bunch of test fixes. Microdata
selser tests generate differently as a result of the changes but continue
to fail.
Bug: 52046
Change-Id: I8dd9b197be92de1cab4408a14f1dad302f4b9944
---
M js/lib/ext.core.PreHandler.js
M js/lib/mediawiki.TokenTransformManager.js
M js/lib/pegTokenizer.pegjs.txt
M js/tests/parserTests-blacklist.js
4 files changed, 39 insertions(+), 60 deletions(-)
Approvals:
Subramanya Sastry: Looks good to me, approved
jenkins-bot: Verified
diff --git a/js/lib/ext.core.PreHandler.js b/js/lib/ext.core.PreHandler.js
index 72e2296..2b7e5df 100644
--- a/js/lib/ext.core.PreHandler.js
+++ b/js/lib/ext.core.PreHandler.js
@@ -28,7 +28,7 @@
+ --------------+-----------------+---------------+--------------------------+
| SOL | --- nl --> | SOL | purge |
| SOL | --- eof --> | SOL | purge |
- | SOL | --- ws --> | PRE | save whitespace token(##)|
+ | SOL | --- ws --> | PRE|SOL | save ws token|purge(#,##)|
| SOL | --- sol-tr --> | SOL | TOKS << tok |
| SOL | --- other --> | IGNORE | purge |
+ --------------+-----------------+---------------+--------------------------+
@@ -55,6 +55,10 @@
| IGNORE | --- eof --> | SOL | purge |
+ --------------+-----------------+---------------+--------------------------+
+ # We're being careful to avoid a situation where we generate a pre when we're
+ already inside a pre. If we've seen an open pre tag (marked as inPre), stay
+ in SOL and purge. Otherwise, save the whitespace token and transition to
PRE.
+
## In these states, check if the whitespace token is a single space or has
additional chars (white-space or non-whitespace) -- if yes, slice it off
and pass it through the FSM
@@ -71,6 +75,10 @@
EndTagTk = defines.EndTagTk;
var init; // forward declaration.
+
+function isPre( token, tag ) {
+ return token.constructor === tag && token.isHTMLTag() &&
token.name.toUpperCase() === "PRE";
+}
// Constructor
function PreHandler( manager, options ) {
@@ -252,6 +260,8 @@
};
PreHandler.prototype.onEnd = function (token, manager, cb) {
+ this.inPre = false;
+
if (this.state !== PreHandler.STATE_IGNORE) {
console.error("!ERROR! Not IGNORE! Cannot get here: " +
this.state + "; " + JSON.stringify(token));
init(this, false);
@@ -279,6 +289,12 @@
PreHandler.prototype.onAny = function ( token, manager, cb ) {
+ if ( isPre( token, TagTk ) ) {
+ this.inPre = true;
+ } else if ( isPre( token, EndTagTk ) ) {
+ this.inPre = false;
+ }
+
if (this.trace) {
if (this.debug) { console.warn("----------"); }
console.warn("T:pre:any: " + PreHandler.STATE_STR[this.state] +
" : " + JSON.stringify(token));
@@ -305,11 +321,12 @@
}
// reset for next use of this pipeline!
+ this.inPre = false;
init(this, false);
} else {
switch (this.state) {
case PreHandler.STATE_SOL:
- if ((tc === String) && token.match(/^\s/)) {
+ if ((tc === String) && token.match(/^\s/) &&
!this.inPre) {
ret = this.tokens;
this.tokens = [];
this.preWSToken = token[0];
diff --git a/js/lib/mediawiki.TokenTransformManager.js
b/js/lib/mediawiki.TokenTransformManager.js
index 5690f6f..d27303b 100644
--- a/js/lib/mediawiki.TokenTransformManager.js
+++ b/js/lib/mediawiki.TokenTransformManager.js
@@ -169,6 +169,12 @@
if (!tArray) {
tArray = this.tokenTransformers[key] = [];
}
+
+ // assure no duplicate transformers
+ console.assert( tArray.every(function ( tr ) {
+ return tr.rank !== t.rank;
+ }) );
+
tArray.push(t);
tArray.sort(this._cmpTransformations);
diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt
index e0f59d4..57a09c3 100644
--- a/js/lib/pegTokenizer.pegjs.txt
+++ b/js/lib/pegTokenizer.pegjs.txt
@@ -1371,7 +1371,7 @@
stops.dec('pre');
// return nowiki tags as well?
return [ new TagTk( 'pre', attribs, { stx: 'html', tsr: [pos0, endpos]
} ) ]
- .concat(flatten_stringlist(ts), [ new EndTagTk( 'pre', [],
{ tsr: [pos - 6, pos] } ) ]);
+ .concat(flatten_stringlist(ts), [ new EndTagTk( 'pre', [],
{ stx: 'html', tsr: [pos - 6, pos] } ) ]);
}
/ "</" pre_tag_name ">" { stops.dec('pre'); return "</pre>"; }
/ & { return stops.dec('pre'); }
diff --git a/js/tests/parserTests-blacklist.js
b/js/tests/parserTests-blacklist.js
index eb19782..a969afb 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -567,7 +567,6 @@
add("wt2wt", "5. White-space in indent-pre\nNOTE: the white-space char on 2nd
line is significant");
add("wt2wt", "6. Pre-blocks should extend across lines with leading WS even
when there is no wrappable content");
add("wt2wt", "HTML-pre: 1. embedded newlines");
-add("wt2wt", "HTML-pre: 3: other wikitext");
add("wt2wt", "Definition lists: self-closed tag");
add("wt2wt", "External links: open square bracket forbidden in URL (named)
(bug 4377)");
add("wt2wt", "External links: Clickable images");
@@ -668,8 +667,6 @@
add("wt2wt", "Transclusion of nonexistent MediaWiki message");
add("wt2wt", "Transclusion of MediaWiki message with underscore");
add("wt2wt", "Transclusion of MediaWiki message with space");
-add("wt2wt", "Section extraction, <pre> around bogus header (bug 10309)");
-add("wt2wt", "Section replacement, <pre> around bogus header (bug 10309)");
add("wt2wt", "5 quotes, code coverage +1 line (parsoid)");
add("wt2wt", "Say the magic word");
add("wt2wt", "ISBN code coverage");
@@ -1026,16 +1023,10 @@
add("html2html", "CSS line continuation 2");
add("html2html", "Expansion of multi-line templates in attribute values (bug
6255 sanity check 2)");
add("html2html", "Parser hook: empty input");
-add("html2html", "Parser hook: empty input using terminated empty elements");
-add("html2html", "Parser hook: empty input using terminated empty elements
(space before)");
-add("html2html", "Parser hook: basic input");
-add("html2html", "Parser hook: case insensitive");
-add("html2html", "Parser hook: case insensitive, redux");
add("html2html", "Parser hook: nested tags");
add("html2html", "Parser hook: basic arguments");
add("html2html", "Parser hook: argument containing a forward slash (bug
5344)");
add("html2html", "Parser hook: empty input using terminated empty elements
(bug 2374)");
-add("html2html", "Parser hook: basic arguments using terminated empty elements
(bug 2374)");
add("html2html", "Parser hook: static parser hook inside a comment");
add("html2html", "Sanitizer: Closing of open tags");
add("html2html", "Sanitizer: Escaping of spaces, multibyte characters, colons
& other stuff in id=\"\"");
@@ -2538,17 +2529,6 @@
add("selser", "HTML-pre: 1. embedded newlines [0,4,0,2,0,0,1]");
add("selser", "HTML-pre: 1. embedded newlines [0,0,[2],3,0,4,4]");
add("selser", "HTML-pre: 1. embedded newlines [0,3,[4],3,0,3,[2]]");
-add("selser", "HTML-pre: 3: other wikitext [2,2]");
-add("selser", "HTML-pre: 3: other wikitext [2,0]");
-add("selser", "HTML-pre: 3: other wikitext [0,2]");
-add("selser", "HTML-pre: 3: other wikitext [[2],4]");
-add("selser", "HTML-pre: 3: other wikitext [[4],4]");
-add("selser", "HTML-pre: 3: other wikitext [[4],3]");
-add("selser", "HTML-pre: 3: other wikitext [0,4]");
-add("selser", "HTML-pre: 3: other wikitext [[4],0]");
-add("selser", "HTML-pre: 3: other wikitext [[4],2]");
-add("selser", "HTML-pre: 3: other wikitext [[3],0]");
-add("selser", "HTML-pre: 3: other wikitext [0,3]");
add("selser", "Simple definition [[[3,2],3]]");
add("selser", "Definition list with URL link [[[4,2,2],4]]");
add("selser", "Definition list with URL link [[1,0]]");
@@ -3409,21 +3389,21 @@
add("selser", "Sanitizer: Validating the contents of the id attribute (bug
4515) [1]");
add("selser", "Sanitizer: Validating the contents of the id attribute (bug
4515) [2]");
add("selser", "Sanitizer: Validating the contents of the id attribute (bug
4515) [[2]]");
-add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[0,1,4,[0,0,0,4,0,0],0,[3],0]]");
-add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[2,0,2,[0,0,0,0,0,2],0,0,4]]");
+add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[0,1,4,[0,0,0,4,0,0],0]]");
+add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[2,0,2,[0,0,0,0,0,2],0]]");
add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [2]");
-add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[0,1,4,[0,0,0,0,0,2],2,[2],3]]");
-add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[4,1,0,[0,2,0,0,0,0],2,4,3]]");
-add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[4,[3],0,0,3,4,4]]");
-add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[0,0,4,[0,0,0,2,0,3],4,[4],0]]");
-add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[4,1,0,[0,0,0,0,0,4],3,0,0]]");
-add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[0,3,4,[0,2,0,0,0,0],4,1,4]]");
-add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[0,[2],0,0,0,[4],4]]");
-add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[0,0,0,2,3,[4],0]]");
+add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[0,1,4,[0,0,0,0,0,2],2]]");
+add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[4,1,0,[0,2,0,0,0,0],2]]");
+add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[4,[3],0,0,3]]");
+add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[0,0,4,[0,0,0,2,0,3],4]]");
+add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[4,1,0,[0,0,0,0,0,4],3]]");
+add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[0,3,4,[0,2,0,0,0,0],4]]");
+add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[0,[2],0,0,0]]");
+add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[0,0,0,2,3]]");
add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [1]");
-add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[2,2,0,2,0,4,0]]");
-add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[2,4,0,2,3,0,0]]");
-add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[3,0,4,2,0,4,3]]");
+add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[2,2,0,2,0]]");
+add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[2,4,0,2,3]]");
+add("selser", "Sanitizer: Validating that <meta> and <link> work, but only for
Microdata [[3,0,4,2,0]]");
add("selser", "Fuzz testing: Parser13 [2]");
add("selser", "Fuzz testing: Parser13 [1]");
add("selser", "Fuzz testing: Parser13 [[2,2]]");
@@ -3525,30 +3505,6 @@
add("selser", "Transclusion of MediaWiki message with space [[3]]");
add("selser", "Transclusion of MediaWiki message with space [[4]]");
add("selser", "Transclusion of MediaWiki message with space [[2]]");
-add("selser", "Section extraction, <pre> around bogus header (bug 10309)
[0,0,0,4,[3],0,[4],3,0]");
-add("selser", "Section extraction, <pre> around bogus header (bug 10309)
[2,0,[2],0,[1],0,0,2,2]");
-add("selser", "Section extraction, <pre> around bogus header (bug 10309)
[2,3,[4],2,1,4,4,0,1]");
-add("selser", "Section extraction, <pre> around bogus header (bug 10309)
[4,3,2,0,3,2,3,3,2]");
-add("selser", "Section extraction, <pre> around bogus header (bug 10309)
[3,0,0,0,[4],0,[3],0,2]");
-add("selser", "Section extraction, <pre> around bogus header (bug 10309)
[[3],0,2,4,[3],2,0,0,2]");
-add("selser", "Section extraction, <pre> around bogus header (bug 10309)
[1,3,2,0,1,3,4,4,3]");
-add("selser", "Section extraction, <pre> around bogus header (bug 10309)
[1,0,0,3,[2],2,1,3,1]");
-add("selser", "Section extraction, <pre> around bogus header (bug 10309)
[4,0,[3],2,4,0,0,0,[2]]");
-add("selser", "Section extraction, <pre> around bogus header (bug 10309)
[0,0,[3],4,1,0,2,0,0]");
-add("selser", "Section extraction, <pre> around bogus header (bug 10309)
[1,3,0,0,4,0,2,2,4]");
-add("selser", "Section extraction, <pre> around bogus header (bug 10309)
[[2],2,[3],3,0,0,2,4,0]");
-add("selser", "Section replacement, <pre> around bogus header (bug 10309)
[1,2,0,4,0,0,[2],0,3]");
-add("selser", "Section replacement, <pre> around bogus header (bug 10309)
[0,0,2,0,3,3,1,2,0]");
-add("selser", "Section replacement, <pre> around bogus header (bug 10309)
[2,3,[3],0,[1],0,[4],4,[4]]");
-add("selser", "Section replacement, <pre> around bogus header (bug 10309)
[0,0,2,3,2,4,4,3,3]");
-add("selser", "Section replacement, <pre> around bogus header (bug 10309)
[0,3,2,4,0,0,0,2,0]");
-add("selser", "Section replacement, <pre> around bogus header (bug 10309)
[0,2,0,0,2,4,[2],0,4]");
-add("selser", "Section replacement, <pre> around bogus header (bug 10309)
[[3],4,[2],0,[2],0,4,2,[2]]");
-add("selser", "Section replacement, <pre> around bogus header (bug 10309)
[0,0,2,0,0,0,4,0,2]");
-add("selser", "Section replacement, <pre> around bogus header (bug 10309)
[0,3,0,3,3,2,4,4,4]");
-add("selser", "Section replacement, <pre> around bogus header (bug 10309)
[[3],0,[4],0,[2],3,1,3,0]");
-add("selser", "Section replacement, <pre> around bogus header (bug 10309)
[0,3,[3],0,4,0,0,2,4]");
-add("selser", "Section replacement, <pre> around bogus header (bug 10309)
[0,3,[4],0,0,0,[4],2,4]");
add("selser", "Handling of 
 in URLs [[[2]]]");
add("selser", "Handling of 
 in URLs [[[[2]]]]");
add("selser", "Handling of 
 in URLs [[[1]]]");
--
To view, visit https://gerrit.wikimedia.org/r/78195
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I8dd9b197be92de1cab4408a14f1dad302f4b9944
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits