Subramanya Sastry has uploaded a new change for review.
https://gerrit.wikimedia.org/r/68115
Change subject: Use SelfclosingTagTk for mw-quote, url-link, behavior-switch WT
......................................................................
Use SelfclosingTagTk for mw-quote, url-link, behavior-switch WT
* url-link and behavior-switch are self-contained and so
SelfclosingTagTk makes more sense for them anyway. ext-links
and wiki-links already use SelfclosingTagTk and so using TagTk
for url-links seems to have been an oversight.
* mw-quotes are special in that there is no way to know whether
a mw-quote seen in the tokenizer will be an opening tag, closing
tag, or be split into multiple tags. So, SelfclosingTagTk for
each individual quote sequence does make more sense.
This change has the beneficial side-effect of eliminating incorrect
nowiki-escaping in transclusion-arg wikitext. That escaping code
relies on SelfclosingTagTks and well-balanced TagTk, EndTagTk pairs
and escapes wikitext chars in unclosed segments.
This was a problem for quotes since something like
{{echo|''[[Foo]]''}} would RT to {{echo|<nowiki>''[[Foo]]''</nowiki>}}
since the two quote sequences were tokenized to TagTks
(and thus unbalanced).
* One additional html2html failure. The failure is legitimate and
was being hidden by the wikitext escaping code in the serializer
which assumed that the tokenizer generated SelfclosingTagTk for
url-links (which it did not till now).
Change-Id: Ib573c1347ad279425577a42845e0226e7b36a06c
---
M js/lib/ext.core.QuoteTransformer.js
M js/lib/mediawiki.WikitextSerializer.js
M js/lib/pegTokenizer.pegjs.txt
M js/tests/parserTests-blacklist.js
4 files changed, 7 insertions(+), 5 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid
refs/changes/15/68115/1
diff --git a/js/lib/ext.core.QuoteTransformer.js
b/js/lib/ext.core.QuoteTransformer.js
index 307c60a..a491df4 100644
--- a/js/lib/ext.core.QuoteTransformer.js
+++ b/js/lib/ext.core.QuoteTransformer.js
@@ -8,6 +8,7 @@
// define some constructor shortcuts
var NlTk = defines.NlTk,
TagTk = defines.TagTk,
+ SelfclosingTagTk = defines.SelfclosingTagTk,
EndTagTk = defines.EndTagTk;
function QuoteTransformer ( dispatcher ) {
@@ -200,7 +201,8 @@
}
}
} else if ( ( ctxPrevToken.constructor === NlTk
||
-
ctxPrevToken.constructor === TagTk ) &&
+
ctxPrevToken.constructor === TagTk ||
+
ctxPrevToken.constructor === SelfclosingTagTk ) &&
firstmultiletterword === -1 ) {
// This is an approximation, as the
original doQuotes
// operates on the source and just
looks at space vs.
diff --git a/js/lib/mediawiki.WikitextSerializer.js
b/js/lib/mediawiki.WikitextSerializer.js
index 17698f8..790f36c 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -213,7 +213,6 @@
var tc = t.constructor;
if (tc === pd.SelfclosingTagTk) {
-
// Ignore extlink tokens without valid urls
if (t.name === 'extlink' &&
!this.urlParser.tokenizeURL(t.getAttribute("href"))) {
continue;
diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt
index a22c629..43fac4e 100644
--- a/js/lib/pegTokenizer.pegjs.txt
+++ b/js/lib/pegTokenizer.pegjs.txt
@@ -756,7 +756,7 @@
= '__' behavior:behavior_text '__'
{
return [
- new TagTk(
+ new SelfclosingTagTk(
'behavior-switch',
[new KV('word', '__' + behavior + '__')],
{
@@ -781,7 +781,7 @@
urllink
= target:url {
- return [ new TagTk( 'urllink', [new KV('href', target)], { tsr: [pos0,
pos] } ) ];
+ return [ new SelfclosingTagTk( 'urllink', [new KV('href', target)], {
tsr: [pos0, pos] } ) ];
}
extlink
@@ -1194,7 +1194,7 @@
* bolds/italics and MediaWiki's special heuristics for apostrophes, which are
* all not context free. */
quote = "''" x:"'"* {
- var res = new TagTk( 'mw-quote', [], { tsr: [pos0, pos] } ); // Will be
consumed in token transforms
+ var res = new SelfclosingTagTk( 'mw-quote', [], { tsr: [pos0, pos] } ); //
Will be consumed in token transforms
res.value = "''" + x.join('');
return res;
}
diff --git a/js/tests/parserTests-blacklist.js
b/js/tests/parserTests-blacklist.js
index f7a640b..97fd9a2 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -655,6 +655,7 @@
add("html2html", "Bug 4781, 5267: %28, %29 in bracketed URL");
add("html2html", "External link containing double-single-quotes with no space
separating the url from text in italics");
add("html2html", "Brackets in urls");
+add("html2html", "IPv6 urls (bug 21261)");
add("html2html", "Unclosed and unmatched quotes (parsoid)");
add("html2html", "Table security: embedded pipes
(http://lists.wikimedia.org/mailman/htdig/wikitech-l/2006-April/022293.html)");
add("html2html", "Indented table markup mixed with indented pre content
(proposed in bug 6200)");
--
To view, visit https://gerrit.wikimedia.org/r/68115
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib573c1347ad279425577a42845e0226e7b36a06c
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits