Subramanya Sastry has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/68115


Change subject: Use SelfclosingTagTk for mw-quote, url-link, behavior-switch WT
......................................................................

Use SelfclosingTagTk for mw-quote, url-link, behavior-switch WT

* url-link and behavior-switch are self-contained and so
  SelfclosingTagTk makes more sense for them anyway.  ext-links
  and wiki-links already use SelfclosingTagTk and so using TagTk
  for url-links seems to have been an oversight.

* mw-quotes are special in that there is no way to know whether
  a mw-quote seen in the tokenizer will be an opening tag, closing
  tag, or be split into multiple tags.  So, SelfclosingTagTk for
  each individual quote sequence does make more sense.

  This change has the beneficial side-effect of eliminating incorrect
  nowiki-escaping in transclusion-arg wikitext.  That escaping code
  relies on SelfclosingTagTks and well-balanced TagTk, EndTagTk pairs
  and escapes wikitext chars in unclosed segments.

  This was a problem for quotes since something like
  {{echo|''[[Foo]]''}} would RT to {{echo|<nowiki>''[[Foo]]''</nowiki>}}
  since the two quote sequences were tokenized to TagTks
  (and thus, unbalanced).

* One additional html2html failure.  The failure is legitimate and
  was being hidden by the wikitext escaping code in the serializer
  which assumed that the tokenizer generated SelfclosingTagTk for
  url-links (which it did not till now).

Change-Id: Ib573c1347ad279425577a42845e0226e7b36a06c
---
M js/lib/ext.core.QuoteTransformer.js
M js/lib/mediawiki.WikitextSerializer.js
M js/lib/pegTokenizer.pegjs.txt
M js/tests/parserTests-blacklist.js
4 files changed, 7 insertions(+), 5 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid 
refs/changes/15/68115/1

diff --git a/js/lib/ext.core.QuoteTransformer.js 
b/js/lib/ext.core.QuoteTransformer.js
index 307c60a..a491df4 100644
--- a/js/lib/ext.core.QuoteTransformer.js
+++ b/js/lib/ext.core.QuoteTransformer.js
@@ -8,6 +8,7 @@
 // define some constructor shortcuts
 var NlTk = defines.NlTk,
     TagTk = defines.TagTk,
+    SelfclosingTagTk = defines.SelfclosingTagTk,
     EndTagTk = defines.EndTagTk;
 
 function QuoteTransformer ( dispatcher ) {
@@ -200,7 +201,8 @@
                                                }
                                        }
                                } else if ( ( ctxPrevToken.constructor === NlTk 
||
-                                                               
ctxPrevToken.constructor === TagTk ) &&
+                                                               
ctxPrevToken.constructor === TagTk ||
+                                                               
ctxPrevToken.constructor === SelfclosingTagTk ) &&
                                                                
firstmultiletterword === -1 ) {
                                        // This is an approximation, as the 
original doQuotes
                                        // operates on the source and just 
looks at space vs.
diff --git a/js/lib/mediawiki.WikitextSerializer.js 
b/js/lib/mediawiki.WikitextSerializer.js
index 17698f8..790f36c 100644
--- a/js/lib/mediawiki.WikitextSerializer.js
+++ b/js/lib/mediawiki.WikitextSerializer.js
@@ -213,7 +213,6 @@
 
                var tc = t.constructor;
                if (tc === pd.SelfclosingTagTk) {
-
                        // Ignore extlink tokens without valid urls
                        if (t.name === 'extlink' && 
!this.urlParser.tokenizeURL(t.getAttribute("href"))) {
                                continue;
diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt
index a22c629..43fac4e 100644
--- a/js/lib/pegTokenizer.pegjs.txt
+++ b/js/lib/pegTokenizer.pegjs.txt
@@ -756,7 +756,7 @@
   = '__' behavior:behavior_text '__'
 {
     return [
-        new TagTk(
+        new SelfclosingTagTk(
             'behavior-switch',
             [new KV('word', '__' + behavior + '__')],
             {
@@ -781,7 +781,7 @@
 
 urllink
   = target:url {
-      return [ new TagTk( 'urllink', [new KV('href', target)], { tsr: [pos0, 
pos] } ) ];
+      return [ new SelfclosingTagTk( 'urllink', [new KV('href', target)], { 
tsr: [pos0, pos] } ) ];
   }
 
 extlink
@@ -1194,7 +1194,7 @@
  * bolds/italics and MediaWiki's special heuristics for apostrophes, which are
  * all not context free. */
 quote = "''" x:"'"* {
-    var res = new TagTk( 'mw-quote', [], { tsr: [pos0, pos] } ); // Will be 
consumed in token transforms
+    var res = new SelfclosingTagTk( 'mw-quote', [], { tsr: [pos0, pos] } ); // 
Will be consumed in token transforms
     res.value = "''" + x.join('');
     return res;
 }
diff --git a/js/tests/parserTests-blacklist.js 
b/js/tests/parserTests-blacklist.js
index f7a640b..97fd9a2 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -655,6 +655,7 @@
 add("html2html", "Bug 4781, 5267: %28, %29 in bracketed URL");
 add("html2html", "External link containing double-single-quotes with no space 
separating the url from text in italics");
 add("html2html", "Brackets in urls");
+add("html2html", "IPv6 urls (bug 21261)");
 add("html2html", "Unclosed and unmatched quotes (parsoid)");
 add("html2html", "Table security: embedded pipes 
(http://lists.wikimedia.org/mailman/htdig/wikitech-l/2006-April/022293.html)");
 add("html2html", "Indented table markup mixed with indented pre content 
(proposed in bug 6200)");

-- 
To view, visit https://gerrit.wikimedia.org/r/68115
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib573c1347ad279425577a42845e0226e7b36a06c
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to