Arlolra has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/406126 )

Change subject: Consolidate bailing out from ext/wiki links
......................................................................

Consolidate bailing out from ext/wiki links

So that extlinks get the same expanded attribute treatment.

Fixes the regression in,
http://localhost:8000/ar.wikipedia.org/v3/page/html/%D8%B0%D8%B1%D8%A9_%D8%A7%D9%84%D9%87%D9%8A%D8%AF%D8%B1%D9%88%D8%AC%D9%8A%D9%86/25945469

Follows up to 217c68a by adding typeof "mw:ExpandedAttrs" to the nodes
where xmlish attributes contain extension tags for expanding.

Change-Id: I188f1c903619320052b0502ba13c7b3be7040e6f
---
M lib/wt2html/tt/AttributeExpander.js
M lib/wt2html/tt/LinkHandler.js
M tests/parserTests.txt
3 files changed, 75 insertions(+), 102 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/26/406126/1

diff --git a/lib/wt2html/tt/AttributeExpander.js 
b/lib/wt2html/tt/AttributeExpander.js
index 59d88e8..bb7a751 100644
--- a/lib/wt2html/tt/AttributeExpander.js
+++ b/lib/wt2html/tt/AttributeExpander.js
@@ -124,7 +124,6 @@
  * ---------------------------------------------------------- */
 function stripMetaTags(env, tokens, wrapTemplates) {
        var buf = [];
-       var isPushed = false;
        var hasGeneratedContent = false;
 
        for (var i = 0, l = tokens.length; i < l; i++) {
@@ -141,10 +140,12 @@
                                        return prev + next.textContent;
                                }, '');
                                buf.push(str);
+                               hasGeneratedContent = true;
+                               continue;
                                // TODO: Maybe cleanup the remaining about 
sibbling wrappers
                                // but the sanitizer will drop them anyways
                        }
-                       isPushed = false;
+
                        if (wrapTemplates) {
                                // Strip all meta tags.
                                var type = t.getAttribute("typeof");
@@ -154,12 +155,12 @@
                                                hasGeneratedContent = true;
                                        }
                                } else {
-                                       isPushed = true;
                                        buf.push(t);
+                                       continue;
                                }
                        }
 
-                       if (!isPushed && t.name !== "meta") {
+                       if (t.name !== "meta") {
                                // Dont strip token if it is not a meta-tag
                                buf.push(t);
                        }
diff --git a/lib/wt2html/tt/LinkHandler.js b/lib/wt2html/tt/LinkHandler.js
index 7397b3c..a9aa56c 100644
--- a/lib/wt2html/tt/LinkHandler.js
+++ b/lib/wt2html/tt/LinkHandler.js
@@ -210,13 +210,71 @@
        });
 };
 
+var bailTokens = function(env, token, isExtLink) {
+       var count = isExtLink ? 1 : 2;
+       var tokens = ["[".repeat(count)];
+
+       if (/mw:ExpandedAttrs/.test(token.getAttribute("typeof"))) {
+               var dataMW = JSON.parse(token.getAttribute("data-mw")).attribs;
+               var html;
+               for (var i = 0; i < dataMW.length; i++) {
+                       if (dataMW[i][0].txt === "href") {
+                               html = dataMW[i][1].html;
+                               break;
+                       }
+               }
+
+               // Since we are splicing off '['s and ']'s from the incoming 
token,
+               // adjust TSR of the DOM-fragment by `count` each on both end.
+               var tsr = token.dataAttribs && token.dataAttribs.tsr;
+               if (tsr && typeof (tsr[0]) === 'number' && typeof (tsr[1]) === 
'number') {
+                       tsr = [tsr[0] + count, tsr[1] - count];
+               } else {
+                       tsr = null;
+               }
+
+               var body = DU.ppToDOM(html);
+               var dft = DU.buildDOMFragmentTokens(env, token, body, null, {
+                       noPWrapping: true,
+                       tsr: tsr,
+               });
+
+               tokens = tokens.concat(dft);
+       } else {
+               // FIXME: Duplicate work
+               tokens = tokens.concat(token.getAttribute("href"));
+       }
+
+       // Append rest of the attributes
+       if (isExtLink) {
+               var content = Util.lookup(token.attribs, 'mw:content');
+
+               // FIXME: Use this attribute in regular extline
+               // cases to rt spaces correctly maybe?  Unsure
+               // it is worth it.
+               var spaces = token.getAttribute('spaces') || '';
+
+               tokens = tokens.concat(spaces, content);
+       } else {
+               token.attribs.forEach(function(a) {
+                       if (a.k === "mw:maybeContent") {
+                               tokens = tokens.concat("|", a.v);
+                       }
+               });
+       }
+
+       tokens.push("]".repeat(count));
+
+       return tokens;
+};
+
 /**
  * Handle a mw:WikiLink token.
  */
 WikiLinkHandler.prototype.onWikiLink = function(token, frame, cb) {
        var env = this.manager.env;
        var hrefKV = Util.lookupKV(token.attribs, 'href');
-       var target, tokens, tsr;
+       var target;
 
        try {
                target = this.getWikiLinkTargetInfo(token, hrefKV);
@@ -226,47 +284,7 @@
        }
 
        if (!target) {
-               tokens = ["[["];
-               if (/mw:ExpandedAttrs/.test(token.getAttribute("typeof"))) {
-                       var dataMW = 
JSON.parse(token.getAttribute("data-mw")).attribs;
-                       var html;
-                       for (var i = 0; i < dataMW.length; i++) {
-                               if (dataMW[i][0].txt === "href") {
-                                       html = dataMW[i][1].html;
-                                       break;
-                               }
-                       }
-
-                       // Since we are splicing off '[[' and ']]' from the 
incoming token,
-                       // adjust TSR of the DOM-fragment by 2 each on both end.
-                       tsr = token.dataAttribs && token.dataAttribs.tsr;
-                       if (tsr && typeof (tsr[0]) === 'number' && typeof 
(tsr[1]) === 'number') {
-                               tsr = [tsr[0] + 2, tsr[1] - 2];
-                       } else {
-                               tsr = null;
-                       }
-
-                       var body = DU.ppToDOM(html);
-                       var dft = DU.buildDOMFragmentTokens(env, token, body, 
null, {
-                               noPWrapping: true,
-                               tsr: tsr,
-                       });
-
-                       tokens = tokens.concat(dft);
-               } else {
-                       // FIXME: Duplicate work
-                       tokens = tokens.concat(token.getAttribute("href"));
-               }
-
-               // Append rest of the attributes
-               token.attribs.forEach(function(a) {
-                       if (a.k === "mw:maybeContent") {
-                               tokens = tokens.concat("|", a.v);
-                       }
-               });
-
-               tokens.push("]]");
-               cb({ tokens: tokens });
+               cb({ tokens: bailTokens(env, token, false) });
                return;
        }
 
@@ -293,13 +311,13 @@
 
                var extToks = this.urlParser.tokenizeExtlink(src);
                if (extToks) {
-                       tsr = token.dataAttribs && token.dataAttribs.tsr;
+                       var tsr = token.dataAttribs && token.dataAttribs.tsr;
                        Util.shiftTokenTSR(extToks, 1 + (tsr ? tsr[0] : 0));
                } else {
                        extToks = src;
                }
 
-               tokens = ['['].concat(extToks, ']');
+               var tokens = ['['].concat(extToks, ']');
                tokens.rank = this.rank - 0.002;  // Magic rank, since extlink 
is -0.001
                cb({ tokens: tokens });
                return;
@@ -308,7 +326,6 @@
        // Ok, it looks like we have a sane href. Figure out which handler to 
use.
        var isRedirect = !!token.getAttribute('redirect');
        var handler = this.getWikiLinkHandler(token, target, isRedirect);
-       // and call it.
        handler(token, frame, cb, target);
 };
 
@@ -2137,51 +2154,7 @@
                });
        } else {
                // Not a link, convert href to plain text.
-               var tokens = ['['];
-               var closingTok = null;
-               var spaces = token.getAttribute('spaces') || '';
-
-               if ((token.getAttribute("typeof") || 
"").match(/mw:ExpandedAttrs/)) {
-                       // The token 'non-url' came from a template.
-                       // Introduce a span and capture the original source for 
RT purposes.
-                       var da = token.dataAttribs;
-                       // targetOff covers all spaces before content
-                       // and we need src without those spaces.
-                       var tsr0b = da.tsr[0] + 1;
-                       var tsr1b = da.targetOff - spaces.length;
-                       var span = Util.placeholder({}, {  // {} !== null
-                               tsr: [tsr0b, tsr1b],
-                               src: env.page.src.substring(tsr0b, tsr1b),
-                       })[0];  // We only want the first token.
-                       tokens.push(span);
-                       closingTok = new EndTagTk('span');
-               }
-
-               var hrefText = token.getAttribute("href");
-               if (Array.isArray(hrefText)) {
-                       tokens = tokens.concat(hrefText);
-               } else {
-                       tokens.push(hrefText);
-               }
-
-               if (closingTok) {
-                       tokens.push(closingTok);
-               }
-
-               // FIXME: Use this attribute in regular extline
-               // cases to rt spaces correctly maybe?  Unsure
-               // it is worth it.
-               if (spaces) {
-                       tokens.push(spaces);
-               }
-
-               if (content.length) {
-                       tokens = tokens.concat(content);
-               }
-
-               tokens.push(']');
-
-               cb({ tokens: tokens });
+               cb({ tokens: bailTokens(env, token, true) });
        }
 };
 
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index fd05393..7d87a78 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -5888,12 +5888,12 @@
 [foo <i>bar</i>]
 [fool's] errand
 [fool's errand]
-[<span typeof="mw:Placeholder" 
data-parsoid='{"src":"{{echo|foo}}"}'>foo</span>]
-[<span typeof="mw:Placeholder" 
data-parsoid='{"src":"{{echo|foo}}"}'>foo</span> bar]
-[<span typeof="mw:Placeholder" 
data-parsoid='{"src":"{{echo|foo}}"}'>foo</span> <i>bar</i>]
-[<span typeof="mw:Placeholder" 
data-parsoid='{"src":"{{echo|foo}}l&#39;s"}'>fool's</span>] errand
-[<span typeof="mw:Placeholder" 
data-parsoid='{"src":"{{echo|foo}}l&#39;s"}'>fool's</span> errand]
-[<span typeof="mw:Placeholder" 
data-parsoid='{"src":"url={{echo|foo}}"}'>url=foo</span>]
+[<span about="#mwt19" typeof="mw:Transclusion" 
data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>]
+[<span about="#mwt20" typeof="mw:Transclusion" 
data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>
 bar]
+[<span about="#mwt21" typeof="mw:Transclusion" 
data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>
 <i>bar</i>]
+[<span about="#mwt22" typeof="mw:Transclusion" 
data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>l's]
 errand
+[<span about="#mwt23" typeof="mw:Transclusion" 
data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>l's
 errand]
+[url=<span about="#mwt24" typeof="mw:Transclusion" 
data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"foo"}},"i":0}}]}'>foo</span>]
 [url=<a rel="mw:ExtLink" href="http://example.com";>http://example.com</a>]
 [http:// bare protocols don't count]</p>
 !! end
@@ -17397,13 +17397,12 @@
 <div id="title=" data-parsoid='{"stx":"html"}'>HTML rocks</div>
 !! end
 
-## QUESTION: Should this have ExpandedAttrs?
 !! test
 Extension tag in attribute value
 !! wikitext
 <span title="<translate>123</translate>">ok</span>
 !! html/parsoid
-<p><span title="123" 
data-parsoid='{"stx":"html","a":{"title":"123"},"sa":{"title":"&lt;translate>123&lt;/translate>"}}'>ok</span></p>
+<p><span title="123" about="#mwt4" typeof="mw:ExpandedAttrs" 
data-parsoid='{"stx":"html","a":{"title":"123"},"sa":{"title":"&lt;translate>123&lt;/translate>"}}'
 data-mw='{"attribs":[[{"txt":"title"},{"html":"&lt;translate 
typeof=\"mw:Extension/translate\" about=\"#mwt3\" 
data-parsoid=&apos;{\"dsr\":[13,39,2,2]}&apos; 
data-mw=&apos;{\"name\":\"translate\",\"attrs\":{},\"body\":{\"extsrc\":\"123\"}}&apos;>123&lt;/translate>"}]]}'>ok</span></p>
 !! end
 
 !! test

-- 
To view, visit https://gerrit.wikimedia.org/r/406126
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I188f1c903619320052b0502ba13c7b3be7040e6f
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <abrea...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to