Arlolra has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/403319 )

Change subject: WIP: Permit extension tags in xmlish attribute values
......................................................................

WIP: Permit extension tags in xmlish attribute values

Bug: T183515
Authored-by: Arlo Breault <[email protected]>
Authored-by: Shannon Bailey <[email protected]>
Change-Id: I311d0fbbd2cdcf2a0c0a3dbf698912e6df5f3356
---
M lib/wt2html/pegTokenizer.pegjs
M lib/wt2html/tt/AttributeExpander.js
M tests/mockAPI.js
M tests/parserTests.txt
4 files changed, 41 insertions(+), 9 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/19/403319/1

diff --git a/lib/wt2html/pegTokenizer.pegjs b/lib/wt2html/pegTokenizer.pegjs
index 1a0f97c..65d10f9 100644
--- a/lib/wt2html/pegTokenizer.pegjs
+++ b/lib/wt2html/pegTokenizer.pegjs
@@ -1412,6 +1412,12 @@
 tag_name_chars = [^\t\n\v />\0]
 tag_name = $([A-Za-z] tag_name_chars*)
 
+extension_tag =
+  &{ return !stops.onStack('extTag'); }
+  extToken:xmlish_tag
+  &{ return extToken.name === 'extension'; }
+  { return extToken; }
+
 xmlish_tag
   = & {
       // By the time we get to `doTableStuff` in the php parser, we've already
@@ -2258,7 +2264,7 @@
   = r:( $[^{}&<\-|/ \t\n\r\x0c>]+
   / !inline_breaks
     !'/>'
-    s:( directive / [{}&<\-|/] ) { return s; }
+    s:( directive / extension_tag / [{}&<\-|/] ) { return s; }
   )+ {
     return tu.flattenString(r);
   }
@@ -2268,7 +2274,7 @@
   = r:( $[^{}&<\-|/'>]+
   / !inline_breaks
     !'/>'
-    s:( directive / [{}&<\-|/] ) { return s; }
+    s:( directive / extension_tag / [{}&<\-|/] ) { return s; }
   )* {
     return tu.flattenString(r);
   }
@@ -2278,7 +2284,7 @@
   = r:( $[^{}&<\-|/">]+
   / !inline_breaks
     !'/>'
-    s:( directive / [{}&<\-|/] ) { return s; }
+    s:( directive / extension_tag / [{}&<\-|/] ) { return s; }
   )* {
     return tu.flattenString(r);
   }
@@ -2294,7 +2300,7 @@
 // The stop set is space_or_newline and | which matches table_att_value.
 table_attribute_preprocessor_text
   = r:( $[^{}&<\-!\[ \t\n\r\x0c|]+
-  / !inline_breaks s:( directive / [{}&<\-!\[] ) { return s; }
+  / !inline_breaks s:( directive / extension_tag / [{}&<\-!\[] ) { return s; }
   )+ {
     return tu.flattenString(r);
   }
@@ -2302,7 +2308,7 @@
 // The stop set is '\r\n| which matches table_att_value.
 table_attribute_preprocessor_text_single
   = r:( $[^{}&<\-!\['\r\n|]+
-  / !inline_breaks s:( directive / [{}&<\-!\[] ) { return s; }
+  / !inline_breaks s:( directive / extension_tag / [{}&<\-!\[] ) { return s; }
   )* {
     return tu.flattenString(r);
   }
@@ -2310,7 +2316,7 @@
 // The stop set is "\r\n| which matches table_att_value.
 table_attribute_preprocessor_text_double
   = r:( $[^{}&<\-!\["\r\n|]+
-  / !inline_breaks s:( directive / [{}&<\-!\[] ) { return s; }
+  / !inline_breaks s:( directive / extension_tag / [{}&<\-!\[] ) { return s; }
   )* {
     return tu.flattenString(r);
   }
diff --git a/lib/wt2html/tt/AttributeExpander.js 
b/lib/wt2html/tt/AttributeExpander.js
index 9a76502..59d88e8 100644
--- a/lib/wt2html/tt/AttributeExpander.js
+++ b/lib/wt2html/tt/AttributeExpander.js
@@ -122,7 +122,7 @@
  * This helper method strips all meta tags introduced by
  * transclusions, etc. and returns the content.
  * ---------------------------------------------------------- */
-function stripMetaTags(tokens, wrapTemplates) {
+function stripMetaTags(env, tokens, wrapTemplates) {
        var buf = [];
        var isPushed = false;
        var hasGeneratedContent = false;
@@ -130,6 +130,20 @@
        for (var i = 0, l = tokens.length; i < l; i++) {
                var t = tokens[i];
                if ([TagTk, SelfclosingTagTk].indexOf(t.constructor) !== -1) {
+                       // Reinsert expanded extension content that's been 
parsed to DOM
+                       // as a string.  This should match what the php parser 
does since
+                       // extension content is html being placed in an 
attribute context.
+                       if (t.getAttribute('typeof') === 'mw:DOMFragment') {
+                               var nodes = 
env.fragmentMap.get(t.dataAttribs.html);
+                               var str = nodes.reduce(function(prev, next) {
+                                       // We strip tags since the sanitizer 
would normally drop
+                                       // tokens but we're already at html
+                                       return prev + next.textContent;
+                               }, '');
+                               buf.push(str);
+                               // TODO: Maybe clean up the remaining about 
sibling wrappers
+                               // but the sanitizer will drop them anyways
+                       }
                        isPushed = false;
                        if (wrapTemplates) {
                                // Strip all meta tags.
@@ -348,7 +362,7 @@
                                                metaTokens = 
updatedK.metaTokens;
                                        } else {
                                                // Scenario 2 from the 
documentation comment above.
-                                               updatedK = 
stripMetaTags(expandedK, wrapTemplates);
+                                               updatedK = stripMetaTags(env, 
expandedK, wrapTemplates);
                                                expandedK = updatedK.value;
                                        }
 
@@ -438,7 +452,7 @@
                                                metaTokens = 
updatedV.metaTokens;
                                        } else {
                                                // Scenario 2 from the 
documentation comment above.
-                                               updatedV = 
stripMetaTags(attrValTokens, wrapTemplates);
+                                               updatedV = stripMetaTags(env, 
attrValTokens, wrapTemplates);
                                                attrValTokens = updatedV.value;
                                        }
                                        expandedA.v = attrValTokens;
diff --git a/tests/mockAPI.js b/tests/mockAPI.js
index 1e16673..2cbfe55 100644
--- a/tests/mockAPI.js
+++ b/tests/mockAPI.js
@@ -431,6 +431,9 @@
 };
 
 var parse = function(text, onlypst) {
+       // TODO: Something that mocks this stuff more generally since there're
+       // a few tests that end up here
+       if (/<translate>/.test(text)) { return { text: text }; }
        var html = onlypst ? text.replace(/\{\{subst:echo\|([^}]+)\}\}/, "$1") 
: '\n';
        return { text: html };
 };
diff --git a/tests/parserTests.txt b/tests/parserTests.txt
index 4f0023c..ec3b371 100644
--- a/tests/parserTests.txt
+++ b/tests/parserTests.txt
@@ -17386,6 +17386,15 @@
 <div id="title=" data-parsoid='{"stx":"html"}'>HTML rocks</div>
 !! end
 
+## QUESTION: Should this have ExpandedAttrs?
+!! test
+Extension tag in attribute value
+!! wikitext
+<span title="<translate>123</translate>">ok</span>
+!! html/parsoid
+<p><span title="123" 
data-parsoid='{"stx":"html","a":{"title":"123"},"sa":{"title":"&lt;translate>123&lt;/translate>"}}'>ok</span></p>
+!! end
+
 !! test
 table with multiple empty attribute values
 !! options

-- 
To view, visit https://gerrit.wikimedia.org/r/403319
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I311d0fbbd2cdcf2a0c0a3dbf698912e6df5f3356
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to