jenkins-bot has submitted this change and it was merged.
Change subject: Take #2: (Bug 49555): Support all nested ref scenarios
......................................................................
Take #2: (Bug 49555): Support all nested ref scenarios
* This patch now supports any template that generates nested refs
where the top-level token is just a single ref.
Ex: {{efn|New timetable{{sfn|Vallance|1991|p=31}}}}
* The solution in this patch removes the hack from the previous
version (a34dfb15). As documented in the bug report, this
solution essentially "trusts" template authors and lets
nested refs through when they come from templates.
However, nested refs at the top level of a page are still not allowed:
<ref>foo<ref>bar</ref>baz</ref> in the top-level page
will still generate a single reference with the content
"foo<ref>bar</ref>baz".
* Updated the nested ref test output
- Regenerated selser changes because of the changed output.
- Regenerated blacklist because of the changed selser changes.
* This now handles:
- https://en.wikipedia.org/wiki/User:Edgepedia/VE/GNoSR
- https://en.wikipedia.org/wiki/Phellinus_ellipsoideus
Change-Id: I627955f0be1c5e2bafc49647c94c2be68ce711a8
---
M js/lib/ext.Cite.js
M js/lib/ext.core.TemplateHandler.js
M js/lib/mediawiki.parser.js
M js/lib/pegTokenizer.pegjs.txt
M js/tests/parserTests-blacklist.js
M js/tests/parserTests.txt
M js/tests/selser.changes.json
7 files changed, 41 insertions(+), 91 deletions(-)
Approvals:
GWicke: Looks good to me, approved
jenkins-bot: Verified
diff --git a/js/lib/ext.Cite.js b/js/lib/ext.Cite.js
index 356750b..7bdb220 100644
--- a/js/lib/ext.Cite.js
+++ b/js/lib/ext.Cite.js
@@ -93,8 +93,9 @@
* Handle ref tokens
*/
Ref.prototype.handleRef = function ( manager, pipelineOpts, refTok, cb ) {
- // Nested <ref> tags are not supported
- if (!pipelineOpts.inTagRef && pipelineOpts.extTag === "ref" &&
pipelineOpts.wrapTemplates) {
+ // Nested <ref> tags at the top level are considered errors
+ // But, inside templates, they are supported
+ if (!pipelineOpts.inTemplate && pipelineOpts.extTag === "ref") {
cb({ tokens: [refTok.getAttribute("source")] });
return;
}
@@ -125,7 +126,7 @@
// Full pipeline for processing ref-content
pipelineType: 'text/x-mediawiki/full',
pipelineOpts: {
- inTagRef: refTok.getAttribute("inTagRef"),
+ inTemplate: pipelineOpts.inTemplate,
extTag: "ref"
},
res: [],
diff --git a/js/lib/ext.core.TemplateHandler.js
b/js/lib/ext.core.TemplateHandler.js
index c77ca3d..e8ac1f3 100644
--- a/js/lib/ext.core.TemplateHandler.js
+++ b/js/lib/ext.core.TemplateHandler.js
@@ -546,70 +546,7 @@
//console.log( tplArgs.name );
//console.log( "---------------------------------");
//console.log( src );
-
- /* -----------------------------------------------------------------
- * HACK! Bypass the "text/mediawiki" pipeline for "{{#tag:ref|...}}"
- *
- * We have to do this to support <ref> tags in #tag:ref (which is
- * effectively nested ref tags). See Bug 49555 for additional details.
- *
- * Consider this wikitext: "{{#tag:ref|X <ref>foo</ref> Y}}"
- * The PHP preprocessor returns "<ref>X <ref>foo</ref> Y</ref>"
- *
- * If we pass this through the regular pipeline, the tokenizer
- * will parse this into 2 tokens:
- * [ <extension name="ref" source="<ref>X <ref>foo</ref>" />,
- * " Y </ref>" ]
- * which is absolutely not what we want!
- *
- * Since we know this can be a nested-ref token and has to parse
- * into a single extension-token, we do the work of the tokenizer
- * and convert it to an ext-token ourselves.
- *
- * Since this is a single ext-token, we know it wouldn't have been
- * processed by any other handlers in stage 1 or stage 2 except
- * template encapsulation which we directly handle below.
- *
- * So, this hack effectively bypasses the normal stage 1 and stage 2
- * pipeline processing and shortcircuits it below with exactly the
- * tokens we want.
- * ----------------------------------------------------------------- */
- var tplName = (state.token.attribs[0].k || '');
- var inTagRef = tplName.constructor === String && tplName.toLowerCase()
=== "#tag:ref";
- if (inTagRef) {
- // Do not set data attribs here since _onChunk will strip tsr
- // from this ext token -- which we don't want stripped
- var extToken = new SelfclosingTagTk('extension',
- [
- new KV('inTagRef', '1'),
- new KV('typeof', 'mw:Extension'),
- new KV('name', 'ref'),
- new KV('about', "#" +
this.manager.env.newObjectId()),
- new KV('source', src),
- new KV('options', state.token.attribs.slice(2))
- ]
- );
-
- // Run normal tpl encapsulation on it
- this._onChunk(state, function(ret) {
- var toks = ret.tokens, n = toks.length;
- for (var i = 0; i < n; i++) {
- // Find the tag and set dataAttribs on it
- if (toks[i].constructor === SelfclosingTagTk &&
- toks[i].getAttribute('inTagRef'))
- {
- var dp =
Util.clone(state.token.dataAttribs),
- matchInfo =
src.match(/^(<ref[^<>]*>)[^]*(<\/ref>)$/i);
-
- dp.tagWidths =
[matchInfo[1].length,matchInfo[2].length];
- toks[i].dataAttribs = dp;
- }
- }
- cb(ret);
- this._onEnd(state, cb);
- }.bind(this), [extToken]);
- return;
- }
+ //console.log( "---------------------------------");
// Get a nested transformation pipeline for the input type. The input
// pipeline includes the tokenizer, synchronous stage-1 transforms for
@@ -617,6 +554,7 @@
//
// NOTE: No template wrapping required for nested templates.
var pipelineOpts = {
+ inTemplate: true,
isInclude: true,
wrapTemplates: false,
extTag: this.options.extTag
diff --git a/js/lib/mediawiki.parser.js b/js/lib/mediawiki.parser.js
index ab9dc73..52c170d 100644
--- a/js/lib/mediawiki.parser.js
+++ b/js/lib/mediawiki.parser.js
@@ -269,8 +269,8 @@
if ( options.inBlockToken ) {
cacheType += '::inBlockToken';
}
- if ( options.inTagRef ) {
- cacheType += '::inTagRef';
+ if ( options.inTemplate ) {
+ cacheType += '::inTemplate';
}
if ( options.extTag ) {
cacheType += '::'+options.extTag;
diff --git a/js/lib/pegTokenizer.pegjs.txt b/js/lib/pegTokenizer.pegjs.txt
index f30bce6..b5611a7 100644
--- a/js/lib/pegTokenizer.pegjs.txt
+++ b/js/lib/pegTokenizer.pegjs.txt
@@ -1401,26 +1401,48 @@
if (t.constructor !== EndTagTk && !isHtmlTag) {
if (t.constructor === TagTk) {
var tsr0 = dp.tsr[0],
+ endTagRE = new RegExp("^(?:.|\n)*?(</\\s*" + tagName +
">)", "mi"),
restOfInput = input.substring(tsr0),
- tagContent = restOfInput.match(new
RegExp("^(.|\n)*?(</\\s*" + tagName + ">)", "mi")),
+ tagContent = restOfInput.match(endTagRE),
extSrc = null,
- tagWidths = null;
+ tagWidths = null,
+ endTagWidth = 0;
if (tagContent) {
extSrc = tagContent[0];
+ endTagWidth = tagContent[1].length;
+
+ if (tagName === 'ref') {
+ // Support 1-level nesting of <ref> tags during
tokenizing.
+ // <ref> tags are the exception to the rule (no
nesting of ext tags)
+ //
+ // Expand extSrc as long as there is a <ref> tag found
in the
+ // extension source body.
+ var s = extSrc.substring(pos-tsr0);
+ while (s && s.match(new RegExp("<" + tagName +
"[^<>]*>"))) {
+ tagContent =
restOfInput.substring(extSrc.length).match(endTagRE);
+ if (tagContent) {
+ s = tagContent[0];
+ endTagWidth = tagContent[1].length;
+ extSrc += s;
+ } else {
+ s = null;
+ }
+ }
+ }
} else if (isInstalledExt || isIncludeTag) {
extSrc = restOfInput;
}
if (extSrc) {
- tagWidths = [pos-tsr0, (tagContent ? tagContent[2].length
: 0)];
+ tagWidths = [pos-tsr0, endTagWidth];
var extContentLen = extSrc.length - tagWidths[0] -
tagWidths[1];
// If the xml-tag is a known installed (not native)
extension,
// skip the end-tag as well.
var skipLen = extContentLen;
- if (isInstalledExt && !isIncludeTag && tagContent) {
- skipLen += tagContent[2].length;
+ if (isInstalledExt && !isIncludeTag) {
+ skipLen += endTagWidth;
}
// Extension content source
diff --git a/js/tests/parserTests-blacklist.js
b/js/tests/parserTests-blacklist.js
index b29ed21..b343dd4 100644
--- a/js/tests/parserTests-blacklist.js
+++ b/js/tests/parserTests-blacklist.js
@@ -3673,22 +3673,10 @@
add("selser", "Ref: 10. Unclosed HTML tags should not leak out of ref-body
[[2,0,0],2,0]");
add("selser", "Ref: 10. Unclosed HTML tags should not leak out of ref-body
[0,4,0]");
add("selser", "Ref: 10. Unclosed HTML tags should not leak out of ref-body
[[0,0,4],3,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[0,2,0]");
add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[0,3,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[1,3,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[2,0,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[2,3,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[1,2,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[1,0,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[1,4,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[2,4,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[[0,4],0,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[[0,3],0,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[[0,2],4,0]");
add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[0,4,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[[0,3],3,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[[0,2],0,0]");
-add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[[0,3],2,0]");
+add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[0,4,0]");
+add("selser", "Ref: 14. A nested ref-tag should be emitted as plain text
[0,2,0]");
add("selser", "References: 2. references tag with group only outputs
references from that group [1,4,0]");
add("selser", "References: 2. references tag with group only outputs
references from that group [4,0,0]");
add("selser", "References: 2. references tag with group only outputs
references from that group [0,3,0]");
diff --git a/js/tests/parserTests.txt b/js/tests/parserTests.txt
index deaed44..d224eff 100644
--- a/js/tests/parserTests.txt
+++ b/js/tests/parserTests.txt
@@ -14720,9 +14720,10 @@
<references />
!!result
-<p><span about="#mwt1" class="reference"
data-mw='{"name":"ref","body":{"html":"foo
&lt;ref&gt;bar"},"attrs":{}}' id="cite_ref-1-0" rel="dc:references"
typeof="mw:Extension/ref"><a href="#cite_note-1">[1]</a></span>
baz</ref></p>
+<span about="#mwt1" class="reference"
data-mw="{"name":"ref","body":{"html":"foo
&lt;ref&gt;bar&lt;/ref&gt; baz"},"attrs":{}}"
id="cite_ref-1-0" rel="dc:references" typeof="mw:Extension/ref"><a
href="#cite_note-1">[1]</a></span>
-<ol about="#mwt2" class="references"
data-mw='{"name":"references","attrs":{}}' typeof="mw:Extension/references"><li
about="#cite_note-1" id="cite_note-1"><span rel="mw:referencedBy"><a
href="#cite_ref-1-0">↑</a></span> foo <ref>bar</li></ol>
+<ol class="references" typeof="mw:Extension/references" about="#mwt2"
data-mw="{"name":"references","attrs":{}}">
+<li about="#cite_note-1" id="cite_note-1" data-parsoid="{}"><span
rel="mw:referencedBy"><a href="#cite_ref-1-0">↑</a></span> foo
<ref>bar</ref> baz</li></ol>
!!end
!!test
diff --git a/js/tests/selser.changes.json b/js/tests/selser.changes.json
index 608650c..e79007f 100644
--- a/js/tests/selser.changes.json
+++ b/js/tests/selser.changes.json
@@ -1047,7 +1047,7 @@
"Ref: 11. ref-tags acts like an inline element wrt
P-wrapping":[[[0,0,4,0,4]],[[2,0,4,0,0]],[[3,0,3,0,0]],[3],0,[2],[[4,0,4,0,2]],[1],[[4,0,0,0,0]],[[2,0,2,0,0]],[[0,0,4,0,2]],[[2,0,2,0,3]],[4],[[0,0,2,0,0]],[[0,0,0,0,3]],[[2,0,0,0,0]],[[4,0,0,0,2]],[[0,0,3,0,2]],[[0,0,0,0,4]],[[0,0,4,0,0]]],
"Ref: 12. ref-tags act as trailing newline migration
barrier":[[0,2,[2,0,0,0,0,0],0,0],[[2,4],3,[3,0,4,0,0,0],0,1],[4,0,1,2,2],[1,0,[0,3,0,0,0,0],2,[4]],[[3,4],0,[3,2,4,0,0,0],0,[2]],[3,0,1,3,2],[0,3,1,0,[3]],[0,0,3,0,4],[1,3,3,4,0],[[0,2],3,2,4,0],[1,0,2,0,[4]],[[2,0],3,[0,0,4,0,0,0],2,4],[1,0,[0,4,2,0,0,0],3,[3]],[[2,2],0,[0,2,3,0,0,0],3,1],[2,0,2,0,0],[4,2,[3,4,0,0,3,0],0,3],[4,2,[4,4,4,0,0,0],4,0],[[3,0],4,3,4,3],[[2,2],0,3,0,1],[3,4,2,0,1]],
"Ref: 13. ref-tags are not SOL-transparent and block
indent-pres":[[[0,0,0,3]],[1],0,[4],[2],[[0,4,0,2]],[[0,0,0,2]],[[0,4,0,3]],[3],[[0,2,0,4]],[[0,4,0,0]],[[0,2,0,0]],[[0,3,0,0]],[[0,2,0,3]],[[0,0,0,4]],[[0,4,0,4]],[[0,3,0,2]],[[0,3,0,4]],[[0,2,0,2]],[[0,3,0,3]]],
-"Ref: 14. A nested ref-tag should be emitted as plain
text":[[3,3,0],[4,3,0],[0,2,0],[4,4,0],[3,0,0],[0,3,0],[1,3,0],[2,0,0],0,[1,2,0],[1,0,0],[1,4,0],[2,4,0],[[0,4],0,0],[[0,3],0,0],[[0,2],4,0],[0,4,0],[[0,3],3,0],[[0,2],0,0],[[0,3],2,0]],
+"Ref: 14. A nested ref-tag should be emitted as plain
text":[[0,3,0],[0,4,0],0,[0,2,0]],
"Ref: 15. ref-tags with identical names should get identical
indexes":[[[0,0,4,0,0,0,3,0],0,0],[[3,0,0,0,4,0,4,0],4,0],[[4,0,0,0,0,0,0,0],2,0],[4,0,0],[2,0,0],[2,3,0],[[2,0,0,0,0,0,4,0],3,0],[[2,0,0,0,3,0,2,0],0,0],[3,0,0],[1,4,0],[4,3,0],[[0,0,2,0,2,0,0,0],4,0],[[0,0,4,0,2,0,3,0],4,0],[4,2,0],[2,2,0],[[0,0,0,0,0,0,2,0],2,0],[[0,0,0,0,4,0,0,0],0,0],[1,2,0],[[2,0,0,0,0,0,2,0],0,0],[3,3,0]],
"References: 1. references tag without any refs should be handled
properly":[0],
"References: 2. references tag with group only outputs references from that
group":[[1,4,0],[4,0,0],[0,3,0],[[2,0,0,0],3,0],[[4,0,0,0],2,0],[2,2,0],[0,4,0],[[4,0,0,0],4,0],[[2,0,3,0],0,0],[[3,0,4,0],0,0],[3,0,0],[3,4,0],[4,2,0],[[2,0,0,0],0,0],0,[[4,0,4,0],0,0],[[0,0,4,0],3,0],[1,0,0],[[4,0,0,0],0,0],[[0,0,3,0],0,0]],
--
To view, visit https://gerrit.wikimedia.org/r/73886
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I627955f0be1c5e2bafc49647c94c2be68ce711a8
Gerrit-PatchSet: 7
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits