Subramanya Sastry has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/55833


Change subject: WIP: Updated native <ref> and <references> tag implementations.
......................................................................

WIP: Updated native <ref> and <references> tag implementations.

* The <references> tag was not being processed properly by Parsoid.
  This led to lost references on the BO page.  This patch fixes
  that, so references are filled out and the output more closely
  matches that of en:WP.

* To be tested, documented, and fixed some more.

Change-Id: I2d243656e9e903d8dadb55ee7c0630824c65cc01
---
M js/lib/ext.Cite.js
M js/lib/ext.core.ExtensionHandler.js
M js/lib/ext.core.TemplateHandler.js
M js/lib/mediawiki.DOMPostProcessor.js
M js/lib/mediawiki.ParsoidConfig.js
M js/lib/mediawiki.parser.js
6 files changed, 203 insertions(+), 102 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid 
refs/changes/33/55833/1

diff --git a/js/lib/ext.Cite.js b/js/lib/ext.Cite.js
index 766aa60..f013836 100644
--- a/js/lib/ext.Cite.js
+++ b/js/lib/ext.Cite.js
@@ -1,3 +1,7 @@
+/* ----------------------------------------------------------------------
+ * This file implements <ref> and <references> extension tag handling
+ * natively in Parsoid.
+ * ---------------------------------------------------------------------- */
 "use strict";
 
 var Util = require( './mediawiki.Util.js' ).Util,
@@ -9,7 +13,7 @@
        this.indexByName = {};
 }
 
-RefGroup.prototype.add = function(refName) {
+RefGroup.prototype.add = function(refName, skipLinkback) {
        var ref;
        if (refName && this.indexByName[refName]) {
                ref = this.indexByName[refName];
@@ -36,7 +40,11 @@
                        this.indexByName[refName] = ref;
                }
        }
-       ref.linkbacks.push('cite_ref-' + ref.key + '-' + ref.linkbacks.length);
+
+       if (!skipLinkback) {
+               ref.linkbacks.push('cite_ref-' + ref.key + '-' + 
ref.linkbacks.length);
+       }
+
        return ref;
 };
 
@@ -86,13 +94,57 @@
        return refGroups[group];
 }
 
+// Helper function to process extension source
+// FIXME: Move out to some common helper file?
+function processExtSource(manager, extToken, opts) {
+       var extSrc = extToken.getAttribute('source'),
+               tagWidths = extToken.dataAttribs.tagWidths,
+               content = extSrc.substring(tagWidths[0], extSrc.length - 
tagWidths[1]);
+
+       if (!content || content.length === 0) {
+               opts.emptyContentCB(opts.res);
+       } else {
+               // Pass an async signal since the ext-content is not processed 
completely.
+               opts.parentCB({tokens: opts.res, async: true});
+
+               // Pipeline for processing ext-content
+               var pipeline = manager.pipeFactory.getPipeline(
+                       opts.pipelineType,
+                       Util.extendProps({}, opts.pipelineOpts, {
+                               wrapTemplates: true,
+                               isExtension: true,
+                               // SSS: Doesn't seem right. Should this be the 
default in all cases?
+                               inBlockToken: true
+                       })
+               );
+
+               // Set source offsets for this pipeline's content
+               var tsr = extToken.dataAttribs.tsr;
+               pipeline.setSourceOffsets(tsr[0]+tagWidths[0], 
tsr[1]-tagWidths[1]);
+
+               // Set up provided callbacks
+               if (opts.chunkCB) {
+                       pipeline.addListener('chunk', opts.chunkCB);
+               }
+               if (opts.endCB) {
+                       pipeline.addListener('end', opts.endCB);
+               }
+               if (opts.documentCB) {
+                       pipeline.addListener('document', opts.documentCB);
+               }
+
+               // Off the starting block ... ready, set, go!
+               pipeline.process(content);
+       }
+}
+
 /**
- * Simple token transform version of the Cite extension.
+ * Simple token transform version of the Ref extension tag
  *
  * @class
  * @constructor
  */
-function Cite () {
+function Ref() {
        this.resetState();
 }
 
@@ -100,111 +152,115 @@
  * Reset state before each top-level parse -- this lets us share a pipeline
  * to parse unrelated pages.
  */
-Cite.prototype.resetState = function() {
+Ref.prototype.resetState = function() {
        this.refGroups = {};
 };
 
 /**
  * Handle ref tokens
  */
-Cite.prototype.handleRef = function ( manager, refTok, cb ) {
+Ref.prototype.handleRef = function ( manager, options, refTok, cb ) {
+
        var tsr = refTok.dataAttribs.tsr,
-               options = $.extend({ name: null, group: null }, 
Util.KVtoHash(refTok.getAttribute("options"))),
-               group = this.refGroups[options.group] || 
newRefGroup(this.refGroups, options.group),
-               ref = group.add(options.name),
+               refOpts = $.extend({ name: null, group: null }, 
Util.KVtoHash(refTok.getAttribute("options"))),
+               group = this.refGroups[refOpts.group] || 
newRefGroup(this.refGroups, refOpts.group),
+               ref = group.add(refOpts.name, options.inReferencesTag),
                //console.warn( 'added tokens: ' + JSON.stringify( 
this.refGroups, null, 2 ));
                linkback = ref.linkbacks[ref.linkbacks.length - 1],
                bits = [];
 
-       if (options.group) {
-               bits.push(options.group);
+       if (refOpts.group) {
+               bits.push(refOpts.group);
        }
 
        //bits.push(Util.formatNum( ref.groupIndex + 1 ));
        bits.push(ref.groupIndex + 1);
 
-       var about = "#" + manager.env.newObjectId(),
-               span  = new TagTk('span', [
+       var about, res;
+       if (options.inReferencesTag) {
+               about = '';
+               res = [];
+       } else {
+               about = "#" + manager.env.newObjectId();
+
+               var span = new TagTk('span', [
                                new KV('id', linkback),
                                new KV('class', 'reference'),
                                new KV('about', about),
-                               new KV('typeof', 'mw:Object/Ext/Cite')
-                       ], { src: refTok.dataAttribs.src }),
-               endMeta = new SelfclosingTagTk( 'meta', [
-                               new KV( 'typeof', 'mw:Object/Ext/Cite/End' ),
+                               new KV('typeof', 'mw:Object/Ext/Ref')
+                       ], {
+                               src: refTok.dataAttribs.src
+                       }),
+                       endMeta = new SelfclosingTagTk( 'meta', [
+                               new KV( 'typeof', 'mw:Object/Ext/Ref/End' ),
                                new KV( 'about', about)
                        ]);
 
-       if (tsr) {
-               span.dataAttribs.tsr = tsr;
-               endMeta.dataAttribs.tsr = [null, tsr[1]];
+               if (tsr) {
+                       span.dataAttribs.tsr = tsr;
+                       endMeta.dataAttribs.tsr = [null, tsr[1]];
+               }
+
+               res = [
+                       span,
+                       new TagTk( 'a', [ new KV('href', '#' + ref.target) ]),
+                       '[' + bits.join(' ')  + ']',
+                       new EndTagTk( 'a' ),
+                       new EndTagTk( 'span' ),
+                       endMeta
+               ];
        }
 
-       var res = [
-               span,
-               new TagTk( 'a', [ new KV('href', '#' + ref.target) ]),
-               '[' + bits.join(' ')  + ']',
-               new EndTagTk( 'a' ),
-               new EndTagTk( 'span' ),
-               endMeta
-       ];
+       var finalCB = function(toks, content) {
+                       toks.push(new SelfclosingTagTk( 'meta', [
+                               new KV('typeof', 'mw:Ext/Ref/Content'),
+                               new KV('about', about),
+                               new KV('group', refOpts.group || ''),
+                               new KV('name', refOpts.name || ''),
+                               new KV('content', content || ''),
+                               new KV('skipLinkback', options.inReferencesTag 
|| false)
+                       ]));
 
-       var extSrc = refTok.getAttribute('source'),
-               tagWidths = refTok.dataAttribs.tagWidths,
-               content = extSrc.substring(tagWidths[0], extSrc.length - 
tagWidths[1]);
-
-       if (!content || content.length === 0) {
-               var contentMeta = new SelfclosingTagTk( 'meta', [
-                               new KV( 'typeof', 'mw:Ext/Ref/Content' ),
-                               new KV( 'about', about),
-                               new KV( 'group', options.group || ''),
-                               new KV( 'name', options.name || ''),
-                               new KV( 'content', '')
-                       ]);
-               res.push(contentMeta);
-               cb({tokens: res, async: false});
-       } else {
-               // The content meta-token is yet to be emitted and depends on
-               // the ref-content getting processed completely.
-               cb({tokens: res, async: true});
-
-               // Full pipeline for processing ref-content
-               // No need to encapsulate templates in extension content
-               var pipeline = 
manager.pipeFactory.getPipeline('text/x-mediawiki/full', {
-                       wrapTemplates: true,
-                       isExtension: true,
-                       inBlockToken: true
-               });
-               pipeline.setSourceOffsets(tsr[0]+tagWidths[0], 
tsr[1]-tagWidths[1]);
-               pipeline.addListener('document', function(refContentDoc) {
-                       var contentMeta = new SelfclosingTagTk( 'meta', [
-                                       new KV( 'typeof', 'mw:Ext/Ref/Content' 
),
-                                       new KV( 'about', about),
-                                       new KV( 'group', options.group || ''),
-                                       new KV( 'name', options.name || ''),
-                                       new KV( 'content', 
refContentDoc.body.innerHTML)
-                               ]);
                        // All done!
-                       cb ({ tokens: [contentMeta], async: false });
-               });
+                       cb({tokens: toks, async: false});
+               };
 
-               pipeline.process(content);
-       }
+       processExtSource(manager, refTok, {
+               // Full pipeline for processing ref-content
+               pipelineType: 'text/x-mediawiki/full',
+               pipelineOpts: {
+                       inReferencesTag: options.inReferencesTag,
+                       // Always wrap templates for ref-tags
+                       // SSS FIXME: Document why this is so
+                       // I wasted an hour because I failed to set this flag
+                       wrapTemplates: true
+               },
+               res: res,
+               parentCB: cb,
+               emptyContentCB: finalCB,
+               documentCB: function(refContentDoc) {
+                       finalCB([], refContentDoc.body.innerHTML);
+               }
+       });
+};
+
+function References () {
+       this.reset();
+}
+
+References.prototype.reset = function() {
+       this.refGroups = { };
 };
 
 /**
  * Sanitize the references tag and convert it into a meta-token
  */
-Cite.prototype.handleReferences = function ( manager, refsTok, cb ) {
+References.prototype.handleReferences = function ( manager, options, refsTok, 
cb ) {
        refsTok = refsTok.clone();
 
-       var placeHolder = new SelfclosingTagTk('meta',
-               refsTok.attribs,
-               refsTok.dataAttribs);
-
        // group is the only recognized option?
-       var options = Util.KVtoHash(refsTok.getAttribute("options")),
-               group = options.group;
+       var refsOpts = Util.KVtoHash(refsTok.getAttribute("options")),
+               group = refsOpts.group;
 
        if ( group && group.constructor === Array ) {
                // Array of tokens, convert to string.
@@ -221,29 +277,62 @@
                group = null;
        }
 
-       // Update properties
-       if (group) {
-               placeHolder.setAttribute('group', group);
-       }
-       placeHolder.setAttribute('typeof', 'mw:Ext/References');
-       placeHolder.dataAttribs.stx = undefined;
+       // Emit a placeholder meta for the references token
+       // so that the dom post processor can generate and
+       // emit references at this point in the DOM.
+       var emitPlaceholderMeta = function() {
+               var placeHolder = new SelfclosingTagTk('meta',
+                       refsTok.attribs,
+                       refsTok.dataAttribs);
 
-       cb({ tokens: [placeHolder], async: false });
-};
+               // Update properties
+               if (group) {
+                       placeHolder.setAttribute('group', group);
+               }
+               placeHolder.setAttribute('typeof', 'mw:Ext/References');
+               placeHolder.dataAttribs.stx = undefined;
 
-function References () {
-       this.reset();
-}
+               // All done!
+               cb({ tokens: [placeHolder], async: false });
+       };
 
-References.prototype.reset = function() {
-       this.refGroups = { };
+       processExtSource(manager, refsTok, {
+               // Partial pipeline for processing ref-content
+               // Expand till stage 2 so that all embedded
+               // ref tags get processed
+               pipelineType: 'text/x-mediawiki',
+               pipelineOpts: {
+                       inReferencesTag: true,
+                       wrapTemplates: options.wrapTemplates
+               },
+               res: [],
+               parentCB: cb,
+               emptyContentCB: emitPlaceholderMeta,
+               chunkCB: function(chunk) {
+                       // Extract ref-content tokens and discard the rest
+                       var res = [];
+                       for (var i = 0, n = chunk.length; i < n; i++) {
+                               var t = chunk[i];
+                               if (t.constructor === SelfclosingTagTk &&
+                                       t.name === 'meta' &&
+                                       
t.getAttribute('typeof').match(/mw:Ext\/Ref\/Content/))
+                               {
+                                       res.push(t);
+                               }
+                       }
+
+                       // Pass along the ref toks
+                       cb({ tokens: res, async: true });
+               },
+               endCB: emitPlaceholderMeta
+       });
 };
 
 References.prototype.extractRefFromNode = function(node) {
        var group = node.getAttribute("group"),
                refName = node.getAttribute("name"),
                refGroup = this.refGroups[group] || newRefGroup(this.refGroups, 
group),
-               ref = refGroup.add(refName);
+               ref = refGroup.add(refName, node.getAttribute("skipLinkback"));
 
        // This effectively ignores content from later references with the same 
name.
        // The implicit assumption is that that all those identically named 
refs. are
@@ -274,6 +363,6 @@
 };
 
 if (typeof module === "object") {
-       module.exports.Cite = Cite;
+       module.exports.Ref = Ref;
        module.exports.References = References;
 }
diff --git a/js/lib/ext.core.ExtensionHandler.js 
b/js/lib/ext.core.ExtensionHandler.js
index 2f719f0..a355cf6 100644
--- a/js/lib/ext.core.ExtensionHandler.js
+++ b/js/lib/ext.core.ExtensionHandler.js
@@ -13,10 +13,13 @@
                        (manager.env.conf.parsoid.apiURI !== null);
 
        // Native extension handlers
-       this.citeHandler = manager.env.conf.parsoid.nativeExtensions.cite;
+       var nativeExts = manager.env.conf.parsoid.nativeExtensions,
+               ref = nativeExts.ref,
+           references = nativeExts.references;
+
        this.nativeExtHandlers = {
-               "ref": this.citeHandler.handleRef.bind(this.citeHandler, 
manager),
-               "references": 
this.citeHandler.handleReferences.bind(this.citeHandler, manager)
+               "ref": ref.handleRef.bind(ref, manager, options),
+               "references": references.handleReferences.bind(references, 
manager, options)
        };
 
        // Extension content expansion
@@ -53,7 +56,7 @@
 /**
  * Fetch the preprocessed wikitext for an extension
  */
-ExtensionHandler.prototype.fetchExpandedExtension = function ( title, text, 
processor, parentCB, cb ) {
+ExtensionHandler.prototype.fetchExpandedExtension = function ( title, text, 
parentCB, cb ) {
        var env = this.manager.env;
        if ( ! env.conf.parsoid.expandExtensions ) {
                parentCB(  { tokens: [ 'Warning: Extension tag expansion 
disabled, and no cache for ' +
@@ -66,7 +69,7 @@
                //env.dp( 'requestQueue: ', env.requestQueue );
                if ( env.requestQueue[text] === undefined ) {
                        env.tp( 'Note: Starting new request for ' + text );
-                       env.requestQueue[text] = new processor( env, title, 
text );
+                       env.requestQueue[text] = new PHPParseRequest( env, 
title, text );
                }
                // append request, process in document order
                env.requestQueue[text].listeners( 'src' ).push( cb );
@@ -85,7 +88,6 @@
                this.fetchExpandedExtension(
                        extensionName,
                        token.getAttribute('source'),
-                       PHPParseRequest,
                        cb,
                        this.parseExtensionHTML.bind(this, token, cb)
                );
diff --git a/js/lib/ext.core.TemplateHandler.js 
b/js/lib/ext.core.TemplateHandler.js
index 8772a49..4557599 100644
--- a/js/lib/ext.core.TemplateHandler.js
+++ b/js/lib/ext.core.TemplateHandler.js
@@ -314,7 +314,7 @@
        }
 
        //console.log( "=================================");
-       //console.log( name );
+       //console.log( tplArgs.name );
        //console.log( "---------------------------------");
        //console.log( src );
 
diff --git a/js/lib/mediawiki.DOMPostProcessor.js 
b/js/lib/mediawiki.DOMPostProcessor.js
index 0e14a73..bb60526 100644
--- a/js/lib/mediawiki.DOMPostProcessor.js
+++ b/js/lib/mediawiki.DOMPostProcessor.js
@@ -2124,6 +2124,9 @@
        // Ex: {{compactTOC8|side=yes|seealso=yes}} generates a 
mw:PageProp/notoc meta
        // that gets the mw:Object/Template typeof attached to it.  It is not 
okay to
        // delete it!
+       //
+       // SSS FIXME: This strips out all "Ext/Ref/Content" meta-tags that the 
VE needs
+       // to regenerate references on demand.  To be fixed.
        var metaType = node.getAttribute("typeof");
        if (metaType
                && 
metaType.match(/^\bmw:(Object|EndTag|TSRMarker|Ext)\/?[^\s]*\b/)
@@ -2321,7 +2324,7 @@
        }
 
        // References
-       this.processors.push(generateReferences.bind(null, 
env.conf.parsoid.nativeExtensions.refs));
+       this.processors.push(generateReferences.bind(null, 
env.conf.parsoid.nativeExtensions.references));
 
        // DOM traverser for passes that can be combined and will run at the end
        // 1. Link prefixes and suffixes
@@ -2364,7 +2367,6 @@
        document.getElementsByTagName('head')[0].appendChild(baseMeta);
        this.emit( 'document', document );
 };
-
 
 /**
  * Register for the 'document' event, normally emitted from the HTML5 tree
diff --git a/js/lib/mediawiki.ParsoidConfig.js 
b/js/lib/mediawiki.ParsoidConfig.js
index f717d27..f4e3035 100644
--- a/js/lib/mediawiki.ParsoidConfig.js
+++ b/js/lib/mediawiki.ParsoidConfig.js
@@ -6,7 +6,7 @@
 
 var $ = require( './fakejquery' ),
        citejs = require('./ext.Cite.js'),
-       Cite = citejs.Cite,
+       Ref = citejs.Ref,
        References = citejs.References;
 
 var wikipedias = 
"en|de|fr|nl|it|pl|es|ru|ja|pt|zh|sv|vi|uk|ca|no|fi|cs|hu|ko|fa|id|tr|ro|ar|sk|eo|da|sr|lt|ms|eu|he|sl|bg|kk|vo|war|hr|hi|et|az|gl|simple|nn|la|th|el|new|roa-rup|oc|sh|ka|mk|tl|ht|pms|te|ta|be-x-old|ceb|br|be|lv|sq|jv|mg|cy|lb|mr|is|bs|yo|an|hy|fy|bpy|lmo|pnb|ml|sw|bn|io|af|gu|zh-yue|ne|nds|ku|ast|ur|scn|su|qu|diq|ba|tt|my|ga|cv|ia|nap|bat-smg|map-bms|wa|kn|als|am|bug|tg|gd|zh-min-nan|yi|vec|hif|sco|roa-tara|os|arz|nah|uz|sah|mn|sa|mzn|pam|hsb|mi|li|ky|si|co|gan|glk|ckb|bo|fo|bar|bcl|ilo|mrj|fiu-vro|nds-nl|tk|vls|se|gv|ps|rue|dv|nrm|pag|koi|pa|rm|km|kv|udm|csb|mhr|fur|mt|wuu|lij|ug|lad|pi|zea|sc|bh|zh-classical|nov|ksh|or|ang|kw|so|nv|xmf|stq|hak|ay|frp|frr|ext|szl|pcd|ie|gag|haw|xal|ln|rw|pdc|pfl|krc|crh|eml|ace|gn|to|ce|kl|arc|myv|dsb|vep|pap|bjn|as|tpi|lbe|wo|mdf|jbo|kab|av|sn|cbk-zam|ty|srn|kbd|lo|ab|lez|mwl|ltg|ig|na|kg|tet|za|kaa|nso|zu|rmy|cu|tn|chr|got|sm|bi|mo|bm|iu|chy|ik|pih|ss|sd|pnt|cdo|ee|ha|ti|bxr|om|ks|ts|ki|ve|sg|rn|dz|cr|lg|ak|tum|fj|st|tw|ch|ny|ff|xh|ng|ii|cho|mh|aa|kj|ho|mus|kr|hz";
@@ -48,8 +48,8 @@
        // SSS FIXME: Hardcoded right now, but need a generic registration 
mechanism
        // for native handlers
        this.nativeExtensions = {
-               cite: new Cite(),
-               refs: new References()
+               ref: new Ref(),
+               references: new References()
        };
 };
 
diff --git a/js/lib/mediawiki.parser.js b/js/lib/mediawiki.parser.js
index cb5c4e5..2723e13 100644
--- a/js/lib/mediawiki.parser.js
+++ b/js/lib/mediawiki.parser.js
@@ -194,6 +194,11 @@
                options.isExtension = false;
        }
 
+       // default: not in references tag
+       if (options.inReferencesTag === undefined) {
+               options.inReferencesTag = false;
+       }
+
        return options;
 };
 
@@ -276,7 +281,10 @@
                cacheType += '::inBlockToken';
        }
        if ( options.isExtension ) {
-               cacheType += '::isExtension';
+               cacheType += '::isExt';
+       }
+       if ( options.inReferencesTag ) {
+               cacheType += '::inRefs';
        }
        return cacheType;
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/55833
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I2d243656e9e903d8dadb55ee7c0630824c65cc01
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to