jenkins-bot has submitted this change and it was merged.

Change subject: Allow extensions to handle specific contentmodels.
......................................................................


Allow extensions to handle specific contentmodels.

Some extensions (for example, Extension:ProofreadPage) do more than
register specific extension tags: they also hook the parser to declare
responsibility for a specific contentmodel (like "proofread-page" or "json").
These are ContentHandler extensions
(https://www.mediawiki.org/wiki/Category:ContentHandler_extensions),
as opposed to tag extensions
(https://www.mediawiki.org/wiki/Category:Tag_extensions).
See https://www.mediawiki.org/wiki/Manual:ContentHandler for more
details.

We abstract the top-level parser entry points to allow dispatching
to extensions that parse alternative content models.  As a
demonstration, we also add a core extension that handles the "json"
content model, rendering it in the DOM as an HTML table (as
the json content model in mediawiki core does).

Bug: T48580
Bug: T133320
Change-Id: I7ca31c99de8e04b1359bc521df121db0eb69e384
---
M bin/parse.js
M bin/parserTests.js
M bin/roundtrip-test.js
M lib/api/apiUtils.js
M lib/api/routes.js
M lib/config/MWParserEnvironment.js
M lib/config/ParsoidConfig.js
M lib/config/WikiConfig.js
M lib/config/extapi.js
A lib/ext/JSON/index.js
M lib/jsapi.js
M lib/utils/DOMUtils.js
M lib/wt2html/DOMPostProcessor.js
M package.json
M tests/mocha/api.js
M tests/mocha/parse.js
M tests/mocha/test.helpers.js
M tests/mockAPI.js
18 files changed, 544 insertions(+), 89 deletions(-)

Approvals:
  Arlolra: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/bin/parse.js b/bin/parse.js
index ac81cb9..39c939b 100755
--- a/bin/parse.js
+++ b/bin/parse.js
@@ -73,6 +73,11 @@
                'boolean': false,
                'default': ParserEnv.prototype.defaultPageName,
        },
+       'contentmodel': {
+               description: 'The content model of the input.  Defaults to 
"wikitext" but extensions may support others (for example, "json").',
+               'boolean': false,
+               'default': null,
+       },
        'oldid': {
                description: 'Oldid of the given page.',
                'boolean': false,
@@ -171,7 +176,9 @@
        if (pb) {
                DU.applyPageBundle(doc, pb);
        }
-       return DU.serializeDOM(env, doc.body, argv.selser).then(function(out) {
+       var handler = env.getContentHandler(argv.contentmodel);
+       return handler.fromHTML(env, doc.body, argv.selser)
+       .then(function(out) {
                if (argv.html2wt || argv.wt2wt) {
                        return { trailingNL: true, out: out, env: env };
                } else {
@@ -182,8 +189,8 @@
 
 startsAtWikitext = function(argv, env, input) {
        env.setPageSrcInfo(input);
-       // Kick off the pipeline by feeding the input into the parser pipeline
-       return env.pipelineFactory.parse(env.page.src)
+       var handler = env.getContentHandler(argv.contentmodel);
+       return handler.toHTML(env)
        .then(function(doc) {
                if (argv.lint) {
                        env.log("end/parse");
@@ -298,7 +305,12 @@
                        var target = env.normalizeAndResolvePageTitle();
                        return TemplateRequest
                                .setPageSrcInfo(env, target, argv.oldid)
-                               .then(function() { return env.page.src; });
+                               .then(function() {
+                                       // Preserve fetched contentmodel.
+                                       argv.contentmodel = argv.contentmodel ||
+                                               
env.page.meta.revision.contentmodel;
+                                       return env.page.src;
+                               });
                });
        }).then(function(str) {
                str = str.replace(/\r/g, '');
diff --git a/bin/parserTests.js b/bin/parserTests.js
index c0aaef4..f05c36f 100755
--- a/bin/parserTests.js
+++ b/bin/parserTests.js
@@ -21,6 +21,7 @@
 var yargs = require('yargs');
 var Alea = require('alea');
 var DU = require('../lib/utils/DOMUtils.js').DOMUtils;
+var Promise = require('../lib/utils/promise.js');
 var ParsoidLogger = require('../lib/logger/ParsoidLogger.js').ParsoidLogger;
 var PEG = require('pegjs');
 var Util = require('../lib/utils/Util.js').Util;
@@ -417,30 +418,27 @@
  * @param {string|null} processWikitextCB.res
  */
 ParserTests.prototype.convertHtml2Wt = function(options, mode, item, body, 
processWikitextCB) {
-       var startsAtWikitext = mode === 'wt2wt' || mode === 'wt2html' || mode 
=== 'selser';
        var self = this;
-       var cb = function(err, wt) {
-               self.env.setPageSrcInfo(null);
-               self.env.page.dom = null;
-               processWikitextCB(err, wt);
-       };
-       try {
+       return Promise.try(function() {
+               var startsAtWikitext = mode === 'wt2wt' || mode === 'wt2html' 
|| mode === 'selser';
                if (startsAtWikitext) {
                        // FIXME: All tests share an env.
                        // => we need to initialize this each time over here.
-                       this.env.page.dom = 
DU.parseHTML(item.cachedBODYstr).body;
+                       self.env.page.dom = 
DU.parseHTML(item.cachedBODYstr).body;
                }
                if (mode === 'selser') {
-                       this.env.setPageSrcInfo(item.wikitext);
+                       self.env.setPageSrcInfo(item.wikitext);
                } else if (booleanOption(options.use_source) && 
startsAtWikitext) {
-                       this.env.setPageSrcInfo(item.wikitext);
+                       self.env.setPageSrcInfo(item.wikitext);
                } else {
-                       this.env.setPageSrcInfo(null);
+                       self.env.setPageSrcInfo(null);
                }
-               DU.serializeDOM(this.env, body, (mode === 'selser'), cb);
-       } catch (err) {
-               cb(err, null);
-       }
+               var handler = self.env.getContentHandler();
+               return handler.fromHTML(self.env, body, (mode === 'selser'));
+       }).finally(function() {
+               self.env.setPageSrcInfo(null);
+               self.env.page.dom = null;
+       }).nodify(processWikitextCB);
 };
 
 /**
@@ -890,7 +888,7 @@
 ParserTests.prototype.convertWt2Html = function(mode, wikitext, processHtmlCB) 
{
        var env = this.env;
        env.setPageSrcInfo(wikitext);
-       env.pipelineFactory.parse(env.page.src)
+       env.getContentHandler().toHTML(env)
        .then(function(doc) {
                return doc.body;
        })
diff --git a/bin/roundtrip-test.js b/bin/roundtrip-test.js
index 77e5305..e408b2b 100755
--- a/bin/roundtrip-test.js
+++ b/bin/roundtrip-test.js
@@ -534,9 +534,10 @@
        var offsets = Diff.convertDiffToOffsetPairs(diff);
        if (!diff.length || !offsets.length) { return []; }
 
+       var contentmodel = data.contentmodel || 'wikitext';
        var options = Object.assign({
                wt2html: true,
-               data: { wikitext: data.newWt },
+               data: { wikitext: data.newWt, contentmodel: contentmodel },
        }, parsoidOptions);
        return parsoidPost(profile, options).then(function(body) {
                data.newHTML = body.html;
@@ -619,11 +620,12 @@
                // oldid for later use in selser.
                data.oldid = res.request.path.replace(/^(.*)\//, '');
                data.oldWt = body;
+               data.contentmodel = res.headers['x-contentmodel'] || 'wikitext';
                // First, fetch the HTML for the requested page's wikitext
                var opts = Object.assign({
                        wt2html: true,
                        recordSizes: true,
-                       data: { wikitext: data.oldWt },
+                       data: { wikitext: data.oldWt, contentmodel: 
data.contentmodel },
                }, parsoidOptions);
                return parsoidPost(profile, opts);
        }).then(function(body) {
@@ -636,6 +638,7 @@
                        recordSizes: true,
                        data: {
                                html: data.oldHTML.body,
+                               contentmodel: data.contentmodel,
                                original: {
                                        'data-parsoid': data.oldDp,
                                        'data-mw': data.oldMw,
@@ -662,6 +665,7 @@
                        oldid: data.oldid,
                        data: {
                                html: newDocument.outerHTML,
+                               contentmodel: data.contentmodel,
                                original: {
                                        'data-parsoid': data.oldDp,
                                        'data-mw': data.oldMw,
diff --git a/lib/api/apiUtils.js b/lib/api/apiUtils.js
index 175c375..e3d77e9 100644
--- a/lib/api/apiUtils.js
+++ b/lib/api/apiUtils.js
@@ -159,7 +159,8 @@
        // Re-parse the HTML to uncover foster-parenting issues
        doc = domino.createDocument(doc.outerHTML);
 
-       return DU.serializeDOM(env, doc.body, useSelser).then(function(out) {
+       var handler = env.getContentHandler();
+       return handler.fromHTML(env, doc.body, useSelser).then(function(out) {
                // Strip selser trigger comment
                out = out.replace(/<!--rtSelserEditTestComment-->\n*$/, '');
 
@@ -494,6 +495,7 @@
 apiUtils.wt2htmlRes = function(env, res, html, pb) {
        if (env.pageBundle) {
                var response = {
+                       contentmodel: env.page.meta.revision.contentmodel,
                        html: {
                                headers: { 'content-type': 
apiUtils.htmlContentType(env) },
                                body: html,
diff --git a/lib/api/routes.js b/lib/api/routes.js
index 92fbad5..9a56651 100644
--- a/lib/api/routes.js
+++ b/lib/api/routes.js
@@ -320,7 +320,7 @@
 
                return TemplateRequest.setPageSrcInfo(env, target, 
oldid).then(function() {
                        env.log('info', 'started parsing');
-                       return env.pipelineFactory.parse(env.page.src);
+                       return env.getContentHandler().toHTML(env);
                })
                .then(apiUtils.roundTripDiff.bind(null, env, req, res, false))
                // .timeout(REQ_TIMEOUT)
@@ -348,7 +348,7 @@
 
                return TemplateRequest.setPageSrcInfo(env, target, 
oldid).then(function() {
                        env.log('info', 'started parsing');
-                       return env.pipelineFactory.parse(env.page.src);
+                       return env.getContentHandler().toHTML(env);
                }).then(function(doc) {
                        // strip newlines from the html
                        var html = doc.innerHTML.replace(/[\r\n]/g, '');
@@ -378,7 +378,7 @@
 
                return TemplateRequest.setPageSrcInfo(env, target, 
oldid).then(function() {
                        env.log('info', 'started parsing');
-                       return env.pipelineFactory.parse(env.page.src);
+                       return env.getContentHandler().toHTML(env);
                }).then(function(doc) {
                        doc = DU.parseHTML(DU.toXML(doc));
                        var comment = 
doc.createComment('rtSelserEditTestComment');
@@ -412,7 +412,7 @@
                env.setPageSrcInfo(req.body.content);
                env.log('info', 'started parsing');
 
-               return env.pipelineFactory.parse(env.page.src)
+               return env.getContentHandler().toHTML(env)
                .then(apiUtils.roundTripDiff.bind(null, env, req, res, false))
                .then(apiUtils.rtResponse.bind(null, env, req, res))
                .catch(function(err) {
@@ -426,6 +426,7 @@
 
        var wt2html = Promise.method(function(req, res, wt) {
                var env = res.locals.env;
+               var opts = res.locals.opts;
                var oldid = res.locals.oldid;
                var target = env.normalizeAndResolvePageTitle();
 
@@ -475,6 +476,9 @@
                        var p2;
                        if (typeof wikitext === 'string') {
                                env.setPageSrcInfo(wikitext);
+                               if (opts.contentmodel) {
+                                       env.page.meta.revision.contentmodel = 
opts.contentmodel;
+                               }
 
                                // Don't cache requests when wt is set in case 
somebody uses
                                // GET for wikitext parsing
@@ -492,7 +496,7 @@
                                        env.page.name = '';
                                }
 
-                               p2 = env.pipelineFactory.parse(env.page.src);
+                               p2 = env.getContentHandler().toHTML(env);
                        } else if (oldid) {
                                // Indicate the MediaWiki revision in a header 
as well for
                                // ease of extraction in clients.
@@ -505,7 +509,7 @@
                                        
metrics.timing('wt2html.pageWithOldid.size.input', env.page.src.length);
                                }
 
-                               p2 = env.pipelineFactory.parse(env.page.src)
+                               p2 = env.getContentHandler().toHTML(env)
                                .tap(function() {
                                        if (req.headers.cookie) {
                                                // Don't cache requests with a 
session.
@@ -560,6 +564,10 @@
 
                env.page.reset();
                env.page.meta.revision.revid = res.locals.oldid;
+               env.page.meta.revision.contentmodel =
+                       opts.contentmodel ||
+                       (opts.original && opts.original.contentmodel) ||
+                       env.page.meta.revision.contentmodel;
 
                env.bumpSerializerResourceUse('htmlSize', html.length);
                env.log('info', 'started serializing');
@@ -660,7 +668,8 @@
                var hasOldId = !!env.page.meta.revision.revid;
                var useSelser = hasOldId && env.conf.parsoid.useSelser;
 
-               return DU.serializeDOM(env, doc.body, useSelser)
+               var handler = env.getContentHandler();
+               return handler.fromHTML(env, doc.body, useSelser)
                // .timeout(REQ_TIMEOUT)
                .then(function(output) {
                        if (metrics) {
@@ -687,6 +696,15 @@
                if (env.originalVersion === null) {
                        return apiUtils.fatalRequest(env, 'Content-type of 
revision html is missing.', 400);
                }
+
+               // Set the contentmodel here for downgrades.
+               // Reuse will overwrite it when setting the src.
+               if (!env.page.meta) {
+                       env.page.meta = { revision: {} };
+               }
+               env.page.meta.revision.contentmodel =
+                       (revision && revision.contentmodel) ||
+                       env.page.meta.revision.contentmodel;
 
                // Downgrade (2 -> 1)
                if (revision === opts.original &&  // Maybe provide a stronger 
assertion.
@@ -716,6 +734,9 @@
                                        return apiUtils.redirectToOldid(req, 
res);
                                }
                                apiUtils.setHeader(res, env, 'content-type', 
apiUtils.wikitextContentType(env));
+                               if (env.page.meta && env.page.meta.revision && 
env.page.meta.revision.contentmodel) {
+                                       apiUtils.setHeader(res, env, 
'x-contentmodel', env.page.meta.revision.contentmodel);
+                               }
                                apiUtils.sendResponse(res, env, env.page.src);
                        });
                } else {
diff --git a/lib/config/MWParserEnvironment.js 
b/lib/config/MWParserEnvironment.js
index d25f2e7..dfccf46 100644
--- a/lib/config/MWParserEnvironment.js
+++ b/lib/config/MWParserEnvironment.js
@@ -322,7 +322,7 @@
  * @param {String|Object} srcOrMetadata page source or metadata
  */
 MWParserEnvironment.prototype.setPageSrcInfo = function(srcOrMetadata) {
-       if (typeof (srcOrMetadata) === 'string' || srcOrMetadata === null) {
+       if (typeof srcOrMetadata === 'string' || !srcOrMetadata) {
                this.page.reset();
                this.page.src = srcOrMetadata || '';
                return;
@@ -773,6 +773,27 @@
        }
 };
 
+/**
+ * @method
+ *
+ * Get an appropriate content handler, given a contentmodel.
+ *
+ * @param {String} [forceContentModel] An optional content model
+ *   which will override whatever the source specifies.
+ * @return {Object} An appropriate content handler with `toHTML` and `fromHTML`
+ *   methods.
+ */
+MWParserEnvironment.prototype.getContentHandler = function(forceContentModel) {
+       var contentmodel = forceContentModel ||
+                       this.page.meta.revision.contentmodel ||
+                       'wikitext';
+       if (!this.conf.wiki.extContentModel.has(contentmodel)) {
+               this.log('error', 'Unknown contentmodel', contentmodel);
+               contentmodel = 'wikitext';
+       }
+       return this.conf.wiki.extContentModel.get(contentmodel);
+};
+
 
 if (typeof module === "object") {
        module.exports.MWParserEnvironment = MWParserEnvironment;
diff --git a/lib/config/ParsoidConfig.js b/lib/config/ParsoidConfig.js
index 337923c..10a5df5 100644
--- a/lib/config/ParsoidConfig.js
+++ b/lib/config/ParsoidConfig.js
@@ -464,6 +464,9 @@
        // Give them some default extensions.
        if (!Array.isArray(apiConf.extensions)) {
                // Native support for certain extensions (Cite, etc)
+               // Note that in order to remain compatible with mediawiki core,
+               // core extensions (for example, for the JSON content model)
+               // must take precedence over other extensions.
                apiConf.extensions = Util.clone(this.defaultNativeExtensions);
                /* Include global user extensions */
                ParsoidConfig._collectExtensions(
@@ -578,7 +581,11 @@
        try {
                if (!fs.statSync(base).isDirectory()) { return; /* not dir */}
        } catch (e) { return; /* no file there */ }
-       fs.readdirSync(base).forEach(function(d) {
+       var files = fs.readdirSync(base);
+       // Sort! To ensure that we have a repeatable order in which we load
+       // and process extensions.
+       files.sort();
+       files.forEach(function(d) {
                var p = isNative ? path.join(base, d) : path.join(base, d, 
'parsoid');
                try {
                        if (!fs.statSync(p).isDirectory()) { return; /* not dir 
*/ }
diff --git a/lib/config/WikiConfig.js b/lib/config/WikiConfig.js
index 06cb9d0..905d89d 100644
--- a/lib/config/WikiConfig.js
+++ b/lib/config/WikiConfig.js
@@ -7,6 +7,7 @@
 var semver = require('semver');
 var baseConfig = require('./baseconfig/enwiki.json').query;
 var JSUtils = require('../utils/jsutils.js').JSUtils;
+var DU = require('../utils/DOMUtils.js').DOMUtils;
 var Util = require('../utils/Util.js').Util;
 
 // Make sure our base config is never modified
@@ -498,6 +499,17 @@
        // Register native extension handlers second to overwrite the above.
        this.extensionPostProcessors = [];
        this.extensionStyles = new Set();
+       this.extContentModel = new Map();
+       this.extContentModel.set('wikitext', {
+               toHTML: function(env) {
+                       // Default: wikitext parser.
+                       return env.pipelineFactory.parse(env.page.src);
+               },
+               fromHTML: function(env, body, useSelser) {
+                       // Default: wikitext serializer.
+                       return DU.serializeDOM(env, body, useSelser);
+               },
+       });
        mwApiConf.extensions.forEach(function(Ext) {
                var ext = new Ext();
                var tags = ext.config.hasOwnProperty('tags') ? ext.config.tags 
: [];
@@ -515,6 +527,12 @@
                                this.extensionStyles.add(s);
                        }, this);
                }
+               Object.keys(ext.config.contentmodels || 
{}).forEach(function(cm) {
+                       // For compatibility with mediawiki core, the first
+                       // registered extension wins.
+                       if (this.extContentModel.has(cm)) { return; }
+                       this.extContentModel.set(cm, 
ext.config.contentmodels[cm]);
+               }, this);
        }, this);
 
        // Function hooks on this wiki, indexed by their normalized form
diff --git a/lib/config/extapi.js b/lib/config/extapi.js
index 2f5a4f7..02f3dbc 100644
--- a/lib/config/extapi.js
+++ b/lib/config/extapi.js
@@ -30,6 +30,7 @@
                        // functions are changed.
                        Util: require('../utils/Util.js').Util,
                        DOMUtils: require('../utils/DOMUtils.js').DOMUtils,
+                       addMetaData: 
require('../wt2html/DOMPostProcessor.js').DOMPostProcessor.addMetaData,
                        defines: require('../wt2html/parser.defines.js'),
                };
        },
diff --git a/lib/ext/JSON/index.js b/lib/ext/JSON/index.js
new file mode 100644
index 0000000..37797ae
--- /dev/null
+++ b/lib/ext/JSON/index.js
@@ -0,0 +1,246 @@
+/* ----------------------------------------------------------------------
+ * This is a demonstration of content model handling in extensions for
+ * Parsoid.  It implements the "json" content model, to allow editing
+ * JSON data structures using Visual Editor.  It represents the JSON
+ * structure as a nested table.
+ * ---------------------------------------------------------------------- */
+'use strict';
+
+var ParsoidExtApi = 
module.parent.require('./extapi.js').versionCheck('^0.5.1');
+var DU = ParsoidExtApi.DOMUtils;
+var Promise = ParsoidExtApi.Promise;
+var addMetaData = ParsoidExtApi.addMetaData;
+
+/**
+ * Native Parsoid implementation of the "json" contentmodel.
+ */
+var JSONExt = function() {
+       this.config = {
+               contentmodels: {
+                       json: this,
+               },
+       };
+};
+
+var PARSE_ERROR_HTML =
+       '<!DOCTYPE html><html>' +
+       '<body>' +
+       '<table data-mw=\'{"errors":[{"key":"bad-json"}]}\' typeof="mw:Error">' 
+
+       '</body>';
+
+// JSON to HTML
+// Implementation matches that from includes/content/JsonContent.php in
+// mediawiki core, except that we add some additional classes to distinguish
+// value types.
+JSONExt.prototype.toHTML = Promise.method(function(env) {
+       var document = DU.parseHTML('<!DOCTYPE html><html><body>');
+       var rootValueTable;
+       var objectTable;
+       var objectRow;
+       var arrayTable;
+       var valueCell;
+       var primitiveValue;
+       var src;
+
+       rootValueTable = function(parent, val) {
+               if (Array.isArray(val)) {
+                       // Wrap arrays in another array so they're visually 
boxed in a
+                       // container.  Otherwise they are visually 
indistinguishable from
+                       // a single value.
+                       return arrayTable(parent, [ val ]);
+               }
+               if (val && typeof val === "object") {
+                       return objectTable(parent, val);
+               }
+               parent.innerHTML =
+                       '<table class="mw-json 
mw-json-single-value"><tbody><tr><td>';
+               return primitiveValue(parent.querySelector('td'), val);
+       };
+       objectTable = function(parent, val) {
+               parent.innerHTML = '<table class="mw-json 
mw-json-object"><tbody>';
+               var tbody = parent.firstElementChild.firstElementChild;
+               var keys = Object.keys(val);
+               if (keys.length) {
+                       keys.forEach(function(k) {
+                               objectRow(tbody, k, val[k]);
+                       });
+               } else {
+                       tbody.innerHTML =
+                               '<tr><td class="mw-json-empty">';
+               }
+       };
+       objectRow = function(parent, key, val) {
+               var tr = document.createElement('tr');
+               if (key !== undefined) {
+                       var th = document.createElement('th');
+                       th.textContent = key;
+                       tr.appendChild(th);
+               }
+               valueCell(tr, val);
+               parent.appendChild(tr);
+       };
+       arrayTable = function(parent, val) {
+               parent.innerHTML = '<table class="mw-json 
mw-json-array"><tbody>';
+               var tbody = parent.firstElementChild.firstElementChild;
+               if (val.length) {
+                       for (var i = 0; i < val.length; i++) {
+                               objectRow(tbody, undefined, val[i]);
+                       }
+               } else {
+                       tbody.innerHTML =
+                               '<tr><td class="mw-json-empty">';
+               }
+       };
+       valueCell = function(parent, val) {
+               var td = document.createElement('td');
+               if (Array.isArray(val)) {
+                       arrayTable(td, val);
+               } else if (val && typeof val === 'object') {
+                       objectTable(td, val);
+               } else {
+                       td.classList.add('value');
+                       primitiveValue(td, val);
+               }
+               parent.appendChild(td);
+       };
+       primitiveValue = function(parent, val) {
+               if (val === null) {
+                       parent.classList.add('mw-json-null');
+               } else if (val === true || val === false) {
+                       parent.classList.add('mw-json-boolean');
+               } else if (typeof val === 'number') {
+                       parent.classList.add('mw-json-number');
+               } else if (typeof val === 'string') {
+                       parent.classList.add('mw-json-string');
+               }
+               parent.textContent = '' + val;
+       };
+
+       try {
+               src = JSON.parse(env.page.src);
+               rootValueTable(document.body, src);
+       } catch (e) {
+               document = DU.parseHTML(PARSE_ERROR_HTML);
+       }
+       // We're responsible for running the standard DOMPostProcessor on our
+       // resulting document.
+       if (env.pageBundle) {
+               DU.setDataParsoid(document, {
+                       pagebundle: {
+                               parsoid: { counter: -1, ids: {} },
+                               mw: { ids: {} },
+                       },
+               });
+               DU.visitDOM(document.body, DU.storeDataAttribs, {
+                       storeInPageBundle: env.pageBundle,
+                       env: env,
+               });
+       }
+       addMetaData(env, document);
+       return document;
+});
+
+// HTML to JSON
+JSONExt.prototype.fromHTML = Promise.method(function(env, body, useSelser) {
+       var rootValueTable;
+       var objectTable;
+       var objectRow;
+       var arrayTable;
+       var valueCell;
+       var primitiveValue;
+
+       console.assert(DU.isBody(body), 'Expected a body node.');
+
+       rootValueTable = function(el) {
+               if (el.classList.contains('mw-json-single-value')) {
+                       return primitiveValue(el.querySelector('tr > td'));
+               } else if (el.classList.contains('mw-json-array')) {
+                       return arrayTable(el)[0];
+               } else {
+                       return objectTable(el);
+               }
+       };
+       objectTable = function(el) {
+               console.assert(el.classList.contains('mw-json-object'));
+               var tbody = el;
+               if (
+                       tbody.firstElementChild &&
+                       tbody.firstElementChild.tagName === 'TBODY'
+               ) {
+                       tbody = tbody.firstElementChild;
+               }
+               var rows = tbody.children;
+               var obj = {};
+               var empty = rows.length === 0 || (
+                       rows[0].firstElementChild &&
+                       
rows[0].firstElementChild.classList.contains('mw-json-empty')
+               );
+               if (!empty) {
+                       for (var i = 0; i < rows.length; i++) {
+                               objectRow(rows[i], obj, undefined);
+                       }
+               }
+               return obj;
+       };
+       objectRow = function(tr, obj, key) {
+               var td = tr.firstElementChild;
+               if (key === undefined) {
+                       key = td.textContent;
+                       td = td.nextElementSibling;
+               }
+               obj[key] = valueCell(td);
+       };
+       arrayTable = function(el) {
+               console.assert(el.classList.contains('mw-json-array'));
+               var tbody = el;
+               if (
+                       tbody.firstElementChild &&
+                       tbody.firstElementChild.tagName === 'TBODY'
+               ) {
+                       tbody = tbody.firstElementChild;
+               }
+               var rows = tbody.children;
+               var arr = [];
+               var empty = rows.length === 0 || (
+                       rows[0].firstElementChild &&
+                       
rows[0].firstElementChild.classList.contains('mw-json-empty')
+               );
+               if (!empty) {
+                       for (var i = 0; i < rows.length; i++) {
+                               objectRow(rows[i], arr, i);
+                       }
+               }
+               return arr;
+       };
+       valueCell = function(el) {
+               console.assert(el.tagName === 'TD');
+               var table = el.firstElementChild;
+               if (table && table.classList.contains('mw-json-array')) {
+                       return arrayTable(table);
+               } else if (table && table.classList.contains('mw-json-object')) 
{
+                       return objectTable(table);
+               } else {
+                       return primitiveValue(el);
+               }
+       };
+       primitiveValue = function(el) {
+               if (el.classList.contains('mw-json-null')) {
+                       return null;
+               } else if (el.classList.contains('mw-json-boolean')) {
+                       return /true/.test(el.textContent);
+               } else if (el.classList.contains('mw-json-number')) {
+                       return +el.textContent;
+               } else if (el.classList.contains('mw-json-string')) {
+                       return '' + el.textContent;
+               } else {
+                       return undefined; // shouldn't happen.
+               }
+       };
+       var table = body.firstElementChild;
+       console.assert(table && table.tagName === 'TABLE');
+       return JSON.stringify(rootValueTable(table), null, 4);
+});
+
+if (typeof module === "object") {
+       module.exports = JSONExt;
+}
diff --git a/lib/jsapi.js b/lib/jsapi.js
index 0888f32..4c8d166 100644
--- a/lib/jsapi.js
+++ b/lib/jsapi.js
@@ -34,7 +34,7 @@
                        body.appendChild(nodes[i].cloneNode(true));
                }
        }
-       return DU.serializeDOM(env, body, false);
+       return env.getContentHandler().fromHTML(env, body, false);
 };
 
 // toString helper
diff --git a/lib/utils/DOMUtils.js b/lib/utils/DOMUtils.js
index c2fbd0e..d44c928 100644
--- a/lib/utils/DOMUtils.js
+++ b/lib/utils/DOMUtils.js
@@ -2642,7 +2642,59 @@
 /**
  * @method
  *
- * The main serializer handler.
+ * Fetch prior DOM for selser.  This is factored out of
+ * DU.serializeDOM so that it can be reused by alternative
+ * content handlers which support selser.
+ *
+ * @param {Object} env The environment.
+ * @param {Boolean} useSelser Use the selective serializer, or not.
+ * @return {Promise} a promise that is resolved after selser information
+ *   has been loaded.
+ */
+DOMUtils.fetchSelser = function(env, useSelser) {
+       var hasOldId = !!env.page.meta.revision.revid;
+       var needsContent = useSelser && hasOldId && (env.page.src === null);
+       var needsOldDOM = useSelser && !(env.page.dom || env.page.domdiff);
+
+       var p = Promise.resolve();
+       if (needsContent) {
+               p = p.then(function() {
+                       var target = env.normalizeAndResolvePageTitle();
+                       return TemplateRequest.setPageSrcInfo(
+                               env, target, env.page.meta.revision.revid
+                       ).catch(function(err) {
+                               env.log('error', 'Error while fetching page source.');
+                       });
+               });
+       }
+       if (needsOldDOM) {
+               p = p.then(function() {
+                       if (env.page.src === null) {
+                               // The src fetch failed or we never had an oldid.
+                               // We'll just fallback to non-selser.
+                               return;
+                       }
+                       return env.getContentHandler().toHTML(env)
+                       .then(function(doc) {
+                               env.page.dom = DU.parseHTML(DU.toXML(doc)).body;
+                       })
+                       .catch(function(err) {
+                               env.log('error', 'Error while parsing original DOM.');
+                       });
+               });
+       }
+
+       return p;
+};
+
+/**
+ * @method
+ *
+ * The main serializer from DOM to *wikitext*.
+ *
+ * If you could be handling non-wikitext content, use
+ * `env.getContentHandler().fromHTML(env, body, useSelser)` instead.
+ * See {@link MWParserEnvironment#getContentHandler}.
  *
  * @param {Object} env The environment.
  * @param {Node} body The document body to serialize.
@@ -2657,52 +2709,9 @@
                SelectiveSerializer = require('../html2wt/SelectiveSerializer.js')
                        .SelectiveSerializer;
        }
-
        console.assert(DU.isBody(body), 'Expected a body node.');
 
-       var hasOldId = !!env.page.meta.revision.revid;
-       var needsWt = useSelser && hasOldId && (env.page.src === null);
-       var needsOldDOM = useSelser && !(env.page.dom || env.page.domdiff);
-
-       var steps = [];
-       if (needsWt) {
-               steps.push(function() {
-                       var target = env.normalizeAndResolvePageTitle();
-                       return TemplateRequest.setPageSrcInfo(
-                               env, target, env.page.meta.revision.revid
-                       ).catch(function(err) {
-                               env.log('error', 'Error while fetching page source.');
-                       });
-               });
-       }
-       if (needsOldDOM) {
-               steps.push(function() {
-                       if (env.page.src === null) {
-                               // The src fetch failed or we never had an oldid.
-                               // We'll just fallback to non-selser.
-                               return;
-                       }
-                       return env.pipelineFactory.parse(env.page.src)
-                       .then(function(doc) {
-                               env.page.dom = DU.parseHTML(DU.toXML(doc)).body;
-                       })
-                       .catch(function(err) {
-                               env.log('error', 'Error while parsing original DOM.');
-                       });
-               });
-       }
-
-       // If we can, perform these steps in parallel (w/ map).
-       var p;
-       if (!useSelser) {
-               p = Promise.resolve();
-       } else {
-               p = Promise.reduce(steps, function(prev, func) {
-                       return func();
-               }, null);
-       }
-
-       return p.then(function() {
+       return DOMUtils.fetchSelser(env, useSelser).then(function() {
                var Serializer = useSelser ? SelectiveSerializer : WikitextSerializer;
                var serializer = new Serializer({ env: env });
                // TODO(arlolra): There's probably an opportunity to refactor callers
diff --git a/lib/wt2html/DOMPostProcessor.js b/lib/wt2html/DOMPostProcessor.js
index 08d5180..35f7d58 100644
--- a/lib/wt2html/DOMPostProcessor.js
+++ b/lib/wt2html/DOMPostProcessor.js
@@ -187,7 +187,7 @@
 
 DOMPostProcessor.prototype.resetState = function(opts) {
        this.atTopLevel = opts && opts.toplevel;
-       this.displayTitle = null;
+       this.env.page.meta.displayTitle = null;
 };
 
 /**
@@ -206,7 +206,7 @@
                // Set title to display when present (last one wins).
                if (DU.hasNodeName(node, "meta") &&
                                node.getAttribute("property") === "mw:PageProp/displaytitle") {
-                       this.displayTitle = node.getAttribute("content");
+                       env.page.meta.displayTitle = node.getAttribute("content");
                }
        } else if (DU.isComment(node) && /^\{[^]+\}$/.test(node.data)) {
                // Convert serialized meta tags back from comments.
@@ -239,9 +239,8 @@
        return true;
 };
 
-DOMPostProcessor.prototype.addMetaData = function(document) {
-       var env = this.env;
-
+// FIXME: consider moving to DOMUtils or MWParserEnvironment.
+DOMPostProcessor.addMetaData = function(env, document) {
        // add <head> element if it was missing
        if (!document.head) {
                document.documentElement.
@@ -320,7 +319,7 @@
        appendToHead(document, 'link',
                { rel: 'dc:isVersionOf', href: wikiPageUrl });
 
-       document.title = this.displayTitle || env.page.meta.title || '';
+       document.title = env.page.meta.displayTitle || env.page.meta.title || '';
 
        // Add base href pointing to the wiki root
        appendToHead(document, 'base', { href: env.conf.wiki.baseURI });
@@ -441,11 +440,10 @@
        // For sub-pipeline documents, we are done.
        // For the top-level document, we generate <head> and add it.
        if (this.atTopLevel) {
-               this.addMetaData(document);
+               DOMPostProcessor.addMetaData(env, document);
                if (psd.traceFlags && psd.traceFlags.indexOf('time') !== -1) {
                        env.printTimeProfile();
                }
-
                if (psd.dumpFlags && psd.dumpFlags.indexOf('wt2html:limits') !== -1) {
                        env.printParserResourceUsage({'HTML Size': document.outerHTML.length});
                }
diff --git a/package.json b/package.json
index ea71182..8b7c07b 100644
--- a/package.json
+++ b/package.json
@@ -57,7 +57,7 @@
     "dump-tokenizer": "node lib/wt2html/tokenizer.js",
     "mocha": "mocha --opts tests/mocha/mocha.opts tests/mocha",
     "parserTests": "node bin/parserTests.js --wt2html --wt2wt --html2wt --html2html --selser --no-color --quiet --blacklist",
-    "roundtrip": "node bin/roundtrip-test.js -c 'Barack Obama' && node bin/roundtrip-test.js -c --prefix frwiki Chope && node bin/roundtrip-test.js -c --xml Parkour",
+    "roundtrip": "node bin/roundtrip-test.js -c 'Barack Obama' && node bin/roundtrip-test.js -c --prefix frwiki Chope && node bin/roundtrip-test.js -c --xml Parkour && node bin/roundtrip-test.js -c --domain www.mediawiki.org --oldid 2170316 'User:Legoktm/test_this_is_json'",
     "toolcheck": "bin/toolcheck.sh",
     "test": "npm run nsp && npm run lint && npm run parserTests && npm run mocha",
     "cover-mocha": "istanbul cover _mocha --dir ./coverage/mocha --  --opts tests/mocha/mocha.opts tests/mocha",
diff --git a/tests/mocha/api.js b/tests/mocha/api.js
index 6af99b5..e3f6d55 100644
--- a/tests/mocha/api.js
+++ b/tests/mocha/api.js
@@ -416,10 +416,28 @@
                        .end(done);
                });
 
+               it('should get from a title and revision (html, json content)', function(done) {
+                       request(api)
+                       .get(mockDomain + '/v3/page/html/JSON_Page/101')
+                       .expect(validHtmlResponse(function(doc) {
+                               doc.body.firstChild.nodeName.should.equal('TABLE');
+                       }))
+                       .end(done);
+               });
+
                it('should get from a title and revision (pagebundle)', function(done) {
                        request(api)
                        .get(mockDomain + '/v3/page/pagebundle/Main_Page/1')
                        .expect(validPageBundleResponse())
+                       .end(done);
+               });
+
+               it('should get from a title and revision (pagebundle, json content)', function(done) {
+                       request(api)
+                       .get(mockDomain + '/v3/page/pagebundle/JSON_Page/101')
+                       .expect(validPageBundleResponse(function(doc) {
+                               doc.body.firstChild.nodeName.should.equal('TABLE');
+                       }))
                        .end(done);
                });
 
@@ -442,6 +460,19 @@
                        .end(done);
                });
 
+               it('should accept json contentmodel as a string for html', function(done) {
+                       request(api)
+                       .post(mockDomain + '/v3/transform/wikitext/to/html/')
+                       .send({
+                               wikitext: '{"1":2}',
+                               contentmodel: 'json',
+                       })
+                       .expect(validHtmlResponse(function(doc) {
+                               doc.body.firstChild.nodeName.should.equal('TABLE');
+                       }))
+                       .end(done);
+               });
+
                it('should accept wikitext as a string for pagebundle', function(done) {
                        request(api)
                        .post(mockDomain + '/v3/transform/wikitext/to/pagebundle/')
@@ -450,6 +481,20 @@
                        })
                        .expect(validPageBundleResponse(function(doc) {
                                doc.body.firstChild.nodeName.should.equal('H2');
+                       }))
+                       .end(done);
+               });
+
+               it('should accept json contentmodel as a string for pagebundle', function(done) {
+                       request(api)
+                       .post(mockDomain + '/v3/transform/wikitext/to/pagebundle/')
+                       .send({
+                               wikitext: '{"1":2}',
+                               contentmodel: 'json',
+                       })
+                       .expect(validPageBundleResponse(function(doc) {
+                               doc.body.firstChild.nodeName.should.equal('TABLE');
+                               should.not.exist(doc.querySelector('*[typeof="mw:Error"]'));
                        }))
                        .end(done);
                });
@@ -753,6 +798,17 @@
                        .end(done);
                });
 
+               it('should accept html for json contentmodel as a string', function(done) {
+                       request(api)
+                       .post(mockDomain + '/v3/transform/html/to/wikitext/')
+                       .send({
+                               html: '<!DOCTYPE html>\n<html prefix="dc: http://purl.org/dc/terms/ mw: http://mediawiki.org/rdf/"><head prefix="mwr: http://en.wikipedia.org/wiki/Special:Redirect/"><meta charset="utf-8"/><meta property="mw:articleNamespace" content="0"/><meta property="mw:html:version" content="1.2.1"/><meta property="mw:data-parsoid:version" content="0.0.2"/><link rel="dc:isVersionOf" href="//en.wikipedia.org/wiki/Main_Page"/><title></title><base href="//en.wikipedia.org/wiki/"/><link rel="stylesheet" href="//en.wikipedia.org/w/load.php?modules=mediawiki.legacy.commonPrint,shared|mediawiki.skinning.elements|mediawiki.skinning.content|mediawiki.skinning.interface|skins.vector.styles|site|mediawiki.skinning.content.parsoid|ext.cite.style&amp;only=styles&amp;skin=vector"/></head><body lang="en" class="mw-content-ltr sitedir-ltr ltr mw-body mw-body-content mediawiki" dir="ltr"><table class="mw-json mw-json-object"><tbody><tr><th>a</th><td class="value mw-json-number">4</td></tr><tr><th>b</th><td class="value mw-json-number">3</td></tr></tbody></table></body></html>',
+                               contentmodel: 'json',
+                       })
+                       .expect(validWikitextResponse('{\n    "a": 4,\n    "b": 3\n}'))
+                       .end(done);
+               });
+
                it('should accept html with headers', function(done) {
                        request(api)
                        .post(mockDomain + '/v3/transform/html/to/wikitext/')
diff --git a/tests/mocha/parse.js b/tests/mocha/parse.js
index fd82f8e..a65f2f5 100644
--- a/tests/mocha/parse.js
+++ b/tests/mocha/parse.js
@@ -47,6 +47,36 @@
                        });
                });
 
+               it('should support json contentmodel', function() {
+                       var opts = { contentmodel: 'json' };
+                       var testval = {a: "a", b: [2, true, ""], c: null};
+                       return parse(JSON.stringify(testval), opts).then(function(doc) {
+                               doc.should.have.property('nodeName', '#document');
+                               doc.outerHTML.startsWith('<!DOCTYPE html><html').should.equal(true);
+                               doc.outerHTML.endsWith('</body></html>').should.equal(true);
+                               // verify that body has only one <html> tag, one <body> tag, etc.
+                               doc.childNodes.length.should.equal(2);// <!DOCTYPE> and <html>
+                               doc.firstChild.nodeName.should.equal('html');
+                               doc.lastChild.nodeName.should.equal('HTML');
+                               // <html> children should be <head> and <body>
+                               var html = doc.documentElement;
+                               html.childNodes.length.should.equal(2);
+                               html.firstChild.nodeName.should.equal('HEAD');
+                               html.lastChild.nodeName.should.equal('BODY');
+                               // <body> should have one child, <table>
+                               var body = doc.body;
+                               body.childElementCount.should.equal(1);
+                               body.firstElementChild.nodeName.should.equal('TABLE');
+                               var table = doc.body.firstElementChild;
+                               table.classList.contains('mw-json').should.equal(true);
+                               // Now convert back to JSON
+                               return serialize(doc, null, opts);
+                       }).then(function(result) {
+                               var v = JSON.parse(result); // shouldn't throw an error!
+                               v.should.eql(testval);
+                       });
+               });
+
                ['no subpages', 'subpages'].forEach(function(desc, subpages) {
                        describe('should handle page titles with embedded ? (' + desc + ')', function() {
                                var linktests = [
diff --git a/tests/mocha/test.helpers.js b/tests/mocha/test.helpers.js
index 7838bc3..b028c2f 100644
--- a/tests/mocha/test.helpers.js
+++ b/tests/mocha/test.helpers.js
@@ -12,7 +12,10 @@
                        env = options.tweakEnv(env) || env;
                }
                env.setPageSrcInfo(src);
-               return env.pipelineFactory.parse(env.page.src)
+               if (options.contentmodel) {
+                       env.page.meta.revision.contentmodel = options.contentmodel;
+               }
+               return env.getContentHandler().toHTML(env)
                .then(function(doc) {
                        // linter tests need the env object
                        return { env: env, doc: doc };
@@ -30,11 +33,17 @@
                if (options.tweakEnv) {
                        env = options.tweakEnv(env) || env;
                }
+               if (!env.page.meta) {
+                       env.page.meta = { revision: {} };
+               }
+               if (options.contentmodel) {
+                       env.page.meta.revision.contentmodel = options.contentmodel;
+               }
                pb = pb || DU.extractPageBundle(doc);
                if (pb) {
                        DU.applyPageBundle(doc, pb);
                }
-               return DU.serializeDOM(env, doc.body, false);
+               return env.getContentHandler().fromHTML(env, doc.body, false);
        });
 };
 
diff --git a/tests/mockAPI.js b/tests/mockAPI.js
index 3729893..55b4ba1 100644
--- a/tests/mockAPI.js
+++ b/tests/mockAPI.js
@@ -151,6 +151,27 @@
        },
 };
 
+var jsonPage = {
+       query: {
+               pages: {
+                       '101': {
+                               pageid: 101,
+                               ns: 0,
+                               title: 'JSON_Page',
+                               revisions: [
+                                       {
+                                               revid: 101,
+                                               parentid: 0,
+                                               contentmodel: 'json',
+                                               contentformat: 'text/json',
+                                               '*': '[1]',
+                                       },
+                               ],
+                       },
+               },
+       },
+};
+
 var fnames = {
        'Image:Foobar.jpg': 'Foobar.jpg',
        'File:Foobar.jpg': 'Foobar.jpg',
@@ -251,6 +272,8 @@
                                return cb(null , largePage);
                        } else if (body.revids === '100' || body.titles === 'Reuse_Page') {
                                return cb(null , reusePage);
+                       } else if (body.revids === '101' || body.titles === 'JSON_Page') {
+                               return cb(null , jsonPage);
                        }
                }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/295707
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I7ca31c99de8e04b1359bc521df121db0eb69e384
Gerrit-PatchSet: 27
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: C. Scott Ananian <canan...@wikimedia.org>
Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org>
Gerrit-Reviewer: C. Scott Ananian <canan...@wikimedia.org>
Gerrit-Reviewer: GWicke <gwi...@wikimedia.org>
Gerrit-Reviewer: Jforrester <jforres...@wikimedia.org>
Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: Tim Starling <tstarl...@wikimedia.org>
Gerrit-Reviewer: Tpt <thoma...@hotmail.fr>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to