jenkins-bot has submitted this change and it was merged. Change subject: Roundtrip 2.0.0 content ......................................................................
Roundtrip 2.0.0 content * This is a follow up to b692ba97. * Applies data-mw passed in a pagebundle, in a backwards compatible way. * Adds a mocha test that could have been part of 06e08a00, to emphasize when data-mw isn't being applied. Change-Id: I834a6640eb982d760cdbba0c64d9ce48025bc65d --- M bin/parse.js M bin/roundtrip-test.js M lib/api/apiUtils.js M lib/api/routes.js M tests/mocha/api.js M tests/rttest.localsettings.js 6 files changed, 112 insertions(+), 20 deletions(-) Approvals: Subramanya Sastry: Looks good to me, approved jenkins-bot: Verified diff --git a/bin/parse.js b/bin/parse.js index a326223..852cecd 100755 --- a/bin/parse.js +++ b/bin/parse.js @@ -102,7 +102,7 @@ 'boolean': false, 'default': false, }, - 'contentversion': { + 'contentVersion': { description: 'The acceptable content version.', 'boolean': false, 'default': ParserEnv.prototype.contentVersion, @@ -219,8 +219,8 @@ env.pageBundle = argv.pagebundle; // The content version to output - if (argv.contentversion) { - env.setContentVersion(argv.contentversion); + if (argv.contentVersion) { + env.setContentVersion(argv.contentVersion); } if (!argv.wt2html) { diff --git a/bin/roundtrip-test.js b/bin/roundtrip-test.js index 28c1d91..cc1aca2 100755 --- a/bin/roundtrip-test.js +++ b/bin/roundtrip-test.js @@ -8,10 +8,13 @@ var Promise = require('../lib/utils/promise.js'); var Util = require('../lib/utils/Util.js').Util; var DU = require('../lib/utils/DOMUtils.js').DOMUtils; +var apiUtils = require('../lib/api/apiUtils'); var ParsoidConfig = require('../lib/config/ParsoidConfig.js').ParsoidConfig; var Diff = require('../lib/utils/Diff.js').Diff; var gzip = Promise.promisify(require('zlib').gzip, false); + +var defaultContentVersion = '1.2.1'; function displayDiff(type, count) { @@ -382,8 +385,14 @@ var newBody = domino.createDocument(data.newHTML.body).body; // Merge pagebundles so that HTML nodes can be compared and diff'ed. - DU.applyPageBundle(oldBody.ownerDocument, { parsoid: data.oldDp.body }); - DU.applyPageBundle(newBody.ownerDocument, { parsoid: data.newDp.body }); + DU.applyPageBundle(oldBody.ownerDocument, { + parsoid: data.oldDp.body, + mw: data.oldMw && data.oldMw.body, + }); + DU.applyPageBundle(newBody.ownerDocument, { + parsoid: data.newDp.body, + mw: data.newMw && data.newMw.body, + }); // Strip 'mw..' ids from the DOMs. This matters for 2 scenarios: // * reduces noise in visual diffs @@ -477,6 +486,9 @@ httpOptions.body.scrub_wikitext = true; } else { // wt2html uri += 'wikitext/to/pagebundle/' + options.title; + httpOptions.headers = { + Accept: apiUtils.pagebundleContentType(null, options.contentVersion), + }; } httpOptions.uri = uri; @@ -520,6 +532,7 @@ return parsoidPost(profile, options).then(function(body) { data.newHTML = body.html; data.newDp = body['data-parsoid']; + data.newMw = body['data-mw']; return checkIfSignificant(offsets, data); }); } @@ -573,6 +586,7 @@ var parsoidOptions = { uri: uri + domain + '/v3/', title: encodeURIComponent(title), + contentVersion: options.contentVersion || defaultContentVersion, }; var data = {}; @@ -597,6 +611,7 @@ }).then(function(body) { data.oldHTML = body.html; data.oldDp = body['data-parsoid']; + data.oldMw = body['data-mw']; // Now, request the wikitext for the obtained HTML var opts = Object.assign({ html2wt: true, @@ -605,6 +620,7 @@ html: data.oldHTML, original: { 'data-parsoid': data.oldDp, + 'data-mw': data.oldMw, wikitext: { body: data.oldWt, }, }, }, @@ -630,6 +646,7 @@ html: newDocument.outerHTML, original: { 'data-parsoid': data.oldDp, + 'data-mw': data.oldMw, wikitext: { body: data.oldWt }, html: data.oldHTML, }, @@ -687,7 +704,12 @@ description: 'http path to remote API,' + ' e.g. http://en.wikipedia.org/w/api.php', boolean: false, - default: null, + default: '', + }, + contentVersion: { + description: 'The acceptable content version.', + boolean: false, + default: defaultContentVersion, }, }; diff --git a/lib/api/apiUtils.js b/lib/api/apiUtils.js index 54e1def..d64b383 100644 --- a/lib/api/apiUtils.js +++ b/lib/api/apiUtils.js @@ -295,12 +295,14 @@ }; /** - * Validates that data-parsoid was provided in the expected format. + * Validates the pagebundle was provided in the expected format. * * @method * @param {Object} obj */ -apiUtils.validateDp = function(obj) { +apiUtils.validatePageBundle = function(obj) { + // FIXME(arlolra): This should also accept the content-type of the + // supplied html to determine which attributes are expected. var dp = obj['data-parsoid']; if (!dp || !dp.body || dp.body.constructor !== Object || !dp.body.ids) { var err = new Error('Invalid data-parsoid was provided.'); diff --git a/lib/api/routes.js b/lib/api/routes.js index 3b029b2..e5585d8 100644 --- a/lib/api/routes.js +++ b/lib/api/routes.js @@ -587,20 +587,26 @@ if (opts.original) { var dp = opts.original['data-parsoid']; // This is optional to support serializing html with inlined - // data-parsoid. + // data-* attributes. if (dp) { - apiUtils.validateDp(opts.original); - DU.applyPageBundle(doc, { parsoid: dp.body }); + apiUtils.validatePageBundle(opts.original); + DU.applyPageBundle(doc, { + parsoid: dp.body, + mw: opts.original['data-mw'] && opts.original['data-mw'].body, + }); + // FIXME(arlolra): use input content-type for this. env.page.dpContentType = (dp.headers || {})['content-type']; } if (opts.original.html) { env.page.dom = DU.parseHTML(opts.original.html.body).body; - // However, if we're given stored html, data-parsoid - // should be provided as well. We have no use case for - // stored inlined dp anymore. - apiUtils.validateDp(opts.original); - DU.applyPageBundle(env.page.dom.ownerDocument, { parsoid: dp.body }); - env.page.htmlContentType = (opts.original.html.headers || {})['content-type']; + // However, if we're given stored html, data-* attributes + // should be provided as well. We have no use case for + // stored inlined data-* attributes anymore. + apiUtils.validatePageBundle(opts.original); + DU.applyPageBundle(env.page.dom.ownerDocument, { + parsoid: dp.body, + mw: opts.original['data-mw'] && opts.original['data-mw'].body, + }); } } @@ -638,9 +644,12 @@ if (revision) { var doc = DU.parseHTML(revision.html.body); // Similar to the html2wt case, stored html is expected - // to also pass in dp. - apiUtils.validateDp(revision); - DU.applyPageBundle(doc, { parsoid: revision['data-parsoid'].body }); + // to also pass in data-* attributes. + apiUtils.validatePageBundle(revision); + DU.applyPageBundle(doc, { + parsoid: revision['data-parsoid'].body, + mw: revision['data-mw'] && revision['data-mw'].body, + }); DU.visitDOM(doc.body, DU.loadDataAttribs); var expansions = DU.extractExpansions(doc.body); Object.keys(opts.updates || {}).forEach(function(mode) { @@ -719,6 +728,12 @@ // Accept html as a string or object{body,headers} var html = (typeof opts.html === 'string') ? opts.html : (opts.html.body || ''); + + // FIXME(arlolra): what content-type is this!? from headers, + // original headers, or inlined version. Also, bikeshed a + // name for this (inputVersion, etc.) since contentVersion is + // for the output. + p = html2wt(req, res, html); } else { p = html2html(req, res); diff --git a/tests/mocha/api.js b/tests/mocha/api.js index 4f85007..461aecd 100644 --- a/tests/mocha/api.js +++ b/tests/mocha/api.js @@ -1042,6 +1042,57 @@ .end(done); }); + it('should return a 400 for missing data-mw', function(done) { + request(api) + .post(mockDomain + '/v3/transform/html/to/wikitext/') + .send({ + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/2.0.0"', + }, + body: '<p about="#mwt1" typeof="mw:Transclusion" id="mwAQ">hi</p>', + }, + original: { + title: 'Doesnotexist', + 'data-parsoid': { + body: { + ids: { "mwAQ": { "pi": [[{ "k": "1" }]] } }, + }, + }, + }, + }) + .expect(400) + .end(done); + }); + + it('should apply supplied data-mw', function(done) { + request(api) + .post(mockDomain + '/v3/transform/html/to/wikitext/') + .send({ + html: { + headers: { + 'content-type': 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/2.0.0"', + }, + body: '<p about="#mwt1" typeof="mw:Transclusion" id="mwAQ">hi</p>', + }, + original: { + title: 'Doesnotexist', + 'data-parsoid': { + body: { + ids: { "mwAQ": { "pi": [[{ "k": "1" }]] } }, + }, + }, + 'data-mw': { + body: { + ids: { "mwAQ": { "parts": [{ "template": { "target": { "wt": "1x", "href": "./Template:1x" }, "params": { "1": { "wt": "hi" } }, "i": 0 } }] } }, + }, + }, + }, + }) + .expect(validWikitextResponse('{{1x|hi}}')) + .end(done); + }); + it('should apply extra normalizations (scrub_wikitext)', function(done) { request(api) .post(mockDomain + '/v3/transform/html/to/wikitext/') diff --git a/tests/rttest.localsettings.js b/tests/rttest.localsettings.js index 275ac2d..cb4582c 100644 --- a/tests/rttest.localsettings.js +++ b/tests/rttest.localsettings.js @@ -74,4 +74,6 @@ count: function() {}, timing: function() {}, }; + + parsoidConfig.strictAcceptCheck = true; }; -- To view, visit https://gerrit.wikimedia.org/r/293658 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I834a6640eb982d760cdbba0c64d9ce48025bc65d Gerrit-PatchSet: 3 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: Cscott <canan...@wikimedia.org> Gerrit-Reviewer: GWicke <gwi...@wikimedia.org> Gerrit-Reviewer: Mobrovac <mobro...@wikimedia.org> Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits