jenkins-bot has submitted this change and it was merged.
Change subject: Implement Parsoid v3 API
......................................................................
Implement Parsoid v3 API
The test suite updates for the new v3 API are split into a follow-up
patch (Ie474bde283a4e9c98d5139fba127ba3fae6f7151) to demonstrate that
this patch doesn't affect any v2 functionality.
The `v23` local should be renamed; bikeshedding for that is split off
into a follow-up patch (I3d5c6a57d6b9cf28abbf665abfd503044e4669ff).
As described in the linked bug, the v3 API changes are:
* The "body" parameter to the wt2html endpoint is renamed "bodyOnly", and
it emits the *children* of the <body> element, not the <body> element
itself.
* The "wt" format is renamed "wikitext".
* The result of the html2wt endpoint is wikitext content (like in Parsoid's
v1 API and RESTBase), not a JSON wrapper around the same.
* The GET routes are /page/$format/ instead of just $format/
* The POST routes are /transform/X/to/Y/ instead of just Y/
* The version and hostname have been reordered for consistency with
VirtualRESTService and RESTBase v1 (both of which put $wgServerName
before the version).
Bug: T100680
Change-Id: I90f584c23e7057e278e7bf05039beaccd298532d
---
M api/ParsoidService.js
M api/routes.js
M api/utils.js
M lib/mediawiki.DOMUtils.js
4 files changed, 97 insertions(+), 47 deletions(-)
Approvals:
Arlolra: Looks good to me, approved
jenkins-bot: Verified
diff --git a/api/ParsoidService.js b/api/ParsoidService.js
index b4b4a7e..e460ebe 100644
--- a/api/ParsoidService.js
+++ b/api/ParsoidService.js
@@ -116,9 +116,10 @@
// Routes
- var i = routes.interParams;
var p = routes.parserEnvMw;
- var v = routes.v2Middle;
+ var v1 = routes.v1Middle;
+ var v2 = routes.v2Middle;
+ var v3 = routes.v3Middle;
function re(str) { return new RegExp(str); }
@@ -130,22 +131,31 @@
app.get('/robots.txt', routes.robots);
// private routes
- app.get(re('^/_html/(?:(' + mwApiRe + ')/(.*))?'), i, p,
routes.html2wtForm);
- app.get(re('^/_wikitext/(?:(' + mwApiRe + ')/(.*))?'), i, p,
routes.wt2htmlForm);
- app.get(re('^/_rt/(?:(' + mwApiRe + ')/(.*))?'), i, p,
routes.roundtripTesting);
- app.get(re('^/_rtve/(' + mwApiRe + ')/(.*)'), i, p,
routes.roundtripTestingNL);
- app.get(re('^/_rtselser/(' + mwApiRe + ')/(.*)'), i, p,
routes.roundtripSelser);
- app.get(re('^/_rtform/(?:(' + mwApiRe + ')/(.*))?'), i, p,
routes.getRtForm);
- app.post(re('^/_rtform/(?:(' + mwApiRe + ')/(.*))?'), i, p,
routes.postRtForm);
+ app.get(re('^/_html/(?:(' + mwApiRe + ')/(.*))?'), v1, p,
routes.html2wtForm);
+ app.get(re('^/_wikitext/(?:(' + mwApiRe + ')/(.*))?'), v1, p,
routes.wt2htmlForm);
+ app.get(re('^/_rt/(?:(' + mwApiRe + ')/(.*))?'), v1, p,
routes.roundtripTesting);
+ app.get(re('^/_rtve/(' + mwApiRe + ')/(.*)'), v1, p,
routes.roundtripTestingNL);
+ app.get(re('^/_rtselser/(' + mwApiRe + ')/(.*)'), v1, p,
routes.roundtripSelser);
+ app.get(re('^/_rtform/(?:(' + mwApiRe + ')/(.*))?'), v1, p,
routes.getRtForm);
+ app.post(re('^/_rtform/(?:(' + mwApiRe + ')/(.*))?'), v1, p,
routes.postRtForm);
+
+ // Put v3 routes before v1 routes so they have a chance to match even
+ // if the user configured prefix === domain. The 'v3' in the path will
+ // disambiguate. (Article titles should be capitalized, which will
+ // prevent an article named 'v3' from being an additional source of
+ // ambiguity.)
+
+ // v3 API routes
+ app.get('/:domain/v3/page/:format/:title/:revision?', v3, p,
routes.v3Get);
+ app.post('/:domain/v3/transform/:from/to/:format/:title?/:revision?',
v3, p, routes.v3Post);
// v1 API routes
- app.get(re('^/(' + mwApiRe + ')/(.*)'), i, p, routes.v1Get);
- app.post(re('^/(' + mwApiRe + ')/(.*)'), i, p, routes.v1Post);
+ app.get(re('^/(' + mwApiRe + ')/(.*)'), v1, p, routes.v1Get);
+ app.post(re('^/(' + mwApiRe + ')/(.*)'), v1, p, routes.v1Post);
// v2 API routes
- app.get('/v2/:domain/:format/:title/:revision?', v, p, routes.v2Get);
- app.post('/v2/:domain/:format/:title?/:revision?', v, p, routes.v2Post);
-
+ app.get('/v2/:domain/:format/:title/:revision?', v2, p, routes.v2Get);
+ app.post('/v2/:domain/:format/:title?/:revision?', v2, p,
routes.v2Post);
// Get host and port from the environment, if available
var port = parsoidConfig.serverPort || process.env.PORT || 8000;
diff --git a/api/routes.js b/api/routes.js
index 733b2c1..e953daf 100644
--- a/api/routes.js
+++ b/api/routes.js
@@ -25,21 +25,23 @@
// Middlewares
- routes.interParams = function(req, res, next) {
+ routes.v1Middle = function(req, res, next) {
+ res.locals.apiVersion = 1;
res.locals.iwp = req.params[0] || parsoidConfig.defaultWiki ||
'';
res.locals.pageName = req.params[1] || '';
res.locals.oldid = req.body.oldid || req.query.oldid || null;
// "body" flag to return just the body (instead of the entire
HTML doc)
- res.locals.body = !!(req.query.body || req.body.body);
+ res.locals.bodyOnly = !!(req.query.body || req.body.body);
// "subst" flag to perform {{subst:}} template expansion
res.locals.subst = !!(req.query.subst || req.body.subst);
next();
};
var wt2htmlFormats = new Set(['pagebundle', 'html']);
- var supportedFormats = new Set(['pagebundle', 'html', 'wt']);
+ var v2SupportedFormats = new Set(['pagebundle', 'html', 'wt']);
+ var v3SupportedFormats = new Set(['pagebundle', 'html', 'wikitext']);
- routes.v2Middle = function(req, res, next) {
+ routes.v23Middle = function(version, req, res, next) {
function errOut(err, code) {
apiUtils.sendResponse(res, {}, err, code || 404);
}
@@ -49,29 +51,42 @@
return errOut('Invalid domain: ' + req.params.domain);
}
+ res.locals.apiVersion = version;
res.locals.iwp = iwp;
res.locals.pageName = req.params.title || '';
res.locals.oldid = req.params.revision || null;
- // "body" flag to return just the body (instead of the entire
HTML doc)
- res.locals.body = !!(req.query.body || req.body.body);
+ // "bodyOnly" flag to return just the body (instead of the
entire HTML doc)
+ if (version > 2) {
+ res.locals.bodyOnly = !!(req.query.bodyOnly ||
req.body.bodyOnly);
+ } else {
+ // in v2 this flag was named "body"
+ res.locals.bodyOnly = !!(req.query.body ||
req.body.body);
+ }
- var v2 = Object.assign({ format: req.params.format }, req.body);
+ var v23 = Object.assign({ format: req.params.format },
req.body);
+ var supportedFormats = (version > 2) ?
+ v3SupportedFormats : v2SupportedFormats;
- if (!supportedFormats.has(v2.format) ||
- (req.method === 'GET' &&
!wt2htmlFormats.has(v2.format))) {
- return errOut('Invalid format.');
+ if (!supportedFormats.has(v23.format) ||
+ (req.method === 'GET' &&
!wt2htmlFormats.has(v23.format))) {
+ return errOut('Invalid format: ' + v23.format);
+ }
+
+ // In v2 the "wikitext" format was named "wt"
+ if (v23.format === 'wt') {
+ v23.format = 'wikitext';
}
// "subst" flag to perform {{subst:}} template expansion
res.locals.subst = !!(req.query.subst || req.body.subst);
// This is only supported for the html format
- if (res.locals.subst && v2.format !== 'html') {
+ if (res.locals.subst && v23.format !== 'html') {
return errOut('Substitution is only supported for the
HTML format.', 501);
}
if (req.method === 'POST') {
- var original = v2.original || {};
+ var original = v23.original || {};
if (original.revid) {
res.locals.oldid = original.revid;
}
@@ -80,9 +95,11 @@
}
}
- res.locals.v2 = v2;
+ res.locals.v23 = v23;
next();
};
+ routes.v2Middle = routes.v23Middle.bind(routes, 2);
+ routes.v3Middle = routes.v23Middle.bind(routes, 3);
routes.parserEnvMw = function(req, res, next) {
function errBack(env, logData, callback) {
@@ -116,7 +133,7 @@
apiUtils.setHeader(res, env,
'Access-Control-Allow-Origin',
env.conf.parsoid.allowCORS);
}
- if (res.locals.v2 && res.locals.v2.format ===
'pagebundle') {
+ if (res.locals.v23 && res.locals.v23.format ===
'pagebundle') {
env.storeDataParsoid = true;
}
if (req.body.hasOwnProperty('scrubWikitext')) {
@@ -419,7 +436,7 @@
var v2Wt2html = function(req, res, wt) {
var env = res.locals.env;
- var v2 = res.locals.v2;
+ var v2 = res.locals.v23;
var p = apiUtils.startWt2html(req, res, wt).then(function(ret) {
if (typeof ret.wikitext === 'string') {
return apiUtils.parseWt(ret)
@@ -460,13 +477,21 @@
.then(apiUtils.v2endWt2html.bind(null, ret));
} else {
var revid = env.page.meta.revision.revid;
- var path = [
- '/v2',
+ var path = (res.locals.apiVersion > 2 ? [
+ '',
env.conf.parsoid.mwApiMap.get(ret.prefix).domain,
+ 'v3',
+ 'page',
v2.format,
encodeURIComponent(ret.target),
revid,
- ].join('/');
+ ] : [
+ '/v2',
+
env.conf.parsoid.mwApiMap.get(ret.prefix).domain,
+ v2.format === 'wikitext' ? 'wt' :
v2.format,
+ encodeURIComponent(ret.target),
+ revid,
+ ]).join('/');
if (Object.keys(req.query).length > 0) {
path += '?' + qs.stringify(req.query);
}
@@ -478,13 +503,13 @@
};
// GET requests
- routes.v2Get = function(req, res) {
+ routes.v2Get = routes.v3Get = function(req, res) {
return v2Wt2html(req, res);
};
// POST requests
- routes.v2Post = function(req, res) {
- var v2 = res.locals.v2;
+ routes.v2Post = routes.v3Post = function(req, res) {
+ var v2 = res.locals.v23;
var env = res.locals.env;
function errOut(err, code) {
@@ -538,12 +563,18 @@
}
return ret;
}).then(apiUtils.endHtml2wt).then(function(output) {
- apiUtils.jsonResponse(res, env, {
- wikitext: {
- headers: { 'content-type':
apiUtils.WIKITEXT_CONTENT_TYPE },
- body: output,
- },
- });
+ if (res.locals.apiVersion > 2) {
+ apiUtils.setHeader(res, env,
'content-type', apiUtils.WIKITEXT_CONTENT_TYPE);
+ apiUtils.sendResponse(res, env, output);
+ } else {
+ // In API v2 we used to send a JSON
object here
+ apiUtils.jsonResponse(res, env, {
+ wikitext: {
+ headers: {
'content-type': apiUtils.WIKITEXT_CONTENT_TYPE },
+ body: output,
+ },
+ });
+ }
});
return apiUtils.cpuTimeout(p, res)
.catch(apiUtils.timeoutResp.bind(null, env));
diff --git a/api/utils.js b/api/utils.js
index a8fc6d7..563060d 100644
--- a/api/utils.js
+++ b/api/utils.js
@@ -490,7 +490,10 @@
var startTimers = ret.startTimers;
if (doc) {
- output = DU.serializeNode(res.locals.body ? doc.body : doc).str;
+ output = DU.serializeNode(res.locals.bodyOnly ? doc.body : doc,
{
+ // in v3 api, just the children of the body
+ innerXML: res.locals.bodyOnly && res.locals.apiVersion
> 2,
+ }).str;
apiUtils.setHeader(res, env, 'content-type',
apiUtils.HTML_CONTENT_TYPE);
apiUtils.endResponse(res, env, output);
}
@@ -515,9 +518,13 @@
apiUtils.v2endWt2html = function(ret, doc) {
var env = ret.env;
var res = ret.res;
- var v2 = res.locals.v2;
+ var v2 = res.locals.v23;
if (v2.format === 'pagebundle') {
- var out = DU.extractDpAndSerialize(doc, res.locals.body);
+ var out = DU.extractDpAndSerialize(doc, {
+ bodyOnly: res.locals.bodyOnly,
+ // in v3 api, just the children of the body
+ innerXML: res.locals.bodyOnly && res.locals.apiVersion
> 2,
+ });
apiUtils.jsonResponse(res, env, {
html: {
headers: { 'content-type':
apiUtils.HTML_CONTENT_TYPE },
diff --git a/lib/mediawiki.DOMUtils.js b/lib/mediawiki.DOMUtils.js
index 5dc91a2..4373730 100644
--- a/lib/mediawiki.DOMUtils.js
+++ b/lib/mediawiki.DOMUtils.js
@@ -2716,11 +2716,13 @@
};
// Pull the data-parsoid script element out of the doc before serializing.
-DOMUtils.extractDpAndSerialize = function(doc, justBody) {
+DOMUtils.extractDpAndSerialize = function(doc, options) {
var dpScriptElt = doc.getElementById('mw-data-parsoid');
dpScriptElt.parentNode.removeChild(dpScriptElt);
- var options = { captureOffsets: true };
- var out = DU.serializeNode(justBody ? doc.body : doc, options);
+ var out = DU.serializeNode(options.bodyOnly ? doc.body : doc, {
+ captureOffsets: true,
+ innerXML: options.innerXML,
+ });
out.dp = JSON.parse(dpScriptElt.text);
out.type = dpScriptElt.getAttribute('type');
// Add the wt offsets.
--
To view, visit https://gerrit.wikimedia.org/r/233107
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I90f584c23e7057e278e7bf05039beaccd298532d
Gerrit-PatchSet: 7
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Cscott <[email protected]>
Gerrit-Reviewer: Alex Monk <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits