jenkins-bot has submitted this change and it was merged.

Change subject: Implement Parsoid v3 API
......................................................................


Implement Parsoid v3 API

The test suite updates for the new v3 API are split into a follow-up
patch (Ie474bde283a4e9c98d5139fba127ba3fae6f7151) to demonstrate that
this patch doesn't affect any v2 functionality.

The `v23` local should be renamed; bikeshedding over the new name is split
off into a follow-up patch (I3d5c6a57d6b9cf28abbf665abfd503044e4669ff).

As described in the linked bug, the v3 API changes are:

* The "body" parameter to the wt2html endpoint is renamed "bodyOnly", and
  it emits the *children* of the <body> element, not the <body> element
  itself.
* The "wt" format is renamed "wikitext".
* The result of the html2wt endpoint is wikitext content (like in Parsoid's
  v1 API and RESTBase), not a JSON wrapper around the same.
* The GET routes are /page/$format/ instead of just $format/
* The PUT routes are /transform/X/to/Y/ instead of just Y/
* The version and hostname have been reordered for consistency with
  VirtualRESTService and RESTBase v1, both of which put $wgServerName
  before the version.

Bug: T100680
Change-Id: I90f584c23e7057e278e7bf05039beaccd298532d
---
M api/ParsoidService.js
M api/routes.js
M api/utils.js
M lib/mediawiki.DOMUtils.js
4 files changed, 97 insertions(+), 47 deletions(-)

Approvals:
  Arlolra: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/api/ParsoidService.js b/api/ParsoidService.js
index b4b4a7e..e460ebe 100644
--- a/api/ParsoidService.js
+++ b/api/ParsoidService.js
@@ -116,9 +116,10 @@
 
        // Routes
 
-       var i = routes.interParams;
        var p = routes.parserEnvMw;
-       var v = routes.v2Middle;
+       var v1 = routes.v1Middle;
+       var v2 = routes.v2Middle;
+       var v3 = routes.v3Middle;
 
        function re(str) { return new RegExp(str); }
 
@@ -130,22 +131,31 @@
        app.get('/robots.txt', routes.robots);
 
        // private routes
-       app.get(re('^/_html/(?:(' + mwApiRe + ')/(.*))?'), i, p, 
routes.html2wtForm);
-       app.get(re('^/_wikitext/(?:(' + mwApiRe + ')/(.*))?'), i, p, 
routes.wt2htmlForm);
-       app.get(re('^/_rt/(?:(' + mwApiRe + ')/(.*))?'), i, p, 
routes.roundtripTesting);
-       app.get(re('^/_rtve/(' + mwApiRe + ')/(.*)'), i, p, 
routes.roundtripTestingNL);
-       app.get(re('^/_rtselser/(' + mwApiRe + ')/(.*)'), i, p, 
routes.roundtripSelser);
-       app.get(re('^/_rtform/(?:(' + mwApiRe + ')/(.*))?'), i, p, 
routes.getRtForm);
-       app.post(re('^/_rtform/(?:(' + mwApiRe + ')/(.*))?'), i, p, 
routes.postRtForm);
+       app.get(re('^/_html/(?:(' + mwApiRe + ')/(.*))?'), v1, p, 
routes.html2wtForm);
+       app.get(re('^/_wikitext/(?:(' + mwApiRe + ')/(.*))?'), v1, p, 
routes.wt2htmlForm);
+       app.get(re('^/_rt/(?:(' + mwApiRe + ')/(.*))?'), v1, p, 
routes.roundtripTesting);
+       app.get(re('^/_rtve/(' + mwApiRe + ')/(.*)'), v1, p, 
routes.roundtripTestingNL);
+       app.get(re('^/_rtselser/(' + mwApiRe + ')/(.*)'), v1, p, 
routes.roundtripSelser);
+       app.get(re('^/_rtform/(?:(' + mwApiRe + ')/(.*))?'), v1, p, 
routes.getRtForm);
+       app.post(re('^/_rtform/(?:(' + mwApiRe + ')/(.*))?'), v1, p, 
routes.postRtForm);
+
+       // Put v3 routes before v1 routes so they have a chance to match even
+       // if the user configured prefix === domain.  The 'v3' in the path will
+       // disambiguate.  (Article titles should be capitalized, which will
+       // prevent an article named 'v3' from being an additional source of
+       // ambiguity.)
+
+       // v3 API routes
+       app.get('/:domain/v3/page/:format/:title/:revision?', v3, p, 
routes.v3Get);
+       app.post('/:domain/v3/transform/:from/to/:format/:title?/:revision?', 
v3, p, routes.v3Post);
 
        // v1 API routes
-       app.get(re('^/(' + mwApiRe + ')/(.*)'), i, p, routes.v1Get);
-       app.post(re('^/(' + mwApiRe + ')/(.*)'), i, p, routes.v1Post);
+       app.get(re('^/(' + mwApiRe + ')/(.*)'), v1, p, routes.v1Get);
+       app.post(re('^/(' + mwApiRe + ')/(.*)'), v1, p, routes.v1Post);
 
        // v2 API routes
-       app.get('/v2/:domain/:format/:title/:revision?', v, p, routes.v2Get);
-       app.post('/v2/:domain/:format/:title?/:revision?', v, p, routes.v2Post);
-
+       app.get('/v2/:domain/:format/:title/:revision?', v2, p, routes.v2Get);
+       app.post('/v2/:domain/:format/:title?/:revision?', v2, p, 
routes.v2Post);
 
        // Get host and port from the environment, if available
        var port = parsoidConfig.serverPort || process.env.PORT || 8000;
diff --git a/api/routes.js b/api/routes.js
index 733b2c1..e953daf 100644
--- a/api/routes.js
+++ b/api/routes.js
@@ -25,21 +25,23 @@
 
        // Middlewares
 
-       routes.interParams = function(req, res, next) {
+       routes.v1Middle = function(req, res, next) {
+               res.locals.apiVersion = 1;
                res.locals.iwp = req.params[0] || parsoidConfig.defaultWiki || 
'';
                res.locals.pageName = req.params[1] || '';
                res.locals.oldid = req.body.oldid || req.query.oldid || null;
                // "body" flag to return just the body (instead of the entire 
HTML doc)
-               res.locals.body = !!(req.query.body || req.body.body);
+               res.locals.bodyOnly = !!(req.query.body || req.body.body);
                // "subst" flag to perform {{subst:}} template expansion
                res.locals.subst = !!(req.query.subst || req.body.subst);
                next();
        };
 
        var wt2htmlFormats = new Set(['pagebundle', 'html']);
-       var supportedFormats = new Set(['pagebundle', 'html', 'wt']);
+       var v2SupportedFormats = new Set(['pagebundle', 'html', 'wt']);
+       var v3SupportedFormats = new Set(['pagebundle', 'html', 'wikitext']);
 
-       routes.v2Middle = function(req, res, next) {
+       routes.v23Middle = function(version, req, res, next) {
                function errOut(err, code) {
                        apiUtils.sendResponse(res, {}, err, code || 404);
                }
@@ -49,29 +51,42 @@
                        return errOut('Invalid domain: ' + req.params.domain);
                }
 
+               res.locals.apiVersion = version;
                res.locals.iwp = iwp;
                res.locals.pageName = req.params.title || '';
                res.locals.oldid = req.params.revision || null;
 
-               // "body" flag to return just the body (instead of the entire 
HTML doc)
-               res.locals.body = !!(req.query.body || req.body.body);
+               // "bodyOnly" flag to return just the body (instead of the 
entire HTML doc)
+               if (version > 2) {
+                       res.locals.bodyOnly = !!(req.query.bodyOnly || 
req.body.bodyOnly);
+               } else {
+                       // in v2 this flag was named "body"
+                       res.locals.bodyOnly = !!(req.query.body || 
req.body.body);
+               }
 
-               var v2 = Object.assign({ format: req.params.format }, req.body);
+               var v23 = Object.assign({ format: req.params.format }, 
req.body);
+               var supportedFormats = (version > 2) ?
+                       v3SupportedFormats : v2SupportedFormats;
 
-               if (!supportedFormats.has(v2.format) ||
-                               (req.method === 'GET' && 
!wt2htmlFormats.has(v2.format))) {
-                       return errOut('Invalid format.');
+               if (!supportedFormats.has(v23.format) ||
+                               (req.method === 'GET' && 
!wt2htmlFormats.has(v23.format))) {
+                       return errOut('Invalid format: ' + v23.format);
+               }
+
+               // In v2 the "wikitext" format was named "wt"
+               if (v23.format === 'wt') {
+                       v23.format = 'wikitext';
                }
 
                // "subst" flag to perform {{subst:}} template expansion
                res.locals.subst = !!(req.query.subst || req.body.subst);
                // This is only supported for the html format
-               if (res.locals.subst && v2.format !== 'html') {
+               if (res.locals.subst && v23.format !== 'html') {
                        return errOut('Substitution is only supported for the 
HTML format.', 501);
                }
 
                if (req.method === 'POST') {
-                       var original = v2.original || {};
+                       var original = v23.original || {};
                        if (original.revid) {
                                res.locals.oldid = original.revid;
                        }
@@ -80,9 +95,11 @@
                        }
                }
 
-               res.locals.v2 = v2;
+               res.locals.v23 = v23;
                next();
        };
+       routes.v2Middle = routes.v23Middle.bind(routes, 2);
+       routes.v3Middle = routes.v23Middle.bind(routes, 3);
 
        routes.parserEnvMw = function(req, res, next) {
                function errBack(env, logData, callback) {
@@ -116,7 +133,7 @@
                                apiUtils.setHeader(res, env, 
'Access-Control-Allow-Origin',
                                        env.conf.parsoid.allowCORS);
                        }
-                       if (res.locals.v2 && res.locals.v2.format === 
'pagebundle') {
+                       if (res.locals.v23 && res.locals.v23.format === 
'pagebundle') {
                                env.storeDataParsoid = true;
                        }
                        if (req.body.hasOwnProperty('scrubWikitext')) {
@@ -419,7 +436,7 @@
 
        var v2Wt2html = function(req, res, wt) {
                var env = res.locals.env;
-               var v2 = res.locals.v2;
+               var v2 = res.locals.v23;
                var p = apiUtils.startWt2html(req, res, wt).then(function(ret) {
                        if (typeof ret.wikitext === 'string') {
                                return apiUtils.parseWt(ret)
@@ -460,13 +477,21 @@
                                .then(apiUtils.v2endWt2html.bind(null, ret));
                        } else {
                                var revid = env.page.meta.revision.revid;
-                               var path = [
-                                       '/v2',
+                               var path = (res.locals.apiVersion > 2 ? [
+                                       '',
                                        
env.conf.parsoid.mwApiMap.get(ret.prefix).domain,
+                                       'v3',
+                                       'page',
                                        v2.format,
                                        encodeURIComponent(ret.target),
                                        revid,
-                               ].join('/');
+                               ] : [
+                                       '/v2',
+                                       
env.conf.parsoid.mwApiMap.get(ret.prefix).domain,
+                                       v2.format === 'wikitext' ? 'wt' : 
v2.format,
+                                       encodeURIComponent(ret.target),
+                                       revid,
+                               ]).join('/');
                                if (Object.keys(req.query).length > 0) {
                                        path += '?' + qs.stringify(req.query);
                                }
@@ -478,13 +503,13 @@
        };
 
        // GET requests
-       routes.v2Get = function(req, res) {
+       routes.v2Get = routes.v3Get = function(req, res) {
                return v2Wt2html(req, res);
        };
 
        // POST requests
-       routes.v2Post = function(req, res) {
-               var v2 = res.locals.v2;
+       routes.v2Post = routes.v3Post = function(req, res) {
+               var v2 = res.locals.v23;
                var env = res.locals.env;
 
                function errOut(err, code) {
@@ -538,12 +563,18 @@
                                }
                                return ret;
                        }).then(apiUtils.endHtml2wt).then(function(output) {
-                               apiUtils.jsonResponse(res, env, {
-                                       wikitext: {
-                                               headers: { 'content-type': 
apiUtils.WIKITEXT_CONTENT_TYPE },
-                                               body: output,
-                                       },
-                               });
+                               if (res.locals.apiVersion > 2) {
+                                       apiUtils.setHeader(res, env, 
'content-type', apiUtils.WIKITEXT_CONTENT_TYPE);
+                                       apiUtils.sendResponse(res, env, output);
+                               } else {
+                                       // In API v2 we used to send a JSON 
object here
+                                       apiUtils.jsonResponse(res, env, {
+                                               wikitext: {
+                                                       headers: { 
'content-type': apiUtils.WIKITEXT_CONTENT_TYPE },
+                                                       body: output,
+                                               },
+                                       });
+                               }
                        });
                        return apiUtils.cpuTimeout(p, res)
                                .catch(apiUtils.timeoutResp.bind(null, env));
diff --git a/api/utils.js b/api/utils.js
index a8fc6d7..563060d 100644
--- a/api/utils.js
+++ b/api/utils.js
@@ -490,7 +490,10 @@
        var startTimers = ret.startTimers;
 
        if (doc) {
-               output = DU.serializeNode(res.locals.body ? doc.body : doc).str;
+               output = DU.serializeNode(res.locals.bodyOnly ? doc.body : doc, 
{
+                       // in v3 api, just the children of the body
+                       innerXML: res.locals.bodyOnly && res.locals.apiVersion 
> 2,
+               }).str;
                apiUtils.setHeader(res, env, 'content-type', 
apiUtils.HTML_CONTENT_TYPE);
                apiUtils.endResponse(res, env, output);
        }
@@ -515,9 +518,13 @@
 apiUtils.v2endWt2html = function(ret, doc) {
        var env = ret.env;
        var res = ret.res;
-       var v2 = res.locals.v2;
+       var v2 = res.locals.v23;
        if (v2.format === 'pagebundle') {
-               var out = DU.extractDpAndSerialize(doc, res.locals.body);
+               var out = DU.extractDpAndSerialize(doc, {
+                       bodyOnly: res.locals.bodyOnly,
+                       // in v3 api, just the children of the body
+                       innerXML: res.locals.bodyOnly && res.locals.apiVersion 
> 2,
+               });
                apiUtils.jsonResponse(res, env, {
                        html: {
                                headers: { 'content-type': 
apiUtils.HTML_CONTENT_TYPE },
diff --git a/lib/mediawiki.DOMUtils.js b/lib/mediawiki.DOMUtils.js
index 5dc91a2..4373730 100644
--- a/lib/mediawiki.DOMUtils.js
+++ b/lib/mediawiki.DOMUtils.js
@@ -2716,11 +2716,13 @@
 };
 
 // Pull the data-parsoid script element out of the doc before serializing.
-DOMUtils.extractDpAndSerialize = function(doc, justBody) {
+DOMUtils.extractDpAndSerialize = function(doc, options) {
        var dpScriptElt = doc.getElementById('mw-data-parsoid');
        dpScriptElt.parentNode.removeChild(dpScriptElt);
-       var options = { captureOffsets: true };
-       var out = DU.serializeNode(justBody ? doc.body : doc, options);
+       var out = DU.serializeNode(options.bodyOnly ? doc.body : doc, {
+               captureOffsets: true,
+               innerXML: options.innerXML,
+       });
        out.dp = JSON.parse(dpScriptElt.text);
        out.type = dpScriptElt.getAttribute('type');
        // Add the wt offsets.

-- 
To view, visit https://gerrit.wikimedia.org/r/233107
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I90f584c23e7057e278e7bf05039beaccd298532d
Gerrit-PatchSet: 7
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Cscott <[email protected]>
Gerrit-Reviewer: Alex Monk <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to