Arlolra has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/394601 )

Change subject: Use lib/parse.js in api/routes.js
......................................................................

Use lib/parse.js in api/routes.js

Change-Id: I20f6cc3078853bbc462e5d565269d97bf097f880
---
M lib/api/ParsoidService.js
M lib/api/apiUtils.js
M lib/api/routes.js
M lib/html2wt/WikitextSerializer.js
M lib/index.js
5 files changed, 98 insertions(+), 99 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/01/394601/1

diff --git a/lib/api/ParsoidService.js b/lib/api/ParsoidService.js
index b4d52ae..4953b67 100644
--- a/lib/api/ParsoidService.js
+++ b/lib/api/ParsoidService.js
@@ -18,6 +18,8 @@
 var util = require('util');
 
 var Promise = require('../utils/promise.js');
+var ParsoidConfig = require('../config/ParsoidConfig.js').ParsoidConfig;
+var parseJsPath = require.resolve('../parse.js');
 
 /**
  * ParsoidService
@@ -33,15 +35,17 @@
  * Instantiates an [express](http://expressjs.com/) server
  * to handle HTTP requests and begins listening on the configured port.
  *
- * @param {ParsoidConfig} parsoidConfig
+ * @param {Object} parsoidOptions
  * @param {Object} processLogger
  *   WARNING: `processLogger` is not necessarily an instance of `Logger`.
  *   The interface is merely that exposed by service-runner, `log(level, 
info)`.
  *   Don't expect it to exit after you've logged "fatal" and other such things.
  * @return {Promise} server
  */
-ParsoidService.init = Promise.method(function(parsoidConfig, processLogger) {
+ParsoidService.init = Promise.method(function(parsoidOptions, processLogger) {
        processLogger.log('info', 'loading ...');
+
+       var parsoidConfig = new ParsoidConfig(null, parsoidOptions);
 
        // Get host and port from the environment, if available
        // note: in production, the port is exposed via the 'port' config 
stanza and
@@ -55,8 +59,10 @@
        // and 'serverInterface' is the legacy option name
        var host = parsoidConfig.interface || parsoidConfig.serverInterface || 
process.env.INTERFACE;
 
+       var parse = require(parseJsPath);
+
        // Load routes
-       var routes = require('./routes')(parsoidConfig, processLogger);
+       var routes = require('./routes')(parsoidConfig, processLogger, 
parsoidOptions, parse);
 
        var app = express();
 
@@ -205,7 +211,8 @@
                        reject(err);
                });
                server = app.listen(port, host, resolve);
-       }).then(function() {
+       })
+       .then(function() {
                port = server.address().port;
                processLogger.log('info',
                        util.format('ready on %s:%s', host || '', port));
diff --git a/lib/api/apiUtils.js b/lib/api/apiUtils.js
index d6a1d35..8172d0f 100644
--- a/lib/api/apiUtils.js
+++ b/lib/api/apiUtils.js
@@ -236,13 +236,7 @@
        }
        // Now pass it to the MediaWiki API with onlypst set so that it
        // subst's the templates.
-       return PHPParseRequest.promise(env, target, wt, 
true).then(function(wikitext) {
-               // Set data-parsoid to be discarded, so that the subst'ed
-               // content is considered new when it comes back.
-               env.discardDataParsoid = true;
-               // Use the returned wikitext as the page source.
-               return wikitext;
-       });
+       return PHPParseRequest.promise(env, target, wt, true);
 };
 
 apiUtils.wikitextContentType = function(env) {
diff --git a/lib/api/routes.js b/lib/api/routes.js
index d09d554..c6ecb22 100644
--- a/lib/api/routes.js
+++ b/lib/api/routes.js
@@ -18,8 +18,7 @@
 
 var TemplateRequest = ApiRequest.TemplateRequest;
 
-
-module.exports = function(parsoidConfig, processLogger) {
+module.exports = function(parsoidConfig, processLogger, parsoidOptions, parse) 
{
        var routes = {};
        var metrics = parsoidConfig.metrics;
        var REQ_TIMEOUT = parsoidConfig.timeouts.request;
@@ -50,6 +49,10 @@
                res.locals.bodyOnly = !!(req.query.body || req.body.body);
                // "subst" flag to perform {{subst:}} template expansion
                res.locals.subst = !!(req.query.subst || req.body.subst);
+               res.locals.envOptions = {
+                       prefix: res.locals.iwp,
+                       pageName: res.locals.pageName,
+               };
                next();
        };
 
@@ -106,6 +109,14 @@
                        return errOut(res, 'Invalid domain: ' + 
req.params.domain);
                }
                res.locals.iwp = iwp;
+
+               res.locals.envOptions = {
+                       prefix: res.locals.iwp,
+                       pageName: res.locals.pageName,
+                       cookie: req.headers.cookie,
+                       reqId: req.headers['x-request-id'],
+                       userAgent: req.headers['user-agent'],
+               };
 
                // "subst" flag to perform {{subst:}} template expansion
                res.locals.subst = !!(req.query.subst || req.body.subst);
@@ -179,14 +190,7 @@
                        }
                        return Promise.resolve().nodify(callback);
                }
-               var options = {
-                       prefix: res.locals.iwp,
-                       pageName: res.locals.pageName,
-                       cookie: req.headers.cookie,
-                       reqId: req.headers['x-request-id'],
-                       userAgent: req.headers['user-agent'],
-               };
-               MWParserEnv.getParserEnv(parsoidConfig, options)
+               MWParserEnv.getParserEnv(parsoidConfig, res.locals.envOptions)
                .then(function(env) {
                        env.logger.registerBackend(/fatal(\/.*)?/, errBack);
                        res.locals.env = env;
@@ -399,20 +403,13 @@
 
        // Spec'd in https://phabricator.wikimedia.org/T75955 and the API tests.
 
-       var wt2html = Promise.method(function(req, res, wt) {
+       var wt2html = Promise.method(function(req, res, wt, reuseExpansions) {
                var env = res.locals.env;
                var opts = res.locals.opts;
                var oldid = res.locals.oldid;
                var target = env.normalizeAndResolvePageTitle();
 
-               // VE, the only client using bodyOnly property,
-               // doesn't want section tags when this flag is set.
-               if (res.locals.bodyOnly) {
-                       env.wrapSections = false;
-               }
-
                var pageBundle = !!(res.locals.opts && res.locals.opts.format 
=== 'pagebundle');
-               env.pageBundle = pageBundle;
 
                // Performance Timing options
                var startTimers = new Map();
@@ -439,7 +436,8 @@
                                (typeof wt !== 'string' ? env.page.src : 
wt).length);
                });
 
-               if (typeof wt === 'string' && res.locals.subst) {
+               var doSubst = (typeof wt === 'string' && res.locals.subst);
+               if (doSubst) {
                        p = p.then(function(wikitext) {
                                // FIXME: reset limits after subst'ing
                                return apiUtils.substTopLevelTemplates(env, 
target, wikitext);
@@ -473,14 +471,20 @@
 
                        env.log('info', 'started parsing');
 
-                       var p2;
+                       var envOptions = Object.assign({
+                               pageBundle: pageBundle,
+                               // Set data-parsoid to be discarded, so that 
the subst'ed
+                               // content is considered new when it comes back.
+                               discardDataParsoid: doSubst,
+                       }, res.locals.envOptions);
+
+                       // VE, the only client using bodyOnly property,
+                       // doesn't want section tags when this flag is set.
+                       if (res.locals.bodyOnly) {
+                               envOptions.wrapSections = false;
+                       }
+
                        if (typeof wikitext === 'string') {
-                               env.setPageSrcInfo(wikitext);
-
-                               if (opts.contentmodel) {
-                                       env.page.meta.revision.contentmodel = 
opts.contentmodel;
-                               }
-
                                // Don't cache requests when wt is set in case 
somebody uses
                                // GET for wikitext parsing
                                apiUtils.setHeader(res, 'Cache-Control', 
'private,no-cache,s-maxage=0');
@@ -496,10 +500,13 @@
                                        // clear default page name
                                        env.page.name = '';
                                }
-
-                               p2 = env.getContentHandler().toHTML(env);
                        } else if (oldid) {
-                               env.pageWithOldid = true;
+                               envOptions.pageWithOldid = true;
+
+                               if (req.headers.cookie) {
+                                       // Don't cache requests with a session.
+                                       apiUtils.setHeader(res, 
'Cache-Control', 'private,no-cache,s-maxage=0');
+                               }
 
                                // Indicate the MediaWiki revision in a header 
as well for
                                // ease of extraction in clients.
@@ -511,47 +518,30 @@
                                        
startTimers.set('wt2html.pageWithOldid.parse', Date.now());
                                        
metrics.timing('wt2html.pageWithOldid.size.input', env.page.src.length);
                                }
-
-                               p2 = env.getContentHandler().toHTML(env)
-                               .tap(function() {
-                                       if (req.headers.cookie) {
-                                               // Don't cache requests with a 
session.
-                                               apiUtils.setHeader(res, 
'Cache-Control', 'private,no-cache,s-maxage=0');
-                                       }
-                               });
                        } else {
                                console.assert(false, 'Should be unreachable');
                        }
 
-                       return p2
-                       .tap(function(doc) {
-                               if (env.conf.parsoid.useBatchAPI) {
-                                       return DU.addRedLinks(env, doc);
-                               }
+                       return parse({
+                               // NOTE: This causes another TemplateRequest 
but otherwise
+                               // we don't have all the metadata.
+                               input: (typeof wikitext === 'string') ? 
wikitext : undefined,
+                               mode: 'wt2html',
+                               parsoidOptions: parsoidOptions,
+                               envOptions: envOptions,
+                               oldid: oldid,
+                               contentmodel: opts.contentmodel,
+                               contentVersion: env.contentVersion,
+                               bodyOnly: res.locals.bodyOnly,
+                               cacheConfig: true,
+                               reuseExpansions: reuseExpansions,
                        })
-                       // .timeout(REQ_TIMEOUT)
-                       .then(function(doc) {
-                               var out;
-                               if (pageBundle) {
-                                       out = 
DU.extractDpAndSerialize(res.locals.bodyOnly ? doc.body : doc, {
-                                               innerXML: res.locals.bodyOnly,
-                                       });
-                               } else {
-                                       out = {
-                                               html: 
DU.toXML(res.locals.bodyOnly ? doc.body : doc, {
-                                                       innerXML: 
res.locals.bodyOnly,
-                                               }),
-                                       };
-                               }
-
+                       .then(function(out) {
                                if (opts.format === 'lint') {
-                                       apiUtils.jsonResponse(res, 
env.lintLogger.buffer);
+                                       apiUtils.jsonResponse(res, out.lint);
                                } else {
-                                       var contentmodel = 
env.page.meta.revision.contentmodel;
-                                       apiUtils.wt2htmlRes(env, res, out.html, 
out.pb, contentmodel);
-                                       env.log('end/response');  // Flush log 
buffer for linter
+                                       apiUtils.wt2htmlRes(env, res, out.html, 
out.pb, out.contentmodel);
                                }
-
                                var html = out.html;
                                if (metrics) {
                                        if 
(startTimers.has('wt2html.wt.parse')) {
@@ -565,7 +555,6 @@
                                        }
                                        metrics.endTiming('wt2html.total', 
startTimers.get('wt2html.total'));
                                }
-
                                apiUtils.logTime(env, res, 'parsing');
                        });
                });
@@ -575,17 +564,12 @@
                var env = res.locals.env;
                var opts = res.locals.opts;
 
-               env.scrubWikitext = apiUtils.shouldScrub(req, 
env.scrubWikitext);
+               var envOptions = Object.assign({
+                       scrubWikitext: apiUtils.shouldScrub(req, 
env.scrubWikitext),
+               }, res.locals.envOptions);
 
                // Performance Timing options
                var startTimers = new Map();
-
-               env.page.reset();
-               env.page.meta.revision.revid = res.locals.oldid;
-               env.page.meta.revision.contentmodel =
-                       opts.contentmodel ||
-                       (opts.original && opts.original.contentmodel) ||
-                       env.page.meta.revision.contentmodel;
 
                env.bumpSerializerResourceUse('htmlSize', html.length);
                env.log('info', 'started serializing');
@@ -640,6 +624,9 @@
                        DU.applyPageBundle(doc, pb);
                }
 
+               var oldhtml;
+               var oldtext = null;
+
                if (original) {
                        if (opts.from === 'pagebundle') {
                                // Apply the pagebundle to the parsed doc.  
This supports the
@@ -660,22 +647,23 @@
                                // independently, but we leave this for 
backwards compatibility
                                // until content version <= 1.2.0 is 
deprecated.  Anything new
                                // should only depend on `env.originalVersion`.
-                               env.page.dpContentType = 
(original['data-parsoid'].headers || {})['content-type'];
+                               envOptions.dpContentType = 
(original['data-parsoid'].headers || {})['content-type'];
                        }
 
                        // If we got original src, set it
                        if (original.wikitext) {
                                // Don't overwrite env.page.meta!
-                               env.page.src = original.wikitext.body;
+                               oldtext = original.wikitext.body;
                        }
 
                        // If we got original html, parse it
                        if (original.html) {
-                               env.page.dom = 
DU.parseHTML(original.html.body).body;
+                               var oldbody = 
DU.parseHTML(original.html.body).body;
                                if (opts.from === 'pagebundle') {
                                        apiUtils.validatePageBundle(origPb, 
env.originalVersion);
-                                       
DU.applyPageBundle(env.page.dom.ownerDocument, origPb);
+                                       
DU.applyPageBundle(oldbody.ownerDocument, origPb);
                                }
+                               oldhtml = DU.toXML(oldbody);
                        }
                }
 
@@ -683,20 +671,33 @@
                //   "Both it and the oldid parameter are needed for
                //    clean round-tripping of HTML retrieved earlier with"
                // So, no oldid => no selser
-               var hasOldId = !!env.page.meta.revision.revid;
+               var hasOldId = !!res.locals.oldid;
                var useSelser = hasOldId && env.conf.parsoid.useSelser;
 
-               var handler = env.getContentHandler();
-               return handler.fromHTML(env, doc.body, useSelser)
-               // .timeout(REQ_TIMEOUT)
-               .then(function(output) {
+               var selser;
+               if (useSelser) {
+                       selser = { oldtext: oldtext, oldhtml: oldhtml };
+               }
+
+               return parse({
+                       input: DU.toXML(doc),
+                       mode: useSelser ? 'selser' : 'html2wt',
+                       parsoidOptions: parsoidOptions,
+                       envOptions: envOptions,
+                       oldid: res.locals.oldid,
+                       selser: selser,
+                       contentmodel: opts.contentmodel ||
+                               (opts.original && opts.original.contentmodel),
+                       cacheConfig: true,
+               })
+               .then(function(out) {
                        if (metrics) {
                                metrics.endTiming('html2wt.total',
                                        startTimers.get('html2wt.total'));
-                               metrics.timing('html2wt.size.output', 
output.length);
+                               metrics.timing('html2wt.size.output', 
out.wt.length);
                        }
                        apiUtils.logTime(env, res, 'serializing');
-                       apiUtils.plainResponse(res, output, undefined, 
apiUtils.wikitextContentType(env));
+                       apiUtils.plainResponse(res, out.wt, undefined, 
apiUtils.wikitextContentType(env));
                });
        });
 
@@ -742,8 +743,7 @@
                                        updates: opts.updates,
                                        html: DU.toXML(doc),
                                };
-                               env.cacheReusableExpansions(reuseExpansions);
-                               return wt2html(req, res);
+                               return wt2html(req, res, undefined, 
reuseExpansions);
                        }
                } else {
                        return apiUtils.fatalRequest(env, 'We do not know how 
to do this conversion.', 415);
diff --git a/lib/html2wt/WikitextSerializer.js 
b/lib/html2wt/WikitextSerializer.js
index 14a8ce0..2fc2a1a 100644
--- a/lib/html2wt/WikitextSerializer.js
+++ b/lib/html2wt/WikitextSerializer.js
@@ -420,7 +420,7 @@
        var defaultBlockSpc  = '{{_\n| _ = _\n}}'; // "block"
        var defaultInlineSpc = '{{_|_=_}}'; // "inline"
        // FIXME: Do a full regexp test maybe?
-       if (/.*data-parsoid\/0.0.1"$/.test(this.env.page.dpContentType)) {
+       if (/.*data-parsoid\/0.0.1"$/.test(this.env.dpContentType)) {
                // For previous versions of data-parsoid,
                // wt2html pipeline used "|foo = bar" style args
                // as the default.
diff --git a/lib/index.js b/lib/index.js
index 9170bae..b22fba4 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -5,7 +5,6 @@
 var path = require('path');
 var json = require('../package.json');
 var parseJs = require('./parse.js');
-var ParsoidConfig = require('./config/ParsoidConfig.js').ParsoidConfig;
 var ParsoidService = require('./api/ParsoidService.js');
 
 /**
@@ -49,6 +48,5 @@
        if (parsoidOptions.localsettings) {
                parsoidOptions.localsettings = 
path.resolve(options.appBasePath, parsoidOptions.localsettings);
        }
-       var parsoidConfig = new ParsoidConfig(null, parsoidOptions);
-       return ParsoidService.init(parsoidConfig, options.logger);
+       return ParsoidService.init(parsoidOptions, options.logger);
 };

-- 
To view, visit https://gerrit.wikimedia.org/r/394601
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I20f6cc3078853bbc462e5d565269d97bf097f880
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to