jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/394601 )
Change subject: Use lib/parse.js in api/routes.js
......................................................................
Use lib/parse.js in api/routes.js
Change-Id: I20f6cc3078853bbc462e5d565269d97bf097f880
---
M lib/api/ParsoidService.js
M lib/api/apiUtils.js
M lib/api/routes.js
M lib/html2wt/WikitextSerializer.js
M lib/index.js
5 files changed, 98 insertions(+), 99 deletions(-)
Approvals:
Subramanya Sastry: Looks good to me, approved
jenkins-bot: Verified
diff --git a/lib/api/ParsoidService.js b/lib/api/ParsoidService.js
index b4d52ae..4953b67 100644
--- a/lib/api/ParsoidService.js
+++ b/lib/api/ParsoidService.js
@@ -18,6 +18,8 @@
var util = require('util');
var Promise = require('../utils/promise.js');
+var ParsoidConfig = require('../config/ParsoidConfig.js').ParsoidConfig;
+var parseJsPath = require.resolve('../parse.js');
/**
* ParsoidService
@@ -33,15 +35,17 @@
* Instantiates an [express](http://expressjs.com/) server
* to handle HTTP requests and begins listening on the configured port.
*
- * @param {ParsoidConfig} parsoidConfig
+ * @param {Object} parsoidOptions
* @param {Object} processLogger
* WARNING: `processLogger` is not necessarily an instance of `Logger`.
* The interface is merely that exposed by service-runner, `log(level, info)`.
* Don't expect it to exit after you've logged "fatal" and other such things.
* @return {Promise} server
*/
-ParsoidService.init = Promise.method(function(parsoidConfig, processLogger) {
+ParsoidService.init = Promise.method(function(parsoidOptions, processLogger) {
processLogger.log('info', 'loading ...');
+
+ var parsoidConfig = new ParsoidConfig(null, parsoidOptions);
// Get host and port from the environment, if available
// note: in production, the port is exposed via the 'port' config stanza and
@@ -55,8 +59,10 @@
// and 'serverInterface' is the legacy option name
var host = parsoidConfig.interface || parsoidConfig.serverInterface ||
process.env.INTERFACE;
+ var parse = require(parseJsPath);
+
// Load routes
- var routes = require('./routes')(parsoidConfig, processLogger);
+ var routes = require('./routes')(parsoidConfig, processLogger,
parsoidOptions, parse);
var app = express();
@@ -205,7 +211,8 @@
reject(err);
});
server = app.listen(port, host, resolve);
- }).then(function() {
+ })
+ .then(function() {
port = server.address().port;
processLogger.log('info',
util.format('ready on %s:%s', host || '', port));
diff --git a/lib/api/apiUtils.js b/lib/api/apiUtils.js
index d6a1d35..8172d0f 100644
--- a/lib/api/apiUtils.js
+++ b/lib/api/apiUtils.js
@@ -236,13 +236,7 @@
}
// Now pass it to the MediaWiki API with onlypst set so that it
// subst's the templates.
- return PHPParseRequest.promise(env, target, wt,
true).then(function(wikitext) {
- // Set data-parsoid to be discarded, so that the subst'ed
- // content is considered new when it comes back.
- env.discardDataParsoid = true;
- // Use the returned wikitext as the page source.
- return wikitext;
- });
+ return PHPParseRequest.promise(env, target, wt, true);
};
apiUtils.wikitextContentType = function(env) {
diff --git a/lib/api/routes.js b/lib/api/routes.js
index d09d554..c6ecb22 100644
--- a/lib/api/routes.js
+++ b/lib/api/routes.js
@@ -18,8 +18,7 @@
var TemplateRequest = ApiRequest.TemplateRequest;
-
-module.exports = function(parsoidConfig, processLogger) {
+module.exports = function(parsoidConfig, processLogger, parsoidOptions, parse)
{
var routes = {};
var metrics = parsoidConfig.metrics;
var REQ_TIMEOUT = parsoidConfig.timeouts.request;
@@ -50,6 +49,10 @@
res.locals.bodyOnly = !!(req.query.body || req.body.body);
// "subst" flag to perform {{subst:}} template expansion
res.locals.subst = !!(req.query.subst || req.body.subst);
+ res.locals.envOptions = {
+ prefix: res.locals.iwp,
+ pageName: res.locals.pageName,
+ };
next();
};
@@ -106,6 +109,14 @@
return errOut(res, 'Invalid domain: ' +
req.params.domain);
}
res.locals.iwp = iwp;
+
+ res.locals.envOptions = {
+ prefix: res.locals.iwp,
+ pageName: res.locals.pageName,
+ cookie: req.headers.cookie,
+ reqId: req.headers['x-request-id'],
+ userAgent: req.headers['user-agent'],
+ };
// "subst" flag to perform {{subst:}} template expansion
res.locals.subst = !!(req.query.subst || req.body.subst);
@@ -179,14 +190,7 @@
}
return Promise.resolve().nodify(callback);
}
- var options = {
- prefix: res.locals.iwp,
- pageName: res.locals.pageName,
- cookie: req.headers.cookie,
- reqId: req.headers['x-request-id'],
- userAgent: req.headers['user-agent'],
- };
- MWParserEnv.getParserEnv(parsoidConfig, options)
+ MWParserEnv.getParserEnv(parsoidConfig, res.locals.envOptions)
.then(function(env) {
env.logger.registerBackend(/fatal(\/.*)?/, errBack);
res.locals.env = env;
@@ -399,20 +403,13 @@
// Spec'd in https://phabricator.wikimedia.org/T75955 and the API tests.
- var wt2html = Promise.method(function(req, res, wt) {
+ var wt2html = Promise.method(function(req, res, wt, reuseExpansions) {
var env = res.locals.env;
var opts = res.locals.opts;
var oldid = res.locals.oldid;
var target = env.normalizeAndResolvePageTitle();
- // VE, the only client using bodyOnly property,
- // doesn't want section tags when this flag is set.
- if (res.locals.bodyOnly) {
- env.wrapSections = false;
- }
-
var pageBundle = !!(res.locals.opts && res.locals.opts.format
=== 'pagebundle');
- env.pageBundle = pageBundle;
// Performance Timing options
var startTimers = new Map();
@@ -439,7 +436,8 @@
(typeof wt !== 'string' ? env.page.src :
wt).length);
});
- if (typeof wt === 'string' && res.locals.subst) {
+ var doSubst = (typeof wt === 'string' && res.locals.subst);
+ if (doSubst) {
p = p.then(function(wikitext) {
// FIXME: reset limits after subst'ing
return apiUtils.substTopLevelTemplates(env,
target, wikitext);
@@ -473,14 +471,20 @@
env.log('info', 'started parsing');
- var p2;
+ var envOptions = Object.assign({
+ pageBundle: pageBundle,
+ // Set data-parsoid to be discarded, so that the subst'ed
+ // content is considered new when it comes back.
+ discardDataParsoid: doSubst,
+ }, res.locals.envOptions);
+
+ // VE, the only client using bodyOnly property,
+ // doesn't want section tags when this flag is set.
+ if (res.locals.bodyOnly) {
+ envOptions.wrapSections = false;
+ }
+
if (typeof wikitext === 'string') {
- env.setPageSrcInfo(wikitext);
-
- if (opts.contentmodel) {
- env.page.meta.revision.contentmodel =
opts.contentmodel;
- }
-
// Don't cache requests when wt is set in case somebody uses
// GET for wikitext parsing
apiUtils.setHeader(res, 'Cache-Control',
'private,no-cache,s-maxage=0');
@@ -496,10 +500,13 @@
// clear default page name
env.page.name = '';
}
-
- p2 = env.getContentHandler().toHTML(env);
} else if (oldid) {
- env.pageWithOldid = true;
+ envOptions.pageWithOldid = true;
+
+ if (req.headers.cookie) {
+ // Don't cache requests with a session.
+ apiUtils.setHeader(res,
'Cache-Control', 'private,no-cache,s-maxage=0');
+ }
// Indicate the MediaWiki revision in a header as well for
// ease of extraction in clients.
@@ -511,47 +518,30 @@
startTimers.set('wt2html.pageWithOldid.parse', Date.now());
metrics.timing('wt2html.pageWithOldid.size.input', env.page.src.length);
}
-
- p2 = env.getContentHandler().toHTML(env)
- .tap(function() {
- if (req.headers.cookie) {
- // Don't cache requests with a
session.
- apiUtils.setHeader(res,
'Cache-Control', 'private,no-cache,s-maxage=0');
- }
- });
} else {
console.assert(false, 'Should be unreachable');
}
- return p2
- .tap(function(doc) {
- if (env.conf.parsoid.useBatchAPI) {
- return DU.addRedLinks(env, doc);
- }
+ return parse({
+ // NOTE: This causes another TemplateRequest but otherwise
+ // we don't have all the metadata.
+ input: (typeof wikitext === 'string') ?
wikitext : undefined,
+ mode: 'wt2html',
+ parsoidOptions: parsoidOptions,
+ envOptions: envOptions,
+ oldid: oldid,
+ contentmodel: opts.contentmodel,
+ contentVersion: env.contentVersion,
+ bodyOnly: res.locals.bodyOnly,
+ cacheConfig: true,
+ reuseExpansions: reuseExpansions,
})
- // .timeout(REQ_TIMEOUT)
- .then(function(doc) {
- var out;
- if (pageBundle) {
- out =
DU.extractDpAndSerialize(res.locals.bodyOnly ? doc.body : doc, {
- innerXML: res.locals.bodyOnly,
- });
- } else {
- out = {
- html:
DU.toXML(res.locals.bodyOnly ? doc.body : doc, {
- innerXML:
res.locals.bodyOnly,
- }),
- };
- }
-
+ .then(function(out) {
if (opts.format === 'lint') {
- apiUtils.jsonResponse(res,
env.lintLogger.buffer);
+ apiUtils.jsonResponse(res, out.lint);
} else {
- var contentmodel =
env.page.meta.revision.contentmodel;
- apiUtils.wt2htmlRes(env, res, out.html,
out.pb, contentmodel);
- env.log('end/response'); // Flush log
buffer for linter
+ apiUtils.wt2htmlRes(env, res, out.html,
out.pb, out.contentmodel);
}
-
var html = out.html;
if (metrics) {
if
(startTimers.has('wt2html.wt.parse')) {
@@ -565,7 +555,6 @@
}
metrics.endTiming('wt2html.total',
startTimers.get('wt2html.total'));
}
-
apiUtils.logTime(env, res, 'parsing');
});
});
@@ -575,17 +564,12 @@
var env = res.locals.env;
var opts = res.locals.opts;
- env.scrubWikitext = apiUtils.shouldScrub(req,
env.scrubWikitext);
+ var envOptions = Object.assign({
+ scrubWikitext: apiUtils.shouldScrub(req,
env.scrubWikitext),
+ }, res.locals.envOptions);
// Performance Timing options
var startTimers = new Map();
-
- env.page.reset();
- env.page.meta.revision.revid = res.locals.oldid;
- env.page.meta.revision.contentmodel =
- opts.contentmodel ||
- (opts.original && opts.original.contentmodel) ||
- env.page.meta.revision.contentmodel;
env.bumpSerializerResourceUse('htmlSize', html.length);
env.log('info', 'started serializing');
@@ -640,6 +624,9 @@
DU.applyPageBundle(doc, pb);
}
+ var oldhtml;
+ var oldtext = null;
+
if (original) {
if (opts.from === 'pagebundle') {
// Apply the pagebundle to the parsed doc. This supports the
@@ -660,22 +647,23 @@
// independently, but we leave this for backwards compatibility
// until content version <= 1.2.0 is deprecated. Anything new
// should only depend on `env.originalVersion`.
- env.page.dpContentType =
(original['data-parsoid'].headers || {})['content-type'];
+ envOptions.dpContentType =
(original['data-parsoid'].headers || {})['content-type'];
}
// If we got original src, set it
if (original.wikitext) {
// Don't overwrite env.page.meta!
- env.page.src = original.wikitext.body;
+ oldtext = original.wikitext.body;
}
// If we got original html, parse it
if (original.html) {
- env.page.dom =
DU.parseHTML(original.html.body).body;
+ var oldbody =
DU.parseHTML(original.html.body).body;
if (opts.from === 'pagebundle') {
apiUtils.validatePageBundle(origPb,
env.originalVersion);
-
DU.applyPageBundle(env.page.dom.ownerDocument, origPb);
+
DU.applyPageBundle(oldbody.ownerDocument, origPb);
}
+ oldhtml = DU.toXML(oldbody);
}
}
@@ -683,20 +671,33 @@
// "Both it and the oldid parameter are needed for
// clean round-tripping of HTML retrieved earlier with"
// So, no oldid => no selser
- var hasOldId = !!env.page.meta.revision.revid;
+ var hasOldId = !!res.locals.oldid;
var useSelser = hasOldId && env.conf.parsoid.useSelser;
- var handler = env.getContentHandler();
- return handler.fromHTML(env, doc.body, useSelser)
- // .timeout(REQ_TIMEOUT)
- .then(function(output) {
+ var selser;
+ if (useSelser) {
+ selser = { oldtext: oldtext, oldhtml: oldhtml };
+ }
+
+ return parse({
+ input: DU.toXML(doc),
+ mode: useSelser ? 'selser' : 'html2wt',
+ parsoidOptions: parsoidOptions,
+ envOptions: envOptions,
+ oldid: res.locals.oldid,
+ selser: selser,
+ contentmodel: opts.contentmodel ||
+ (opts.original && opts.original.contentmodel),
+ cacheConfig: true,
+ })
+ .then(function(out) {
if (metrics) {
metrics.endTiming('html2wt.total',
startTimers.get('html2wt.total'));
- metrics.timing('html2wt.size.output',
output.length);
+ metrics.timing('html2wt.size.output',
out.wt.length);
}
apiUtils.logTime(env, res, 'serializing');
- apiUtils.plainResponse(res, output, undefined,
apiUtils.wikitextContentType(env));
+ apiUtils.plainResponse(res, out.wt, undefined,
apiUtils.wikitextContentType(env));
});
});
@@ -742,8 +743,7 @@
updates: opts.updates,
html: DU.toXML(doc),
};
- env.cacheReusableExpansions(reuseExpansions);
- return wt2html(req, res);
+ return wt2html(req, res, undefined,
reuseExpansions);
}
} else {
return apiUtils.fatalRequest(env, 'We do not know how
to do this conversion.', 415);
diff --git a/lib/html2wt/WikitextSerializer.js
b/lib/html2wt/WikitextSerializer.js
index 14a8ce0..2fc2a1a 100644
--- a/lib/html2wt/WikitextSerializer.js
+++ b/lib/html2wt/WikitextSerializer.js
@@ -420,7 +420,7 @@
var defaultBlockSpc = '{{_\n| _ = _\n}}'; // "block"
var defaultInlineSpc = '{{_|_=_}}'; // "inline"
// FIXME: Do a full regexp test maybe?
- if (/.*data-parsoid\/0.0.1"$/.test(this.env.page.dpContentType)) {
+ if (/.*data-parsoid\/0.0.1"$/.test(this.env.dpContentType)) {
// For previous versions of data-parsoid,
// wt2html pipeline used "|foo = bar" style args
// as the default.
diff --git a/lib/index.js b/lib/index.js
index 9170bae..b22fba4 100644
--- a/lib/index.js
+++ b/lib/index.js
@@ -5,7 +5,6 @@
var path = require('path');
var json = require('../package.json');
var parseJs = require('./parse.js');
-var ParsoidConfig = require('./config/ParsoidConfig.js').ParsoidConfig;
var ParsoidService = require('./api/ParsoidService.js');
/**
@@ -49,6 +48,5 @@
if (parsoidOptions.localsettings) {
parsoidOptions.localsettings =
path.resolve(options.appBasePath, parsoidOptions.localsettings);
}
- var parsoidConfig = new ParsoidConfig(null, parsoidOptions);
- return ParsoidService.init(parsoidConfig, options.logger);
+ return ParsoidService.init(parsoidOptions, options.logger);
};
--
To view, visit https://gerrit.wikimedia.org/r/394601
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I20f6cc3078853bbc462e5d565269d97bf097f880
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: C. Scott Ananian <[email protected]>
Gerrit-Reviewer: Sbailey <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits