jenkins-bot has submitted this change and it was merged.
Change subject: Move some code back to routes.js that is no longer shared
......................................................................
Move some code back to routes.js that is no longer shared
Change-Id: I98c80945cd94c76f425b6f013612227fd95657c9
---
M lib/api/apiUtils.js
M lib/api/routes.js
2 files changed, 243 insertions(+), 291 deletions(-)
Approvals:
Cscott: Looks good to me, approved
jenkins-bot: Verified
diff --git a/lib/api/apiUtils.js b/lib/api/apiUtils.js
index dd81339..78c4a1c 100644
--- a/lib/api/apiUtils.js
+++ b/lib/api/apiUtils.js
@@ -10,10 +10,7 @@
var DU = require('../utils/DOMUtils.js').DOMUtils;
var PegTokenizer = require('../wt2html/tokenizer.js').PegTokenizer;
var Promise = require('../utils/promise.js');
-var ApiRequest = require('../mw/ApiRequest.js');
-
-var TemplateRequest = ApiRequest.TemplateRequest;
-var PHPParseRequest = ApiRequest.PHPParseRequest;
+var PHPParseRequest = require('../mw/ApiRequest.js').PHPParseRequest;
/**
@@ -244,75 +241,12 @@
});
};
-apiUtils.startHtml2wt = Promise.method(function(req, res, html) {
- var env = res.locals.env;
-
- env.bumpSerializerResourceUse('htmlSize', html.length);
- env.page.id = res.locals.oldid;
- env.log('info', 'started serializing');
-
- // Performance Timing options
- var stats = env.conf.parsoid.stats;
- var startTimers;
-
- if (stats) {
- startTimers = new Map();
- startTimers.set('html2wt.init', Date.now());
- startTimers.set('html2wt.total', Date.now());
- startTimers.set('html2wt.init.domparse', Date.now());
- }
-
- var doc = DU.parseHTML(html);
-
- // send domparse time, input size and init time to statsd/Graphite
- // init time is the time elapsed before serialization
- // init.domParse, a component of init time, is the time elapsed from html string to DOM tree
- if (stats) {
- stats.timing('html2wt.init.domparse', '',
- Date.now() - startTimers.get('html2wt.init.domparse'));
- stats.timing('html2wt.size.input', '', html.length);
- stats.timing('html2wt.init', '',
- Date.now() - startTimers.get('html2wt.init'));
- }
-
- return {
- env: env,
- res: res,
- doc: doc,
- startTimers: startTimers,
- };
-});
-
-apiUtils.endHtml2wt = function(ret) {
- var env = ret.env;
- var stats = env.conf.parsoid.stats;
- // var REQ_TIMEOUT = env.conf.parsoid.timeouts.request;
-
- // As per https://www.mediawiki.org/wiki/Parsoid/API#v1_API_entry_points
- // "Both it and the oldid parameter are needed for
- // clean round-tripping of HTML retrieved earlier with"
- // So, no oldid => no selser
- var hasOldId = (env.page.id && env.page.id !== '0');
- var useSelser = hasOldId && env.conf.parsoid.useSelser;
- return DU.serializeDOM(env, ret.doc.body, useSelser)
- // .timeout(REQ_TIMEOUT)
- .then(function(output) {
- if (stats) {
- stats.timing('html2wt.total', '',
- Date.now() -
ret.startTimers.get('html2wt.total'));
- stats.timing('html2wt.size.output', '', output.length);
- }
- apiUtils.logTime(env, ret.res, 'serializing');
- return output;
- });
-};
-
// To support the 'subst' API parameter, we need to prefix each
// top-level template with 'subst'. To make sure we do this for the
// correct templates, tokenize the starting wikitext and use that to
// detect top-level templates. Then, substitute each starting '{{' with
// '{{subst' using the template token's tsr.
-var substTopLevelTemplates = function(env, target, wt) {
+apiUtils.substTopLevelTemplates = function(env, target, wt) {
var tokenizer = new PegTokenizer(env);
var tokens = tokenizer.tokenize(wt, null, null, true);
var tsrIncr = 0;
@@ -336,144 +270,6 @@
});
};
-apiUtils.startWt2html = Promise.method(function(req, res, wt) {
- var env = res.locals.env;
-
- // Check early if we have a wt string.
- if (typeof wt === 'string') {
- env.bumpParserResourceUse('wikitextSize', wt.length);
- }
-
- // Performance Timing options
- var stats = env.conf.parsoid.stats;
- var startTimers;
-
- if (stats) {
- startTimers = new Map();
- // init refers to time elapsed before parsing begins
- startTimers.set('wt2html.init', Date.now());
- startTimers.set('wt2html.total', Date.now());
- }
-
- var prefix = res.locals.iwp;
- var oldid = res.locals.oldid;
- var target = env.normalizeAndResolvePageTitle();
-
- var p = Promise.resolve(wt);
-
- if (oldid || typeof wt !== 'string') {
- // Always fetch the page info if we have an oldid.
- // Otherwise, if no wt was passed, we need to figure out
- // the latest revid to which we'll redirect.
- p = p.tap(function() {
- return TemplateRequest.setPageSrcInfo(env, target,
oldid);
- }).tap(function() {
- // Now that we have the page src, check if we're using that as wt.
- if (typeof wt !== 'string') {
- env.bumpParserResourceUse('wikitextSize',
env.page.src.length);
- }
- });
- }
-
- if (typeof wt === 'string' && res.locals.subst) {
- p = p.then(function(wikitext) {
- // FIXME: reset limits after subst'ing
- return substTopLevelTemplates(env, target, wikitext);
- });
- }
-
- return p.then(function(wikitext) {
- return {
- req: req,
- res: res,
- env: env,
- startTimers: startTimers,
- oldid: oldid,
- target: target,
- prefix: prefix,
- // Calling this wikitext so that it's easily distinguishable.
- // It may have been modified by substTopLevelTemplates.
- wikitext: wikitext,
- };
- });
-});
-
-apiUtils.redirectToRevision = function(env, res, path, revid) {
- var stats = env.conf.parsoid.stats;
- env.log('info', 'redirecting to revision', revid);
-
- if (stats) {
- stats.count('wt2html.redirectToOldid', '');
- }
-
- // Don't cache requests with no oldid
- apiUtils.setHeader(res, env, 'Cache-Control',
'private,no-cache,s-maxage=0');
- apiUtils.relativeRedirect({ 'path': path, 'res': res, 'env': env });
-};
-
-apiUtils.parsePageWithOldid = function(ret) {
- var env = ret.env;
- var stats = env.conf.parsoid.stats;
- var startTimers = ret.startTimers;
- env.log('info', 'started parsing');
-
- // Indicate the MediaWiki revision in a header as well for
- // ease of extraction in clients.
- apiUtils.setHeader(ret.res, env, 'content-revision-id', ret.oldid);
-
- if (stats) {
- stats.timing('wt2html.pageWithOldid.init', '',
- Date.now() - startTimers.get('wt2html.init'));
- startTimers.set('wt2html.pageWithOldid.parse', Date.now());
- stats.timing('wt2html.pageWithOldid.size.input', '',
env.page.src.length);
- }
-
- var expansions = ret.reuse && ret.reuse.expansions;
- if (expansions) {
- // Figure out what we can reuse
- switch (ret.reuse.mode) {
- case "templates":
- // Transclusions need to be updated, so don't reuse them.
- expansions.transclusions = {};
- break;
- case "files":
- // Files need to be updated, so don't reuse them.
- break;
- }
- }
-
- return env.pipelineFactory.parse(env, env.page.src, expansions);
-};
-
-apiUtils.parseWt = function(ret) {
- var env = ret.env;
- var res = ret.res;
- var stats = env.conf.parsoid.stats;
- var startTimers = ret.startTimers;
-
- env.log('info', 'started parsing');
- env.setPageSrcInfo(ret.wikitext);
-
- // Don't cache requests when wt is set in case somebody uses
- // GET for wikitext parsing
- apiUtils.setHeader(res, env, 'Cache-Control',
'private,no-cache,s-maxage=0');
-
- if (stats) {
- stats.timing('wt2html.wt.init', '',
- Date.now() - startTimers.get('wt2html.init'));
- startTimers.set('wt2html.wt.parse', Date.now());
- stats.timing('wt2html.wt.size.input', '', ret.wikitext.length);
- }
-
- if (!res.locals.pageName) {
- // clear default page name
- env.page.name = '';
- }
-
- return env.pipelineFactory.parse(env, ret.wikitext);
-};
-
apiUtils.wikitextContentType = function(env) {
return 'text/plain; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/wikitext/' + env.conf.parsoid.WIKITEXT_VERSION + '"';
};
@@ -484,61 +280,6 @@
apiUtils.dataParsoidContentType = function(env) {
return 'application/json; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/data-parsoid/' + env.conf.parsoid.DATA_PARSOID_VERSION + '"';
-};
-
-apiUtils.endWt2html = function(ret, doc, output) {
- var env = ret.env;
- var res = ret.res;
- var stats = env.conf.parsoid.stats;
- var startTimers = ret.startTimers;
-
- if (doc) {
- output = DU.toXML(res.locals.bodyOnly ? doc.body : doc, {
- innerXML: res.locals.bodyOnly,
- });
- apiUtils.setHeader(res, env, 'content-type',
apiUtils.htmlContentType(env));
- apiUtils.sendResponse(res, env, output);
- }
-
- if (stats) {
- if (startTimers.has('wt2html.wt.parse')) {
- stats.timing('wt2html.wt.parse', '',
- Date.now() -
startTimers.get('wt2html.wt.parse'));
- stats.timing('wt2html.wt.size.output', '',
output.length);
- } else if (startTimers.has('wt2html.pageWithOldid.parse')) {
- stats.timing('wt2html.pageWithOldid.parse', '',
- Date.now() -
startTimers.get('wt2html.pageWithOldid.parse'));
- stats.timing('wt2html.pageWithOldid.size.output', '',
output.length);
- }
- stats.timing('wt2html.total', '',
- Date.now() - startTimers.get('wt2html.total'));
- }
-
- apiUtils.logTime(env, res, 'parsing');
-};
-
-apiUtils.v3endWt2html = function(ret, doc) {
- var env = ret.env;
- var res = ret.res;
- var opts = res.locals.opts;
- if (opts.format === 'pagebundle') {
- var out = DU.extractDpAndSerialize(res.locals.bodyOnly ?
doc.body : doc, {
- innerXML: res.locals.bodyOnly,
- });
- apiUtils.jsonResponse(res, env, {
- html: {
- headers: { 'content-type':
apiUtils.htmlContentType(env) },
- body: out.str,
- },
- 'data-parsoid': {
- headers: { 'content-type': out.type },
- body: out.dp,
- },
- });
- apiUtils.endWt2html(ret, null, out.str);
- } else {
- apiUtils.endWt2html(ret, doc);
- }
};
/**
diff --git a/lib/api/routes.js b/lib/api/routes.js
index 39b9e4e..332a914 100644
--- a/lib/api/routes.js
+++ b/lib/api/routes.js
@@ -364,16 +364,115 @@
// Spec'd in https://phabricator.wikimedia.org/T75955 and the API tests.
- var v3Wt2html = function(req, res, wt) {
+ var wt2html = function(req, res, wt) {
var env = res.locals.env;
var opts = res.locals.opts;
- var p = apiUtils.startWt2html(req, res, wt).then(function(ret) {
+
+ // Performance Timing options
+ var stats = env.conf.parsoid.stats;
+ var startTimers = new Map();
+
+ var p = Promise.method(function() {
+ // Check early if we have a wt string.
+ if (typeof wt === 'string') {
+ env.bumpParserResourceUse('wikitextSize',
wt.length);
+ }
+
+ if (stats) {
+ // init refers to time elapsed before parsing begins
+ startTimers.set('wt2html.init', Date.now());
+ startTimers.set('wt2html.total', Date.now());
+ }
+
+ var prefix = res.locals.iwp;
+ var oldid = res.locals.oldid;
+ var target = env.normalizeAndResolvePageTitle();
+
+ var p2 = Promise.resolve(wt);
+
+ if (oldid || typeof wt !== 'string') {
+ // Always fetch the page info if we have an oldid.
+ // Otherwise, if no wt was passed, we need to figure out
+ // the latest revid to which we'll redirect.
+ p2 = p2.tap(function() {
+ return TemplateRequest.setPageSrcInfo(env, target, oldid);
+ }).tap(function() {
+ // Now that we have the page src, check if we're using that as wt.
+ if (typeof wt !== 'string') {
+
env.bumpParserResourceUse('wikitextSize', env.page.src.length);
+ }
+ });
+ }
+
+ if (typeof wt === 'string' && res.locals.subst) {
+ p2 = p2.then(function(wikitext) {
+ // FIXME: reset limits after subst'ing
+ return apiUtils.substTopLevelTemplates(env, target, wikitext);
+ });
+ }
+
+ return p2.then(function(wikitext) {
+ return {
+ req: req,
+ res: res,
+ env: env,
+ startTimers: startTimers,
+ oldid: oldid,
+ target: target,
+ prefix: prefix,
+ // Calling this wikitext so that it's easily distinguishable.
+ // It may have been modified by substTopLevelTemplates.
+ wikitext: wikitext,
+ };
+ });
+ })().then(function(ret) {
+ if (typeof ret.wikitext !== 'string' && !ret.oldid) {
+ var revid = env.page.meta.revision.revid;
+ var path = [
+ '',
+
env.conf.parsoid.mwApiMap.get(ret.prefix).domain,
+ 'v3',
+ 'page',
+ opts.format,
+ encodeURIComponent(ret.target),
+ revid,
+ ].join('/');
+ if (Object.keys(req.query).length > 0) {
+ path += '?' + qs.stringify(req.query);
+ }
+ env.log('info', 'redirecting to revision',
revid);
+ if (stats) {
+ stats.count('wt2html.redirectToOldid',
'');
+ }
+ // Don't cache requests with no oldid
+ apiUtils.setHeader(res, env, 'Cache-Control',
'private,no-cache,s-maxage=0');
+ apiUtils.relativeRedirect({ 'path': path,
'res': res, 'env': env });
+ return;
+ }
+ var p2;
if (typeof ret.wikitext === 'string') {
- return apiUtils.parseWt(ret)
- // .timeout(REQ_TIMEOUT)
- .then(apiUtils.v3endWt2html.bind(null,
ret));
+ env.log('info', 'started parsing');
+ env.setPageSrcInfo(ret.wikitext);
+
+ // Don't cache requests when wt is set in case somebody uses
+ // GET for wikitext parsing
+ apiUtils.setHeader(res, env, 'Cache-Control',
'private,no-cache,s-maxage=0');
+
+ if (stats) {
+ stats.timing('wt2html.wt.init', '',
+ Date.now() -
startTimers.get('wt2html.init'));
+ startTimers.set('wt2html.wt.parse',
Date.now());
+ stats.timing('wt2html.wt.size.input',
'', ret.wikitext.length);
+ }
+
+ if (!res.locals.pageName) {
+ // clear default page name
+ env.page.name = '';
+ }
+
+ p2 = env.pipelineFactory.parse(env,
ret.wikitext);
} else if (ret.oldid) {
- var p2 = Promise.resolve(ret);
+ p2 = Promise.resolve(ret);
// See if we can reuse transclusion or
extension expansions.
var revision = opts.previous || opts.original;
if (revision) {
@@ -398,30 +497,86 @@
return ret2;
});
}
- return p2.then(apiUtils.parsePageWithOldid).tap(function() {
+ p2 = p2.then(function(ret2) {
+ env.log('info', 'started parsing');
+
+ // Indicate the MediaWiki revision in a header as well for
+ // ease of extraction in clients.
+ apiUtils.setHeader(ret2.res, env,
'content-revision-id', ret2.oldid);
+
+ if (stats) {
+
stats.timing('wt2html.pageWithOldid.init', '',
+ Date.now() -
startTimers.get('wt2html.init'));
+
startTimers.set('wt2html.pageWithOldid.parse', Date.now());
+
stats.timing('wt2html.pageWithOldid.size.input', '', env.page.src.length);
+ }
+
+ var expansions = ret2.reuse &&
ret2.reuse.expansions;
+ if (expansions) {
+ // Figure out what we can reuse
+ switch (ret2.reuse.mode) {
+ case "templates":
+ // Transclusions need
to be updated, so don't reuse them.
+
expansions.transclusions = {};
+ break;
+ case "files":
+ // Files need to be
updated, so don't reuse them.
+ expansions.files = {};
+ break;
+ }
+ }
+
+ return env.pipelineFactory.parse(env,
env.page.src, expansions);
+ }).tap(function() {
if (req.headers.cookie) {
// Don't cache requests with a
session.
apiUtils.setHeader(res, env,
'Cache-Control', 'private,no-cache,s-maxage=0');
}
- })
- // .timeout(REQ_TIMEOUT)
- .then(apiUtils.v3endWt2html.bind(null, ret));
- } else {
- var revid = env.page.meta.revision.revid;
- var path = [
- '',
-
env.conf.parsoid.mwApiMap.get(ret.prefix).domain,
- 'v3',
- 'page',
- opts.format,
- encodeURIComponent(ret.target),
- revid,
- ].join('/');
- if (Object.keys(req.query).length > 0) {
- path += '?' + qs.stringify(req.query);
- }
- apiUtils.redirectToRevision(env, res, path,
revid);
+ });
}
+ return p2
+ // .timeout(REQ_TIMEOUT)
+ .then(function(doc) {
+ var output;
+ if (opts.format === 'pagebundle') {
+ var out =
DU.extractDpAndSerialize(res.locals.bodyOnly ? doc.body : doc, {
+ innerXML: res.locals.bodyOnly,
+ });
+ apiUtils.jsonResponse(res, env, {
+ html: {
+ headers: {
'content-type': apiUtils.htmlContentType(env) },
+ body: out.str,
+ },
+ 'data-parsoid': {
+ headers: {
'content-type': out.type },
+ body: out.dp,
+ },
+ });
+ output = out.str;
+ } else {
+ output = DU.toXML(res.locals.bodyOnly ?
doc.body : doc, {
+ innerXML: res.locals.bodyOnly,
+ });
+ apiUtils.setHeader(res, env,
'content-type', apiUtils.htmlContentType(env));
+ apiUtils.sendResponse(res, env, output);
+ }
+
+ if (stats) {
+ if
(startTimers.has('wt2html.wt.parse')) {
+
stats.timing('wt2html.wt.parse', '',
+ Date.now() -
startTimers.get('wt2html.wt.parse'));
+
stats.timing('wt2html.wt.size.output', '', output.length);
+ } else if
(startTimers.has('wt2html.pageWithOldid.parse')) {
+
stats.timing('wt2html.pageWithOldid.parse', '',
+ Date.now() -
startTimers.get('wt2html.pageWithOldid.parse'));
+
stats.timing('wt2html.pageWithOldid.size.output', '', output.length);
+ }
+ stats.timing('wt2html.total', '',
+ Date.now() -
startTimers.get('wt2html.total'));
+ }
+
+ apiUtils.logTime(env, res, 'parsing');
+ });
});
return apiUtils.cpuTimeout(p, res)
.catch(apiUtils.timeoutResp.bind(null, env));
@@ -429,7 +584,7 @@
// GET requests
routes.v3Get = function(req, res) {
- return v3Wt2html(req, res);
+ return wt2html(req, res);
};
// POST requests
@@ -451,7 +606,7 @@
wikitext = opts.original.wikitext.body;
}
}
- return v3Wt2html(req, res, wikitext);
+ return wt2html(req, res, wikitext);
} else {
// html is required for serialization
if (opts.html === undefined) {
@@ -465,7 +620,43 @@
env.setPageSrcInfo(opts.original.wikitext.body);
}
- var p = apiUtils.startHtml2wt(req, res,
html).then(function(ret) {
+ var p = Promise.method(function() {
+ env.bumpSerializerResourceUse('htmlSize',
html.length);
+ env.page.id = res.locals.oldid;
+ env.log('info', 'started serializing');
+
+ // Performance Timing options
+ var stats = env.conf.parsoid.stats;
+ var startTimers;
+
+ if (stats) {
+ startTimers = new Map();
+ startTimers.set('html2wt.init',
Date.now());
+ startTimers.set('html2wt.total',
Date.now());
+
startTimers.set('html2wt.init.domparse', Date.now());
+ }
+
+ var doc = DU.parseHTML(html);
+
+ // send domparse time, input size and init time to statsd/Graphite
+ // init time is the time elapsed before serialization
+ // init.domParse, a component of init time, is the time elapsed
+ // from html string to DOM tree
+ if (stats) {
+ stats.timing('html2wt.init.domparse',
'',
+ Date.now() -
startTimers.get('html2wt.init.domparse'));
+ stats.timing('html2wt.size.input', '',
html.length);
+ stats.timing('html2wt.init', '',
+ Date.now() -
startTimers.get('html2wt.init'));
+ }
+
+ return {
+ env: env,
+ res: res,
+ doc: doc,
+ startTimers: startTimers,
+ };
+ })().then(function(ret) {
if (opts.original) {
var dp = opts.original['data-parsoid'];
// This is optional to support
serializing html with inlined
@@ -491,7 +682,28 @@
// For now, ignoring this.
return ret;
- }).then(apiUtils.endHtml2wt).then(function(output) {
+ }).then(function(ret) {
+ var stats = env.conf.parsoid.stats;
+ // var REQ_TIMEOUT =
env.conf.parsoid.timeouts.request;
+
+ // As per https://www.mediawiki.org/wiki/Parsoid/API#v1_API_entry_points
+ // "Both it and the oldid parameter are needed for
+ // clean round-tripping of HTML retrieved earlier with"
+ // So, no oldid => no selser
+ // So, no oldid => no selser
+ var hasOldId = (env.page.id && env.page.id !==
'0');
+ var useSelser = hasOldId &&
env.conf.parsoid.useSelser;
+ return DU.serializeDOM(env, ret.doc.body,
useSelser)
+ // .timeout(REQ_TIMEOUT)
+ .then(function(output) {
+ if (stats) {
+ stats.timing('html2wt.total',
'',
+ Date.now() -
ret.startTimers.get('html2wt.total'));
+
stats.timing('html2wt.size.output', '', output.length);
+ }
+ apiUtils.logTime(env, ret.res,
'serializing');
+ return output;
+ });
+ }).then(function(output) {
apiUtils.setHeader(res, env, 'content-type',
apiUtils.wikitextContentType(env));
apiUtils.sendResponse(res, env, output);
});
@@ -499,7 +711,6 @@
.catch(apiUtils.timeoutResp.bind(null, env));
}
};
-
return routes;
};
--
To view, visit https://gerrit.wikimedia.org/r/283672
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I98c80945cd94c76f425b6f013612227fd95657c9
Gerrit-PatchSet: 8
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: Tim Starling <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits