jenkins-bot has submitted this change and it was merged.
Change subject: Accept original source without a title in wt2html direction
......................................................................
Accept original source without a title in wt2html direction
* Also, define an html2wt func instead of having that logic in v3Post.
Change-Id: I4dd92efbf3218f62e347f64c34e0229ed405eeb9
---
M lib/api/routes.js
M tests/mocha/api.js
2 files changed, 144 insertions(+), 105 deletions(-)
Approvals:
Arlolra: Looks good to me, approved
Cscott: Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/lib/api/routes.js b/lib/api/routes.js
index 5d91d2a..c574c35 100644
--- a/lib/api/routes.js
+++ b/lib/api/routes.js
@@ -583,6 +583,105 @@
.catch(apiUtils.timeoutResp.bind(null, env));
};
+ var html2wt = function(req, res, html) {
+ var env = res.locals.env;
+ var opts = res.locals.opts;
+
+ if (opts.original && opts.original.wikitext) {
+ env.setPageSrcInfo(opts.original.wikitext.body);
+ }
+
+ var p = Promise.method(function() {
+ env.bumpSerializerResourceUse('htmlSize', html.length);
+ env.page.id = res.locals.oldid;
+ env.log('info', 'started serializing');
+
+ // Performance Timing options
+ var stats = env.conf.parsoid.stats;
+ var startTimers;
+
+ if (stats) {
+ startTimers = new Map();
+ startTimers.set('html2wt.init', Date.now());
+ startTimers.set('html2wt.total', Date.now());
+ startTimers.set('html2wt.init.domparse',
Date.now());
+ }
+
+ var doc = DU.parseHTML(html);
+
+ // send domparse time, input size and init time to
statsd/Graphite
+ // init time is the time elapsed before serialization
+ // init.domParse, a component of init time, is the time
elapsed
+ // from html string to DOM tree
+ if (stats) {
+ stats.timing('html2wt.init.domparse', '',
+ Date.now() -
startTimers.get('html2wt.init.domparse'));
+ stats.timing('html2wt.size.input', '',
html.length);
+ stats.timing('html2wt.init', '',
+ Date.now() -
startTimers.get('html2wt.init'));
+ }
+
+ return {
+ env: env,
+ res: res,
+ doc: doc,
+ startTimers: startTimers,
+ };
+ })().then(function(ret) {
+ if (opts.original) {
+ var dp = opts.original['data-parsoid'];
+ // This is optional to support serializing html
with inlined
+ // data-parsoid.
+ if (dp) {
+ apiUtils.validateDp(opts.original);
+ DU.applyDataParsoid(ret.doc, dp.body);
+ env.page.dpContentType = (dp.headers ||
{})['content-type'];
+ }
+ if (opts.original.html) {
+ env.page.dom =
DU.parseHTML(opts.original.html.body).body;
+ // However, if we're given stored html,
data-parsoid
+ // should be provided as well. We have
no use case for
+ // stored inlined dp anymore.
+ apiUtils.validateDp(opts.original);
+
DU.applyDataParsoid(env.page.dom.ownerDocument, dp.body);
+ env.page.htmlContentType =
(opts.original.html.headers || {})['content-type'];
+ }
+ }
+
+ // SSS FIXME: As a fallback, lookup the content types
+ // in the <head> of ret.doc and/or env.page.dom
+ // For now, ignoring this.
+
+ return ret;
+ }).then(function(ret) {
+ var stats = env.conf.parsoid.stats;
+ // var REQ_TIMEOUT = env.conf.parsoid.timeouts.request;
+
+ // As per
https://www.mediawiki.org/wiki/Parsoid/API#v1_API_entry_points
+ // "Both it and the oldid parameter are needed for
+ // clean round-tripping of HTML retrieved earlier
with"
+ // So, no oldid => no selser
+ var hasOldId = (env.page.id && env.page.id !== '0');
+ var useSelser = hasOldId && env.conf.parsoid.useSelser;
+ return DU.serializeDOM(env, ret.doc.body, useSelser)
+ // .timeout(REQ_TIMEOUT)
+ .then(function(output) {
+ if (stats) {
+ stats.timing('html2wt.total', '',
+ Date.now() -
ret.startTimers.get('html2wt.total'));
+ stats.timing('html2wt.size.output', '',
output.length);
+ }
+ apiUtils.logTime(env, ret.res, 'serializing');
+ return output;
+ });
+ }).then(function(output) {
+ apiUtils.setHeader(res, env, 'content-type',
apiUtils.wikitextContentType(env));
+ apiUtils.sendResponse(res, env, output);
+ });
+ return apiUtils.cpuTimeout(p, res)
+ .catch(apiUtils.timeoutResp.bind(null, env));
+ };
+
// GET requests
routes.v3Get = function(req, res) {
return wt2html(req, res);
@@ -595,16 +694,17 @@
if (wt2htmlFormats.has(opts.format)) {
// Accept wikitext as a string or object{body,headers}
- var wikitext = (opts.wikitext && typeof opts.wikitext
!== 'string') ?
- opts.wikitext.body : opts.wikitext;
- if (typeof wikitext !== 'string') {
- if (res.locals.titleMissing) {
- return apiUtils.fatalRequest(env, 'No
title or wikitext was provided.', 400);
- }
- // We've been given source for this page
- if (opts.original && opts.original.wikitext) {
- wikitext = opts.original.wikitext.body;
- }
+ var wikitext = opts.wikitext;
+ if (typeof wikitext !== 'string' && opts.wikitext) {
+ wikitext = opts.wikitext.body;
+ }
+ // We've been given source for this page
+ if (typeof wikitext !== 'string' && opts.original &&
opts.original.wikitext) {
+ wikitext = opts.original.wikitext.body;
+ }
+ // Abort if no wikitext or title.
+ if (typeof wikitext !== 'string' &&
res.locals.titleMissing) {
+ return apiUtils.fatalRequest(env, 'No title or
wikitext was provided.', 400);
}
return wt2html(req, res, wikitext);
} else {
@@ -616,99 +716,7 @@
var html = (typeof opts.html === 'string') ?
opts.html : (opts.html.body || '');
- if (opts.original && opts.original.wikitext) {
- env.setPageSrcInfo(opts.original.wikitext.body);
- }
-
- var p = Promise.method(function() {
- env.bumpSerializerResourceUse('htmlSize',
html.length);
- env.page.id = res.locals.oldid;
- env.log('info', 'started serializing');
-
- // Performance Timing options
- var stats = env.conf.parsoid.stats;
- var startTimers;
-
- if (stats) {
- startTimers = new Map();
- startTimers.set('html2wt.init',
Date.now());
- startTimers.set('html2wt.total',
Date.now());
-
startTimers.set('html2wt.init.domparse', Date.now());
- }
-
- var doc = DU.parseHTML(html);
-
- // send domparse time, input size and init time
to statsd/Graphite
- // init time is the time elapsed before
serialization
- // init.domParse, a component of init time, is
the time elapsed
- // from html string to DOM tree
- if (stats) {
- stats.timing('html2wt.init.domparse',
'',
- Date.now() -
startTimers.get('html2wt.init.domparse'));
- stats.timing('html2wt.size.input', '',
html.length);
- stats.timing('html2wt.init', '',
- Date.now() -
startTimers.get('html2wt.init'));
- }
-
- return {
- env: env,
- res: res,
- doc: doc,
- startTimers: startTimers,
- };
- })().then(function(ret) {
- if (opts.original) {
- var dp = opts.original['data-parsoid'];
- // This is optional to support
serializing html with inlined
- // data-parsoid.
- if (dp) {
-
apiUtils.validateDp(opts.original);
- DU.applyDataParsoid(ret.doc,
dp.body);
- env.page.dpContentType =
(dp.headers || {})['content-type'];
- }
- if (opts.original.html) {
- env.page.dom =
DU.parseHTML(opts.original.html.body).body;
- // However, if we're given
stored html, data-parsoid
- // should be provided as well.
We have no use case for
- // stored inlined dp anymore.
-
apiUtils.validateDp(opts.original);
-
DU.applyDataParsoid(env.page.dom.ownerDocument, dp.body);
- env.page.htmlContentType =
(opts.original.html.headers || {})['content-type'];
- }
- }
-
- // SSS FIXME: As a fallback, lookup the content
types
- // in the <head> of ret.doc and/or env.page.dom
- // For now, ignoring this.
-
- return ret;
- }).then(function(ret) {
- var stats = env.conf.parsoid.stats;
- // var REQ_TIMEOUT =
env.conf.parsoid.timeouts.request;
-
- // As per
https://www.mediawiki.org/wiki/Parsoid/API#v1_API_entry_points
- // "Both it and the oldid parameter are
needed for
- // clean round-tripping of HTML retrieved
earlier with"
- // So, no oldid => no selser
- var hasOldId = (env.page.id && env.page.id !==
'0');
- var useSelser = hasOldId &&
env.conf.parsoid.useSelser;
- return DU.serializeDOM(env, ret.doc.body,
useSelser)
- // .timeout(REQ_TIMEOUT)
- .then(function(output) {
- if (stats) {
- stats.timing('html2wt.total',
'',
- Date.now() -
ret.startTimers.get('html2wt.total'));
-
stats.timing('html2wt.size.output', '', output.length);
- }
- apiUtils.logTime(env, ret.res,
'serializing');
- return output;
- });
- }).then(function(output) {
- apiUtils.setHeader(res, env, 'content-type',
apiUtils.wikitextContentType(env));
- apiUtils.sendResponse(res, env, output);
- });
- return apiUtils.cpuTimeout(p, res)
- .catch(apiUtils.timeoutResp.bind(null, env));
+ return html2wt(req, res, html);
}
};
diff --git a/tests/mocha/api.js b/tests/mocha/api.js
index 4719a7c..b4be014 100644
--- a/tests/mocha/api.js
+++ b/tests/mocha/api.js
@@ -304,7 +304,7 @@
title: 'Main_Page',
},
})
- .expect(302) // no revid provided
+ .expect(302) // no revid or wikitext source provided
.expect(function(res) {
res.headers.should.have.property('location');
res.headers.location.should.equal('/' +
mockDomain + '/v3/page/html/Main_Page/1');
@@ -320,7 +320,7 @@
title: 'Main_Page',
},
})
- .expect(302) // no revid provided
+ .expect(302) // no revid or wikitext source provided
.expect(function(res) {
res.headers.should.have.property('location');
res.headers.location.should.equal('/' +
mockDomain + '/v3/page/pagebundle/Main_Page/1');
@@ -362,6 +362,18 @@
.end(done);
});
+ it('should not require a rev id when wikitext and a title is
provided', function(done) {
+ request(api)
+ .post(mockDomain +
'/v3/transform/wikitext/to/html/Main_Page')
+ .send({
+ wikitext: "== h2 ==",
+ })
+ .expect(validHtmlResponse(function(doc) {
+ doc.body.firstChild.nodeName.should.equal('H2');
+ }))
+ .end(done);
+ });
+
it('should accept the wikitext source as original data',
function(done) {
request(api)
.post(mockDomain +
'/v3/transform/wikitext/to/html/Main_Page/1')
@@ -381,6 +393,25 @@
.end(done);
});
+ it('should accept the wikitext source as original without a
title or revision', function(done) {
+ request(api)
+ .post(mockDomain + '/v3/transform/wikitext/to/html/')
+ .send({
+ original: {
+ wikitext: {
+ headers: {
+ 'content-type':
'text/plain;profile="https://www.mediawiki.org/wiki/Specs/wikitext/1.0.0"',
+ },
+ body: "== h2 ==",
+ },
+ },
+ })
+ .expect(validHtmlResponse(function(doc) {
+ doc.body.firstChild.nodeName.should.equal('H2');
+ }))
+ .end(done);
+ });
+
it("should respect body parameter in wikitext->html
(body_only)", function(done) {
request(api)
.post(mockDomain + '/v3/transform/wikitext/to/html/')
--
To view, visit https://gerrit.wikimedia.org/r/284341
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I4dd92efbf3218f62e347f64c34e0229ed405eeb9
Gerrit-PatchSet: 9
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: Arlolra <[email protected]>
Gerrit-Reviewer: Cscott <[email protected]>
Gerrit-Reviewer: Subramanya Sastry <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits