jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/370295 )
Change subject: Add read-base-html route
......................................................................
Add read-base-html route
This new route is currently similar to read-html, except that it is
not optimized for payload size. In the future read-html will request
read-base-html and then run transformations to reduce the payload
and to prepare consumption by app clients.
Bug: T162179
Change-Id: I3752626fd9ff86dec7c57a8ffc10e95139772494
---
M lib/parsoid-access.js
M routes/read-html.js
A test/features/read-html/pagecontent-base.js
3 files changed, 57 insertions(+), 8 deletions(-)
Approvals:
Gergő Tisza: Looks good to me, approved
jenkins-bot: Verified
Mholloway: Looks good to me, but someone else must approve
diff --git a/lib/parsoid-access.js b/lib/parsoid-access.js
index c4dc9c9..df44853 100644
--- a/lib/parsoid-access.js
+++ b/lib/parsoid-access.js
@@ -131,19 +131,21 @@
/**
* @param {!Object} app the application object
* @param {!Object} req the request object
- * @param {?Boolean} [legacy] if enabled will apply additional transformations
- * including a legacy version of relocation of first paragraph
- * and hiding IPA via an inline style rather than clas.
+ * @param {?Boolean} [optimized] if true will apply additional transformations
+ * to reduce the payload
* @return {!promise} Returns a promise to retrieve the page content from
Parsoid
*/
-function pageHtmlPromise(app, req, legacy) {
+function pageHtmlPromise(app, req, optimized) {
return getParsoidHtml(app, req)
.then((response) => {
const meta = { revision: getRevisionFromEtag(response.headers) };
const doc = domino.createDocument(response.body);
- transforms.stripReferenceListContent(doc);
- transforms.stripUnneededMarkup(doc, legacy);
+ if (optimized) {
+ transforms.stripReferenceListContent(doc);
+ transforms.stripUnneededMarkup(doc, false);
+ }
+
parsoidSections.addSectionDivs(doc);
const html = doc.outerHTML;
diff --git a/routes/read-html.js b/routes/read-html.js
index d39f5ef..e7db2ca 100644
--- a/routes/read-html.js
+++ b/routes/read-html.js
@@ -15,11 +15,11 @@
let app;
/**
- * GET {domain}/v1/page/read-html/{title}/{revision?}/{tid?}
+ * GET {domain}/v1/page/read-base-html/{title}/{revision?}/{tid?}
* Gets page content in HTML. This is based on Parsoid with some minor
modifications more
* suitable for the reading use cases.
*/
-router.get('/read-html/:title/:revision?/:tid?', (req, res) => {
+router.get('/read-base-html/:title/:revision?/:tid?', (req, res) => {
return parsoid.pageHtmlPromise(app, req, false)
.then((response) => {
res.status(200);
@@ -29,6 +29,21 @@
});
});
+/**
+ * GET {domain}/v1/page/read-html/{title}/{revision?}/{tid?}
+ * Gets page content in HTML. This is more optimized for direct consumption
by reading
+ * clients.
+ */
+router.get('/read-html/:title/:revision?/:tid?', (req, res) => {
+ return parsoid.pageHtmlPromise(app, req, true)
+ .then((response) => {
+ res.status(200);
+ mUtil.setContentType(res, mUtil.CONTENT_TYPES.readHtml, 'text/html');
+ mUtil.setETag(res, response.meta.revision);
+ res.send(response.html).end();
+ });
+});
+
module.exports = function(appObj) {
app = appObj;
return {
diff --git a/test/features/read-html/pagecontent-base.js
b/test/features/read-html/pagecontent-base.js
new file mode 100644
index 0000000..b7ae121
--- /dev/null
+++ b/test/features/read-html/pagecontent-base.js
@@ -0,0 +1,32 @@
+'use strict';
+
+const domino = require('domino');
+const preq = require('preq');
+const assert = require('../../utils/assert.js');
+const headers = require('../../utils/headers.js');
+const server = require('../../utils/server.js');
+
+describe('read-html', function() {
+
+ this.timeout(20000); // eslint-disable-line no-invalid-this
+
+ before(() => { return server.start(); });
+
+ const localUri = (title, domain = 'en.wikipedia.org') => {
+ return `${server.config.uri}${domain}/v1/page/read-base-html/${title}`;
+ };
+
+ it('should respond to GET request with expected headers, incl. CORS and
CSP headers', () => {
+ const uri = localUri('Foobar');
+ return headers.checkHeaders(uri, headers.HTML_CONTENT_TYPE_REGEX);
+ });
+
+ it('HTML should be sectioned', () => {
+ const uri = localUri('Foobar/788941783');
+ return preq.get({ uri })
+ .then((res) => {
+ const document = domino.createDocument(res.body);
+ assert.selectorExistsNTimes(document, 'section', 7, 'should have 7
sections');
+ });
+ });
+});
--
To view, visit https://gerrit.wikimedia.org/r/370295
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I3752626fd9ff86dec7c57a8ffc10e95139772494
Gerrit-PatchSet: 23
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: BearND <[email protected]>
Gerrit-Reviewer: Dbrant <[email protected]>
Gerrit-Reviewer: Fjalapeno <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: Gergő Tisza <[email protected]>
Gerrit-Reviewer: Jdlrobson <[email protected]>
Gerrit-Reviewer: Mholloway <[email protected]>
Gerrit-Reviewer: Mhurd <[email protected]>
Gerrit-Reviewer: Mobrovac <[email protected]>
Gerrit-Reviewer: Niedzielski <[email protected]>
Gerrit-Reviewer: Ppchelko <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits