Jdlrobson has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/364896 )

Change subject: POC: Use mobile content service instead of TextExtracts to 
generate summaries
......................................................................

POC: Use mobile content service instead of TextExtracts to generate summaries

This shows how easily the work of TextExtracts can be replicated
inside MCS by reusing existing transformations.

Some examples of output are given here:
https://docs.google.com/document/d/1429x9EBdr0V0MTkit74-5rtRZ8pvyQc9wzw3cfo3nzM/edit?usp=sharing

These already render better than the existing TextExtracts examples.

Bug: T113094
Change-Id: I5a0bb75900452131788965f12e7d6029449ac6b8
---
A lib/transformations/flattenElements.js
M lib/transformations/hideRedLinks.js
M lib/transforms.js
M routes/mobile-sections.js
4 files changed, 83 insertions(+), 13 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/mobileapps refs/changes/96/364896/1

diff --git a/lib/transformations/flattenElements.js b/lib/transformations/flattenElements.js
new file mode 100644
index 0000000..d8dfb09
--- /dev/null
+++ b/lib/transformations/flattenElements.js
@@ -0,0 +1,30 @@
+/**
+ * DOM transformation shared with app. Let's keep this in sync with the app.
+ * Last sync: Android repo 3d5b441 www/js/transforms/hideRedLinks.js
+ *
+ * The main change from the original Android app file is to use
+ * content.createElement() instead of document.createElement().
+ */
+
+'use strict';
+
+/**
+ * Replace every element in Document `content` that matches the CSS selector
+ * with a span that keeps the element's inner HTML and class attribute (if set).
+ * @param {!Document} content
+ * @param {?string} selector defaults to 'a' when omitted or empty
+ */
+function flattenElements(content, selector) {
+    const elements = content.querySelectorAll(selector || 'a');
+    for (let i = 0; i < elements.length; i++) {
+        const element = elements[i];
+        const replacementSpan = content.createElement('span');
+        replacementSpan.innerHTML = element.innerHTML;
+        if (element.getAttribute('class')) {
+          replacementSpan.setAttribute('class', element.getAttribute('class'));
+        }
+        element.parentNode.replaceChild(replacementSpan, element);
+    }
+}
+
+module.exports = flattenElements;
diff --git a/lib/transformations/hideRedLinks.js b/lib/transformations/hideRedLinks.js
index b0355d6..b910196 100644
--- a/lib/transformations/hideRedLinks.js
+++ b/lib/transformations/hideRedLinks.js
@@ -7,16 +7,10 @@
  */
 
 'use strict';
+const flattenElements = require('./flattenElements');
 
 function hideRedLinks(content) {
-    const redLinks = content.querySelectorAll('a.new');
-    for (let i = 0; i < redLinks.length; i++) {
-        const redLink = redLinks[i];
-        const replacementSpan = content.createElement('span');
-        replacementSpan.innerHTML = redLink.innerHTML;
-        replacementSpan.setAttribute('class', redLink.getAttribute('class'));
-        redLink.parentNode.replaceChild(replacementSpan, redLink);
-    }
+    flattenElements(content, 'a.new');
 }
 
 module.exports = {
diff --git a/lib/transforms.js b/lib/transforms.js
index 23d0248..261ef35 100644
--- a/lib/transforms.js
+++ b/lib/transforms.js
@@ -15,6 +15,7 @@
 const extractInfobox = require('./transformations/extractInfobox');
 const extractPageIssues = require('./transformations/extractPageIssues');
 const extractLeadIntroduction = require('./transformations/extractLeadIntroduction');
+const flattenElements = require('./transformations/flattenElements');
 
 const transforms = {};
 
@@ -341,5 +342,6 @@
 transforms.extractInfobox = extractInfobox;
 transforms.extractPageIssues = extractPageIssues;
 transforms.extractLeadIntroduction = extractLeadIntroduction;
+transforms.flattenElements = flattenElements;
 
 module.exports = transforms;
diff --git a/routes/mobile-sections.js b/routes/mobile-sections.js
index 9b8f720..31270a8 100644
--- a/routes/mobile-sections.js
+++ b/routes/mobile-sections.js
@@ -308,14 +308,37 @@
     });
 }
 
-function buildLeadResponse(req, res, legacy) {
+/*
+ * Builds an object which gives structure to the lead of an article
+ * providing access to metadata.
+ * @param {!Request} req
+ * @param {?Boolean} [legacy] when true MCS will
+ *  not apply legacy transformations that we are in the process
+ *  of deprecating.
+ * @return {!BBPromise}
+ */
+function buildLeadObject(req, legacy) {
     return BBPromise.props({
         page: parsoid.pageContentPromise(app, req, legacy),
         meta: pageMetadataPromise(req)
-    }).then((response) => {
-        return handleNamespaceAndSpecialCases(req, response);
-    }).then((response) => {
-        response = buildLead(response, legacy);
+    }).then((interimState) => {
+        return handleNamespaceAndSpecialCases(req, interimState);
+    }).then((lead) => {
+      return buildLead(lead, legacy);
+    });
+}
+
+/*
+ * Responds with the lead content of a page in structured form.
+ * @param {!Request} req
+ * @param {!Response} res
+ * @param {?Boolean} [legacy] when true MCS will
+ *  not apply legacy transformations that we are in the process
+ *  of deprecating.
+ * @return {!BBPromise}
+ */
+function buildLeadResponse(req, res, legacy) {
+    return buildLeadObject(req, legacy).then((response) => {
         res.status(200);
         mUtil.setETag(res, response.revision);
         mUtil.setContentType(res, mUtil.CONTENT_TYPES.mobileSections);
@@ -369,6 +392,27 @@
 });
 
 /**
+* GET {domain}/v1/page/preview-html/{title}
+* Gets the lead intro of a page as HTML, with links flattened to spans and reference link text and math removed.
+*/
+router.get('/preview-html/:title', (req, res) => {
+  return buildLeadObject(req, false).then((lead) => {
+    const intro = lead && lead.intro;
+    if ( intro ) {
+        const doc = domino.createDocument(intro);
+        transforms.flattenElements(doc);
+        transforms.rmElementsWithSelector(doc, '.mw-reflink-text');
+        transforms.rmElementsWithSelector(doc, 'math');
+        res.status(200);
+        mUtil.setETag(res, lead.revision);
+        res.send(doc.body.innerHTML).end();
+    } else {
+        res.status(404);
+    }
+  });
+});
+
+/**
 * GET {domain}/v1/page/formatted/{title}/{revision?}
 * Gets a formatted version of a given wiki page rather than a blob of wikitext.
 */

-- 
To view, visit https://gerrit.wikimedia.org/r/364896
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I5a0bb75900452131788965f12e7d6029449ac6b8
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/mobileapps
Gerrit-Branch: master
Gerrit-Owner: Jdlrobson <jrob...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to