Santhosh has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/383109 )

Change subject: Combine page fetch, segmentation to a new class
......................................................................

Combine page fetch, segmentation to a new class

* Simplify the API end point for page/:language/:title/:revision?
  in v1.js. Router is not supposed to data processing as it does now.
* Move pageloader/PageLoader.js to mw/MWPage class which now does page
  fetch and segmentation call. In follow ups, we will add section
  wrapping call to ths class(T177752).
* The new MWPage class inherits APIRequest. Move the remaining utility
  methods in apiUtil to APIRequest class so that api related methods
  are in single place.

Known issues to be fixed:
* The language parameter passed to the /page api is sometimes domain
  because of the way services is keeping cxserver behind
  xx.wikipedia.org/api. Using our domain template we need to understand
  which one is passed and derive language code from it.

Change-Id: I2ad6e1f183c8fe69c960597a29632989965cce82
---
M app.js
D lib/api-util.js
M lib/mw/ApiRequest.js
A lib/mw/MWPage.js
D lib/pageloader/PageLoader.js
M lib/routes/v1.js
6 files changed, 145 insertions(+), 169 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver 
refs/changes/09/383109/1

diff --git a/app.js b/app.js
index 907fbe6..1795d04 100644
--- a/app.js
+++ b/app.js
@@ -7,7 +7,6 @@
        bodyParser = require( 'body-parser' ),
        fs = BBPromise.promisifyAll( require( 'fs' ) ),
        sUtil = require( './lib/util' ),
-       apiUtil = require( './lib/api-util' ),
        packageInfo = require( './package.json' ),
        yaml = require( 'js-yaml' );
 
@@ -59,9 +58,6 @@
                        }
                }
        }
-
-       // set up the request templates for the APIs
-       apiUtil.setupApiTemplates( app );
 
        // set up the spec
        if ( !app.conf.spec ) {
diff --git a/lib/api-util.js b/lib/api-util.js
deleted file mode 100644
index 435cd37..0000000
--- a/lib/api-util.js
+++ /dev/null
@@ -1,80 +0,0 @@
-'use strict';
-
-var preq = require( 'preq' ),
-       Template = require( 'swagger-router' ).Template;
-
-/**
- * Calls the REST API with the supplied domain, path and request parameters
- *
- * @param {Object} app the application object
- * @param {string} domain the domain to issue the request for
- * @param {string} path the REST API path to contact without the leading slash
- * @param {Object} [restReq={}] the object containing the REST request details
- * @param {string} [restReq.method=get] the request method
- * @param {Object} [restReq.query={}] the query string to send, if any
- * @param {Object} [restReq.headers={}] the request headers to send
- * @param {Object} [restReq.body=null] the body of the request, if any
- * @return {Promise} a promise resolving as the response object from the REST 
API
- *
- */
-function restApiGet( app, domain, path, restReq ) {
-       var request;
-       restReq = restReq || {};
-       path = path[ 0 ] === '/' ? path.slice( 1 ) : path;
-
-       request = app.restbase_tpl.expand( {
-               request: {
-                       method: restReq.method,
-                       params: {
-                               domain: domain,
-                               path: path
-                       },
-                       query: restReq.query,
-                       headers: Object.assign( {
-                               'user-agent': app.conf.user_agent
-                       }, restReq.headers ),
-                       body: restReq.body
-               }
-       } );
-
-       return preq( request );
-}
-
-/**
- * Sets up the request templates for MW and RESTBase API requests
- *
- * @param {Application} app the application object
- */
-function setupApiTemplates( app ) {
-       /* eslint-disable camelcase */
-       // set up the MW API request template
-       if ( !app.conf.mwapi_req ) {
-               app.conf.mwapi_req = {
-                       uri: 'https://{{domain}}/w/api.php',
-                       headers: {
-                               'user-agent': '{{user-agent}}'
-                       },
-                       query: '{{ default(request.query, {}) }}',
-                       body: '{{request.body}}'
-               };
-       }
-       app.mwapi_tpl = new Template( app.conf.mwapi_req );
-
-       // set up the RESTBase request template
-       if ( !app.conf.restbase_req ) {
-               app.conf.restbase_req = {
-                       method: '{{request.method}}',
-                       uri: 'http://{{domain}}/api/rest_v1/{+path}',
-                       query: '{{ default(request.query, {}) }}',
-                       headers: '{{request.headers}}',
-                       body: '{{request.body}}'
-               };
-       }
-       app.restbase_tpl = new Template( app.conf.restbase_req );
-       /* eslint-enable camelcase */
-}
-
-module.exports = {
-       restApiGet: restApiGet,
-       setupApiTemplates: setupApiTemplates
-};
diff --git a/lib/mw/ApiRequest.js b/lib/mw/ApiRequest.js
index 58c910d..492b5a3 100644
--- a/lib/mw/ApiRequest.js
+++ b/lib/mw/ApiRequest.js
@@ -1,10 +1,10 @@
 'use strict';
 
-const apiUtil = require( '../api-util.js' ),
-       preq = require( 'preq' ),
+const preq = require( 'preq' ),
        cxUtil = require( '../util.js' ),
        Title = require( 'mediawiki-title' ).Title,
        cxutil = require( '../util.js' ),
+       Template = require( 'swagger-router' ).Template,
        languageDomainNameMapping = require( 
'./../language-domain-mapping.json' ),
        HTTPError = cxUtil.HTTPError;
 
@@ -20,7 +20,7 @@
                // Source and target languages
                this.sourceLanguage = config.sourceLanguage;
                this.targetLanguage = config.targetLanguage;
-               apiUtil.setupApiTemplates( config.context );
+               this.setupApiTemplates( config.context );
        }
 
        /**
@@ -132,14 +132,14 @@
        }
 
        htmlToWikiText( html, language ) {
-               var restReq, path, domain = this.getDomain( language );
+               const domain = this.getDomain( language );
 
                if ( cxUtil.isPlainText( html ) ) {
                        // Does not contain HTML elements. Save api call.
                        return Promise.resolve( html );
                }
 
-               restReq = {
+               const restReq = {
                        method: 'post',
                        body: { html },
                        headers: {
@@ -147,21 +147,21 @@
                                accept: 'text/html; charset=utf-8; 
profile="https://www.mediawiki.org/wiki/Specs/HTML/1.5.0";'
                        }
                };
-               path = '/transform/html/to/wikitext';
+               const path = '/transform/html/to/wikitext';
 
-               return apiUtil.restApiGet( this.context, domain, path, restReq )
+               return this.restApiGet( domain, path, restReq )
                        .then( ( response ) => response.body );
        }
 
        wikitextToHTML( wikitext, language ) {
-               var restReq, path, domain = this.getDomain( language );
+               const domain = this.getDomain( language );
 
                if ( !wikitext || !wikitext.trim() ) {
                        // Save api call.
                        return Promise.resolve( wikitext );
                }
 
-               restReq = {
+               const restReq = {
                        method: 'post',
                        body: {
                                wikitext,
@@ -173,11 +173,79 @@
                                accept: 'text/html; charset=utf-8; 
profile="https://www.mediawiki.org/wiki/Specs/HTML/1.5.0";'
                        }
                };
-               path = '/transform/wikitext/to/html';
+               const path = '/transform/wikitext/to/html';
 
-               return apiUtil.restApiGet( this.context, domain, path, restReq )
+               return this.restApiGet( domain, path, restReq )
                        .then( ( response ) => response.body );
        }
+
+       /**
+        * Sets up the request templates for MW and RESTBase API requests
+        *
+        * @param {Application} app the application object
+        */
+       setupApiTemplates( app ) {
+               /* eslint-disable camelcase */
+               // set up the MW API request template
+               if ( !this.context.conf.mwapi_req ) {
+                       this.context.conf.mwapi_req = {
+                               uri: 'https://{{domain}}/w/api.php',
+                               headers: {
+                                       'user-agent': '{{user-agent}}'
+                               },
+                               query: '{{ default(request.query, {}) }}',
+                               body: '{{request.body}}'
+                       };
+               }
+               this.context.mwapi_tpl = new Template( 
this.context.conf.mwapi_req );
+
+               // set up the RESTBase request template
+               if ( !app.conf.restbase_req ) {
+                       app.conf.restbase_req = {
+                               method: '{{request.method}}',
+                               uri: 'http://{{domain}}/api/rest_v1/{+path}',
+                               query: '{{ default(request.query, {}) }}',
+                               headers: '{{request.headers}}',
+                               body: '{{request.body}}'
+                       };
+               }
+               app.restbase_tpl = new Template( this.context.conf.restbase_req 
);
+               /* eslint-enable camelcase */
+       }
+
+       /**
+        * Calls the REST API with the supplied domain, path and request 
parameters
+        *
+        * @param {string} domain the domain to issue the request for
+        * @param {string} path the REST API path to contact without the 
leading slash
+        * @param {Object} [restReq={}] the object containing the REST request 
details
+        * @param {string} [restReq.method=get] the request method
+        * @param {Object} [restReq.query={}] the query string to send, if any
+        * @param {Object} [restReq.headers={}] the request headers to send
+        * @param {Object} [restReq.body=null] the body of the request, if any
+        * @return {Promise} a promise resolving as the response object from 
the REST API
+        *
+        */
+       restApiGet( domain, path, restReq ) {
+               restReq = restReq || {};
+               path = path[ 0 ] === '/' ? path.slice( 1 ) : path;
+
+               const request = this.context.restbase_tpl.expand( {
+                       request: {
+                               method: restReq.method,
+                               params: {
+                                       domain,
+                                       path
+                               },
+                               query: restReq.query,
+                               headers: Object.assign( {
+                                       'user-agent': 
this.context.conf.user_agent
+                               }, restReq.headers ),
+                               body: restReq.body
+                       }
+               } );
+               return preq( request );
+       }
 }
 
 /**
diff --git a/lib/mw/MWPage.js b/lib/mw/MWPage.js
new file mode 100644
index 0000000..73b8287
--- /dev/null
+++ b/lib/mw/MWPage.js
@@ -0,0 +1,57 @@
+'use strict';
+
+const ApiRequest = require( '../mw/ApiRequest' ),
+       CXSegmenter = require( '../segmentation/CXSegmenter' );
+
+class MWPage extends ApiRequest {
+       getPage( page, revision ) {
+               return this.fetch( page, revision ).then( ( response ) => {
+                       let segmentedDoc = this.segment( response.body );
+                       // TODO: segmentedDoc.wrapSections();
+                       return {
+                               content: segmentedDoc.getHtml(),
+                               revision: response.revision
+                       };
+               } );
+       }
+
+       /**
+        * @param {string} page The page title
+        * @param {string} revision The revision id
+        * @return {Promise}
+        */
+       fetch( page, revision ) {
+               let path = 'page/html/' + encodeURIComponent( page );
+               const domain = this.getDomain( this.sourceLanguage );
+
+               if ( revision ) {
+                       path += '/' + revision;
+               }
+
+               const restReq = {
+                       method: 'get',
+                       headers: {
+                               // See 
https://www.mediawiki.org/wiki/Specs/HTML/1.5.0
+                               accept: 'text/html; charset=utf-8; 
profile="https://www.mediawiki.org/wiki/Specs/HTML/1.5.0";'
+                       }
+               };
+               return this.restApiGet( domain, path, restReq ).then( ( 
response ) => {
+                       return {
+                               body: response.body,
+                               // Restbase returns revision ID in etag  header.
+                               // Example:
+                               //     ETag: 
"123456/c4e494da-ee8f-11e4-83a1-8b80de1cde5f"
+                               revision: response.headers.etag.split( '/' )[ 0 
].replace( '"', '' )
+                       };
+               } );
+       }
+
+       segment( pageContent ) {
+               const segmenter = new CXSegmenter( pageContent, 
this.sourceLanguage );
+               segmenter.segment();
+               return segmenter.getSegmentedDoc();
+       }
+
+}
+
+module.exports = MWPage;
diff --git a/lib/pageloader/PageLoader.js b/lib/pageloader/PageLoader.js
deleted file mode 100644
index b1c8256..0000000
--- a/lib/pageloader/PageLoader.js
+++ /dev/null
@@ -1,51 +0,0 @@
-'use strict';
-
-const apiUtil = require( '../api-util' );
-const ApiRequest = require( '../mw/ApiRequest' );
-
-function PageLoader( app ) {
-       this.app = app;
-       this.log = app.logger.log || function () {};
-}
-
-/**
- * @param {string} page The page title
- * @param {string} source The language code or the domain of the wiki
- * @param {string} revision The revision id
- * @return {Promise}
- */
-PageLoader.prototype.load = function ( page, source, revision ) {
-       var path, domain, restReq;
-
-       path = 'page/html/' + encodeURIComponent( page );
-       if ( /.+\.org$/.test( source ) ) {
-               // We got an actual domain
-               domain = source;
-       } else {
-               domain = new ApiRequest( { context: this.app } ).getDomain( 
source );
-       }
-
-       if ( revision ) {
-               path += '/' + revision;
-       }
-
-       restReq = {
-               method: 'get',
-               headers: {
-                       // See https://www.mediawiki.org/wiki/Specs/HTML/1.5.0
-                       accept: 'text/html; charset=utf-8; 
profile="https://www.mediawiki.org/wiki/Specs/HTML/1.5.0";'
-               }
-       };
-
-       return apiUtil.restApiGet( this.app, domain, path, restReq ).then( 
function ( response ) {
-               return {
-                       body: response.body,
-                       // Restbase returns revision ID in etag  header.
-                       // Example:
-                       //     ETag: 
"123456/c4e494da-ee8f-11e4-83a1-8b80de1cde5f"
-                       revision: response.headers.etag.split( '/' )[ 0 
].replace( '"', '' )
-               };
-       } );
-};
-
-module.exports = PageLoader;
diff --git a/lib/routes/v1.js b/lib/routes/v1.js
index 8ba2cf7..405f7f4 100644
--- a/lib/routes/v1.js
+++ b/lib/routes/v1.js
@@ -58,39 +58,25 @@
                var sourceLanguage = req.params.language,
                        title = req.params.title,
                        revision = req.params.revision,
-                       CXSegmenter = require( __dirname + 
'/../segmentation/CXSegmenter.js' ),
-                       PageLoader = require( __dirname + 
'/../pageloader/PageLoader.js' ),
-                       pageloader = new PageLoader( this.app );
+                       MWPage = require( __dirname + '/../mw/MWPage' ),
+                       page = new MWPage( {
+                               context: this.app,
+                               sourceLanguage
+                       } );
 
-               return pageloader.load( title, sourceLanguage, revision ).then(
+               return page.getPage( title, revision ).then(
                        ( response ) => {
-                               var segmenter, segmentedContent;
-
-                               try {
-                                       this.app.logger.log( 'debug', 'Fetch 
page', {
-                                               title: title,
-                                               sourceLanguage: sourceLanguage
-                                       } );
-                                       segmenter = new CXSegmenter( 
response.body, sourceLanguage );
-                                       segmenter.segment();
-                                       segmentedContent = 
segmenter.getSegmentedContent();
-                                       this.app.logger.log( 'debug', 'Segment 
page', {
-                                               title: title,
-                                               sourceLanguage: sourceLanguage
-                                       } );
-                               } catch ( error ) {
-                                       res.status( 500 ).end( `Page 
${sourceLanguage} : ${title} could not be fetched or segmented: ` + 
error.toString() );
-                               }
                                res.send( {
                                        sourceLanguage: sourceLanguage,
                                        title: title,
                                        revision: response.revision,
-                                       segmentedContent: segmentedContent
+                                       segmentedContent: response.content
                                } );
                                this.app.logger.log( 'debug', 'Page sent' );
                        },
                        ( error ) => {
-                               res.status( 404 ).end( 'Page ' + sourceLanguage 
+ ':' + title + ' could not be found. ' + error.toString() );
+                               res.status( 404 )
+                                       .end( `Page ${sourceLanguage}:${title} 
could not be found. ` + error.toString() );
                        }
                );
        }

-- 
To view, visit https://gerrit.wikimedia.org/r/383109
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I2ad6e1f183c8fe69c960597a29632989965cce82
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <santhosh.thottin...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to