jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/366263 )

Change subject: MediaWiki api request interface with batching
......................................................................


MediaWiki api request interface with batching

* Batched mw api request interface for titles. We will need
  this for namespaces, images, categories, templates etc. This is
  an adaptation of ApiRequestCache in VE to nodejs and es6.
* MediaWiki title normalization, language code to domain name mapping
  included.
* MWLink now adapts links based on this interface.
* Tests for title adaptation and link adaptations
* ES6 classes are liberally used since minimum nodejs dependency is v6.9.0

Bug: T170674
Change-Id: I26354761ab2db816801b8247f81799989fa71184
---
M .eslintrc.json
M lib/api-util.js
A lib/language-domain-mapping.json
M lib/lineardoc/TextBlock.js
A lib/mw/APIRequestManager.js
A lib/mw/ApiRequest.js
A lib/mw/BatchedAPIRequest.js
A lib/mw/TitlePairRequest.js
M lib/translationunits/MWLink.js
M lib/translationunits/TranslationUnit.js
M lib/util.js
M package.json
M test/adaptation/AdaptationTest.js
M test/adaptation/AdaptationTests.json
M test/index.js
A test/mw/TitlePairRequest.test.js
16 files changed, 613 insertions(+), 115 deletions(-)

Approvals:
  Catrope: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/.eslintrc.json b/.eslintrc.json
index dbb2dc5..12375f6 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -5,7 +5,8 @@
                "browser": true,
                "jquery": true,
                "qunit": true,
-               "node": true
+               "node": true,
+               "mocha": true
        },
        "globals": {
                "require": false
diff --git a/lib/api-util.js b/lib/api-util.js
index 68183f6..840ce2f 100644
--- a/lib/api-util.js
+++ b/lib/api-util.js
@@ -1,48 +1,7 @@
 'use strict';
 
 var preq = require( 'preq' ),
-       sUtil = require( './util' ),
-       Template = require( 'swagger-router' ).Template,
-       HTTPError = sUtil.HTTPError;
-
-/**
- * Calls the MW API with the supplied query as its body
- *
- * @param {Object} app the application object
- * @param {string} domain the domain to issue the request to
- * @param {Object} query an object with all the query parameters for the MW API
- * @return {Promise} a promise resolving as the response object from the MW API
- */
-function mwApiGet( app, domain, query ) {
-       var request;
-       query = query || {};
-       query.continue = query.continue || '';
-
-       request = app.mwapi_tpl.expand( {
-               request: {
-                       params: {
-                               domain: domain
-                       },
-                       headers: {
-                               'user-agent': app.conf.user_agent
-                       },
-                       query: query
-               }
-       } );
-
-       return preq( request ).then( function ( response ) {
-               if ( response.status < 200 || response.status > 399 ) {
-                       // there was an error when calling the upstream 
service, propagate that
-                       throw new HTTPError( {
-                               status: response.status,
-                               type: 'api_error',
-                               title: 'MW API error',
-                               detail: response.body
-                       } );
-               }
-               return response;
-       } );
-}
+       Template = require( 'swagger-router' ).Template;
 
 /**
  * Calls the REST API with the supplied domain, path and request parameters
@@ -91,11 +50,12 @@
        // set up the MW API request template
        if ( !app.conf.mwapi_req ) {
                app.conf.mwapi_req = {
-                       uri: 'http://{{domain}}/w/api.php',
+                       uri: 'https://{{domain}}/w/api.php',
                        headers: {
                                'user-agent': '{{user-agent}}'
                        },
-                       body: '{{ default(request.query, {}) }}'
+                       query: '{{ default(request.query, {}) }}',
+                       body: '{{request.body}}'
                };
        }
        app.mwapi_tpl = new Template( app.conf.mwapi_req );
@@ -115,7 +75,6 @@
 }
 
 module.exports = {
-       mwApiGet: mwApiGet,
        restApiGet: restApiGet,
        setupApiTemplates: setupApiTemplates
 };
diff --git a/lib/language-domain-mapping.json b/lib/language-domain-mapping.json
new file mode 100644
index 0000000..bc90164
--- /dev/null
+++ b/lib/language-domain-mapping.json
@@ -0,0 +1,13 @@
+{
+       "be-tarask": "be-x-old",
+       "bho": "bh",
+       "crh-latn": "crh",
+       "gsw": "als",
+       "lzh": "zh-classical",
+       "nan": "zh-min-nan",
+       "nb": "no",
+       "rup": "roa-rup",
+       "sgs": "bat-smg",
+       "vro": "fiu-vro",
+       "yue": "zh-yue"
+}
diff --git a/lib/lineardoc/TextBlock.js b/lib/lineardoc/TextBlock.js
index 264da1c..3f9ece6 100644
--- a/lib/lineardoc/TextBlock.js
+++ b/lib/lineardoc/TextBlock.js
@@ -1,8 +1,8 @@
 'use strict';
 
 var TextChunk = require( './TextChunk.js' ),
-       Utils = require( './Utils.js' ),
-       cxutil = require( './../util.js' );
+       Utils = require( './Utils.js' );
+
 /**
  * A block of annotated inline text
  *
@@ -378,23 +378,46 @@
        return new TextBlock( allTextChunks );
 };
 
-TextBlock.prototype.adapt = cxutil.async( function* ( getAdapter ) {
-       var i, j, chunk, tags, len, adapter;
-       for ( i = 0, len = this.textChunks.length; i < len; i++ ) {
-               chunk = this.textChunks[ i ];
-               tags = chunk.tags;
-               for ( j = 0; j < tags.length; j++ ) {
-                       adapter = getAdapter( tags[ j ] );
+/**
+ * Adapt a text block.
+ * @param {Function} getAdapter A function that returns an adapter for the 
given node item
+ * @return {Promise} Promise that resolves the adapted TextBlock instance
+ */
+TextBlock.prototype.adapt = function ( getAdapter ) {
+       var textChunkPromises = [];
+
+       // Note that we are not using yield here, even though it would be more readable. 
Yield will pause
+       // the execution till the async call is resolved. For us, while looping 
over these text
+       // chunks and tags, this will create a problem. Adaptations often 
perform asynchronous API
+       // calls to a MediaWiki instance. If we do API calls for each and every 
item like a link
+       // title, it is inefficient. The API accepts a batched list of titles. 
We do have a batched
+       // API mechanism in cxserver, but that works by debouncing the incoming 
requests with a
+       // timeout. Pausing execution here will cause that debounce handler to 
be called.
+       // So we avoid that pausing by just using an array of promises.
+       this.textChunks.forEach( ( chunk ) => {
+               var tagPromises = [],
+                       tags = chunk.tags;
+
+               tags.forEach( ( tag ) => {
+                       const adapter = getAdapter( tag );
                        if ( adapter ) {
-                               tags[ j ] = yield adapter.adapt();
+                               tagPromises.push( adapter.adapt() );
                        }
-               }
+               } );
+
+               textChunkPromises.push( Promise.all( tagPromises ) );
+
                if ( chunk.inlineContent && chunk.inlineContent.adapt ) {
-                       chunk.inlineContent = yield chunk.inlineContent.adapt( 
getAdapter );
+                       textChunkPromises.push( ( ( chunk ) => 
chunk.inlineContent.adapt( getAdapter )
+                               .then( ( adaptedInlineContent ) => {
+                                       chunk.inlineContent = 
adaptedInlineContent;
+                               } ) )( chunk )
+                       );
                }
-       }
-       return this;
-} );
+       } );
+
+       return Promise.all( textChunkPromises ).then( () => this );
+};
 
 /**
  * Dump an XML Array version of the linear representation, for debugging
diff --git a/lib/mw/APIRequestManager.js b/lib/mw/APIRequestManager.js
new file mode 100644
index 0000000..1e8f673
--- /dev/null
+++ b/lib/mw/APIRequestManager.js
@@ -0,0 +1,40 @@
+const TitlePairRequest = require( './TitlePairRequest.js' );
+
+class MWAPIRequestManager {
+       constructor( appContext ) {
+               this.context = appContext;
+       }
+
+       /**
+        * Creates a title pair request for a given title between a given 
language pair
+        * @param {string} title Source title for which we want to know the 
target title in the target language
+        * @param {string} sourceLanguage Source language code
+        * @param {string} targetLanguage Target language code
+        * @return {Promise}
+        */
+       titlePairRequest( title, sourceLanguage, targetLanguage ) {
+               let instance;
+
+               if ( !MWAPIRequestManager.titlePairCache[ sourceLanguage ] ) {
+                       MWAPIRequestManager.titlePairCache[ sourceLanguage ] = 
new Map();
+               }
+
+               instance = MWAPIRequestManager.titlePairCache[ sourceLanguage 
][ targetLanguage ];
+               if ( !instance ) {
+                       instance = new TitlePairRequest( { sourceLanguage, 
targetLanguage, context: this.context } );
+                       MWAPIRequestManager.titlePairCache[ sourceLanguage ][ 
targetLanguage ] = instance;
+               }
+
+               return instance.get( title );
+       }
+}
+
+/**
+ * MediaWiki API request manager cache instance. We cache the request manager 
instances for each
+ * source language, target language pair. Theoretically this can grow up to 
300x300 = 90K items.
+ * The cached instances help to batch the API requests. Also future-ready for 
API response cache.
+ * @type {Map}
+ */
+MWAPIRequestManager.titlePairCache = new Map();
+
+module.exports = MWAPIRequestManager;
diff --git a/lib/mw/ApiRequest.js b/lib/mw/ApiRequest.js
new file mode 100644
index 0000000..845d5d0
--- /dev/null
+++ b/lib/mw/ApiRequest.js
@@ -0,0 +1,150 @@
+const apiUtil = require( '../api-util.js' ),
+       preq = require( 'preq' ),
+       cxUtil = require( '../util.js' ),
+       Title = require( 'mediawiki-title' ).Title,
+       cxutil = require( '../util.js' ),
+       languageDomainNameMapping = require( 
'./../language-domain-mapping.json' ),
+       HTTPError = cxUtil.HTTPError;
+
+class MWApiRequest {
+       /**
+        * @param {Object} config Configuration options
+        * @cfg {Object} context Application context
+        * @cfg {string} sourceLanguage Source language
+        * @cfg {string} targetLanguage target language
+        */
+       constructor( config ) {
+               this.context = config.context;
+               // Source and target languages
+               this.sourceLanguage = config.sourceLanguage;
+               this.targetLanguage = config.targetLanguage;
+               apiUtil.setupApiTemplates( config.context );
+       }
+
+       /**
+        * Calls the MW API with the supplied query as its body
+        *
+        * @param {string} domain the domain to issue the request to
+        * @param {Object} query an object with all the query parameters for 
the MW API
+        * @param {string} method The HTTP method to use - get or post
+        * @return {Promise} a promise resolving as the response object from 
the MW API
+        */
+       mwRequest( domain, query, method ) {
+               var request;
+               query = query || {};
+               query.continue = query.continue || '';
+               query.format = 'json';
+               request = this.context.mwapi_tpl.expand( {
+                       request: {
+                               params: {
+                                       domain: domain,
+                                       origin: '*'
+                               },
+                               headers: {
+                                       'user-agent': 
this.context.conf.user_agent
+                               }
+                       }
+               } );
+               if ( method === 'get' ) {
+                       request.query = query;
+               } else if ( method === 'post' ) {
+                       request.body = query;
+                       request.headers[ 'content-type' ] = 
'application/x-www-form-urlencoded';
+               }
+               return preq[ method ]( request ).then( ( response ) => {
+                       if ( response.status < 200 || response.status > 399 ) {
+                               // there was an error when calling the upstream 
service, propagate that
+                               throw new HTTPError( {
+                                       status: response.status,
+                                       type: 'api_error',
+                                       title: 'MW API error',
+                                       detail: response.body
+                               } );
+                       }
+                       return response.body;
+               } );
+       }
+
+       /**
+        * Calls the MW API with the supplied query as its body
+        *
+        * @param {string} domain the domain to issue the request to
+        * @param {Object} query an object with all the query parameters for 
the MW API
+        * @return {Promise} a promise resolving as the response object from 
the MW API
+        */
+       mwPost( domain, query ) {
+               return this.mwRequest( domain, query, 'post' );
+       }
+
+       /**
+        * Calls the MW API with the supplied query as its body
+        *
+        * @param {string} domain the domain to issue the request to
+        * @param {Object} query an object with all the query parameters for 
the MW API
+        * @return {Promise} a promise resolving as the response object from 
the MW API
+        */
+       mwGet( domain, query ) {
+               return this.mwRequest( domain, query, 'get' );
+       }
+
+       getDomain( language ) {
+               return this.getSiteCode( language ) + '.wikipedia.org';
+       }
+
+       /**
+        * Resolve non-standard wikimedia site codes
+        * @param {string} language Language code
+        * @return {string} Wikipedia site code corresponding to the language 
code.
+        */
+       getSiteCode( language ) {
+               return languageDomainNameMapping[ language ] || language;
+       }
+
+       /**
+        * Fetch the site information for a given language
+        * @param  {string} language
+        * @return {Promise}
+        */
+       getSiteInfo( language ) {
+               var query,
+                       domain = this.getDomain( language );
+               if ( MWApiRequest.siteInfoCache[ domain ] ) {
+                       return MWApiRequest.siteInfoCache[ domain ];
+               }
+
+               query = {
+                       action: 'query',
+                       meta: 'siteinfo',
+                       siprop: 
'general|namespaces|namespacealiases|specialpagealiases',
+                       format: 'json',
+                       formatversion: 2
+               };
+
+               MWApiRequest.siteInfoCache[ domain ] = this.mwGet( domain, 
query )
+                       .then( ( res ) => res.query );
+               return MWApiRequest.siteInfoCache[ domain ];
+       }
+}
+
+/**
+ * Normalize the title of the response
+ *
+ * @param {string} title Title
+ * @param {string} language language
+ * @return {Promise} Promise resolved with the normalized title
+ */
+MWApiRequest.prototype.normalizeTitle = cxutil.async( function* ( title, 
language ) {
+       var titleObj, siteInfo;
+       siteInfo = yield this.getSiteInfo( language );
+       // Remove prefixes like './'
+       title = title.replace( /^\.*\//, '' );
+       titleObj = Title.newFromText( title, siteInfo );
+       if ( !titleObj ) {
+               return title;
+       }
+       return titleObj.getPrefixedDBKey();
+} );
+
+MWApiRequest.siteInfoCache = new Map();
+
+module.exports = MWApiRequest;
diff --git a/lib/mw/BatchedAPIRequest.js b/lib/mw/BatchedAPIRequest.js
new file mode 100644
index 0000000..e66a5fe
--- /dev/null
+++ b/lib/mw/BatchedAPIRequest.js
@@ -0,0 +1,161 @@
+const cxutil = require( '../util.js' ),
+       MWApiRequest = require( './ApiRequest.js' );
+
+/**
+ * MediaWiki API batch queue.
+ *
+ * Used to queue up lists of items centrally to get information about in 
batches of requests.
+ *
+ * @class
+ * @extends MWApiRequest
+ * @constructor
+ * @param {Object} config Configuration
+ */
+class BatchedAPIRequest extends MWApiRequest {
+       constructor( config ) {
+               super( config );
+               // Keys are titles, values are promises
+               this.promises = new Map();
+
+               // Array of page titles queued to be looked up
+               this.queue = [];
+               this.dispatchTimer = null;
+       }
+
+       /**
+        * Process each page in the response of an API request
+        *
+        * @abstract
+        * @method
+        * @param {Object} page The page object
+        * @return {Object|undefined} Any relevant info that we want to cache 
and return.
+        */
+       processPage() {
+               throw new Error( 'Not implemented!' );
+       }
+
+       /**
+        * Get an API request promise to deal with a list of titles
+        *
+        * @abstract
+        * @return {Promise}
+        */
+       getRequestPromise() {
+               throw new Error( 'Not implemented!' );
+       }
+
+       /**
+        * Perform any scheduled API requests.
+        *
+        * @private
+        * @fires add
+        */
+       processQueue() {
+               var subqueue, queue, processResult,
+                       batchRequest = this;
+
+               function rejectSubqueue( rejectQueue ) {
+                       var i, len;
+                       for ( i = 0, len = rejectQueue.length; i < len; i++ ) {
+                               batchRequest.promises[ rejectQueue[ i ] 
].reject();
+                       }
+               }
+
+               processResult = cxutil.async( function*( data ) {
+                       var pageid, page, i, processedPage,
+                               pages = ( data.query && data.query.pages ) || 
data.pages,
+                               redirects,
+                               processed = {};
+
+                       redirects = ( data.query && data.query.redirects ) || 
{};
+                       if ( pages ) {
+                               for ( pageid in pages ) {
+                                       page = pages[ pageid ];
+                                       processedPage = 
batchRequest.processPage( page, redirects );
+                                       if ( processedPage !== undefined ) {
+                                               processed[ page.title ] = 
processedPage;
+                                       }
+                                       for ( i in redirects ) {
+                                               // Locate the title in 
redirects, if any.
+                                               if ( redirects[ i ].to === 
page.title ) {
+                                                       processed[ redirects[ i 
].from ] = processedPage;
+                                                       break;
+                                               }
+                                       }
+                               }
+                               yield batchRequest.set( processed );
+                       }
+               } );
+
+               queue = this.queue;
+               this.queue = [];
+               while ( queue.length ) {
+                       subqueue = queue.splice( 0, 50 );
+                       this.getRequestPromise( subqueue )
+                               .then( processResult )
+
+                               // Reject everything in subqueue; this will 
only reject the ones
+                               // that weren't already resolved above, because 
.reject() on an
+                               // already resolved Deferred is a no-op.
+                               .then( rejectSubqueue.bind( null, subqueue ) );
+               }
+       }
+
+       /**
+        * Dispatch the queue for processing when there is a gap in the arrival 
of requests,
+        * or when the queue size exceeds a given size.
+        */
+       dispatch() {
+               if ( this.queue.length >= 100 ) {
+                       // Process the queue immediately.
+                       this.processQueue();
+               }
+               if ( this.dispatchTimer ) {
+                       clearTimeout( this.dispatchTimer );
+               }
+               this.dispatchTimer = setTimeout( this.processQueue.bind( this 
), 10 );
+       }
+}
+
+/**
+ * Look up data about a title. If the data about this title is already in the 
cache, this
+ * returns an already-resolved promise. Otherwise, it returns a pending 
promise and schedules
+ * a request to retrieve the data.
+ *
+ * @param {string} title Title
+ * @return {Promise} Promise that gets resolved when data is available
+ */
+BatchedAPIRequest.prototype.get = cxutil.async( function* ( title ) {
+       var normalizedTitle;
+       if ( typeof title !== 'string' ) {
+               // Don't bother letting things like undefined or null make it 
all the way through,
+               // just reject them here. Otherwise they'll cause problems or 
exceptions at random
+               // other points in this file.
+               return Promise.reject( 'Invalid title' );
+       }
+       normalizedTitle = yield this.normalizeTitle( title, this.sourceLanguage 
);
+       if ( !Object.prototype.hasOwnProperty.call( this.promises, 
normalizedTitle ) ) {
+               this.promises[ normalizedTitle ] = new cxutil.Deferred();
+               this.queue.push( normalizedTitle );
+               this.dispatch();
+       }
+       return this.promises[ normalizedTitle ];
+} );
+
+/**
+ * Add entries to the cache. Does not overwrite already-set entries.
+ *
+ * @param {Object} entries Object keyed by page title, with the values being 
data objects
+ */
+BatchedAPIRequest.prototype.set = cxutil.async( function* ( entries ) {
+       var normalizedTitle, title;
+       for ( title in entries ) {
+               normalizedTitle = yield this.normalizeTitle( title, 
this.sourceLanguage );
+               if ( !Object.prototype.hasOwnProperty.call( this.promises, 
normalizedTitle ) ) {
+                       this.promises[ normalizedTitle ] = new 
cxutil.Deferred();
+               }
+               this.promises[ normalizedTitle ].resolve( entries[ title ] );
+       }
+} );
+
+module.exports = BatchedAPIRequest;
diff --git a/lib/mw/TitlePairRequest.js b/lib/mw/TitlePairRequest.js
new file mode 100644
index 0000000..349c566
--- /dev/null
+++ b/lib/mw/TitlePairRequest.js
@@ -0,0 +1,46 @@
+/**
+ * ContentTranslation Title pair request
+ *
+ */
+const BatchedAPIRequest = require( './BatchedAPIRequest.js' );
+
+/**
+ * Fetches information about title pairs in batches.
+ *
+ * @class
+ * @extends BatchedAPIRequest
+ * @constructor
+ * @param {Object} config Configuration
+ */
+class TitlePairRequest extends BatchedAPIRequest {
+       constructor( config ) {
+               super( config );
+       }
+
+       processPage( page ) {
+               return {
+                       sourceTitle: page.title,
+                       targetTitle: page.langlinks && page.langlinks[ 0 ] && 
page.langlinks[ 0 ][ '*' ],
+                       missing: page.langlinks && page.langlinks[ 0 ] && 
page.langlinks[ 0 ][ '*' ] === undefined
+               };
+       }
+
+       getRequestPromise( subqueue ) {
+               var domain, query;
+               query = {
+                       action: 'query',
+                       prop: 'langlinks',
+                       lllimit: subqueue.length,
+                       lllang: this.getSiteCode( this.targetLanguage ),
+                       titles: subqueue.join( '|' ),
+                       redirects: true,
+                       'continue': ''
+               };
+               domain = this.getDomain( this.sourceLanguage );
+               // We use POST here because the titles when joined will result 
in a longer query string
+               // that GET requests cannot process sometimes.
+               return this.mwPost( domain, query );
+       }
+}
+
+module.exports = TitlePairRequest;
diff --git a/lib/translationunits/MWLink.js b/lib/translationunits/MWLink.js
index c344e55..b2dff27 100644
--- a/lib/translationunits/MWLink.js
+++ b/lib/translationunits/MWLink.js
@@ -1,42 +1,37 @@
-var util = require( 'util' ),
-       cxutil = require( '../util.js' ),
-       TranslationUnit = require( './TranslationUnit.js' );
+var cxutil = require( '../util.js' ),
+       TranslationUnit = require( './TranslationUnit.js' ),
+       MWAPIRequestManager = require( '../mw/APIRequestManager.js' );
 
-function MWLink( node, sourceLanguage, targetLanguage, context ) {
-       this.node = node;
-       this.sourceLanguage = sourceLanguage;
-       this.targetLanguage = targetLanguage;
-       this.context = context;
+class MWLink extends TranslationUnit {
+       constructor( node, sourceLanguage, targetLanguage, context ) {
+               super( node, sourceLanguage, targetLanguage, context );
+               // Nothing else?
+       }
 }
-
-util.inherits( MWLink, TranslationUnit );
 
 MWLink.name = 'link';
 MWLink.matchTagNames = [ 'a' ];
 MWLink.matchRdfaTypes = [ 'mw:WikiLink' ];
 
 MWLink.prototype.adapt = cxutil.async( function* () {
-       // XXX: Just a marker for now. To be removed
-       this.node.attributes[ 'adapted' ] = 'true';
-       this.node.attributes[ 'href' ] = yield this.findLinkTarget(
-               this.sourceLanguage,
-               this.node.attributes.href,
-               this.targetLanguage
-       );
+       var linkPairInfo;
+
+       linkPairInfo = yield new MWAPIRequestManager( this.context )
+               .titlePairRequest( this.node.attributes.href, 
this.sourceLanguage, this.targetLanguage );
+
+       if ( linkPairInfo.targetTitle ) {
+               // NOTE: The titles we are setting here are not relative 
titles.
+               this.node.attributes[ 'href' ] = linkPairInfo.targetTitle;
+       } else {
+               // TODO: This format is not decided yet. We do need to inform 
client about failed
+               // adaptations somehow.
+               this.node.attributes[ 'data-cx' ] = JSON.stringify( {
+                       adapted: false,
+                       sourceTitle: linkPairInfo.sourceTitle
+               } );
+       }
 
        return this.node;
 } );
-
-/**
- * Find link target for the given source title
- * @param {string} sourceLanguage
- * @param {string} sourceTitle
- * @param {string} targetLanguage
- * @return {Promise}
- */
-MWLink.prototype.findLinkTarget = function ( sourceLanguage, sourceTitle, 
targetLanguage ) {
-       console.log( 'Adapting from ' + sourceLanguage + ' to ' + 
targetLanguage );
-       return Promise.resolve( sourceTitle );
-};
 
 module.exports = MWLink;
diff --git a/lib/translationunits/TranslationUnit.js 
b/lib/translationunits/TranslationUnit.js
index 2fa4423..edec850 100644
--- a/lib/translationunits/TranslationUnit.js
+++ b/lib/translationunits/TranslationUnit.js
@@ -1,14 +1,21 @@
-function TranslationUnit( node, context ) {
-       this.node = node;
-       this.context = context;
+/*
+ * @abstract
+ */
+class TranslationUnit {
+       constructor( node, sourceLanguage, targetLanguage, context ) {
+               this.node = node;
+               this.sourceLanguage = sourceLanguage;
+               this.targetLanguage = targetLanguage;
+               this.context = context;
+       }
+
+       adapt() {
+               return this.node;
+       }
 }
 
 TranslationUnit.name = null;
 TranslationUnit.matchTagNames = null;
 TranslationUnit.matchRdfaTypes = null;
-
-TranslationUnit.prototype.adapt = function() {
-       return this.node;
-};
 
 module.exports = TranslationUnit;
diff --git a/lib/util.js b/lib/util.js
index 2e55577..3196571 100644
--- a/lib/util.js
+++ b/lib/util.js
@@ -310,12 +310,23 @@
        };
 }
 
+function Deferred() {
+       this.promise = new Promise( ( function( resolve, reject ) {
+               this.resolve = resolve;
+               this.reject = reject;
+       } ).bind( this ) );
+
+       this.then = this.promise.then.bind( this.promise );
+       this.catch = this.promise.catch.bind( this.promise );
+}
+
 module.exports = {
-       HTTPError: HTTPError,
-       initAndLogRequest: initAndLogRequest,
-       wrapRouteHandlers: wrapRouteHandlers,
-       setErrorHandler: setErrorHandler,
+       HTTPError,
+       initAndLogRequest,
+       wrapRouteHandlers,
+       setErrorHandler,
        router: createRouter,
-       spawn: spawn,
-       async: async
+       spawn,
+       async,
+       Deferred
 };
diff --git a/package.json b/package.json
index deb7a8f..03ce75d 100644
--- a/package.json
+++ b/package.json
@@ -27,6 +27,7 @@
     "domino": "^1.0.25",
     "express": "^4.14.0",
     "js-yaml": "^3.6.1",
+    "mediawiki-title": "^0.6.3",
     "preq": "^0.5.2",
     "service-runner": "^2.2.5",
     "swagger-router": "^0.4.6",
diff --git a/test/adaptation/AdaptationTest.js 
b/test/adaptation/AdaptationTest.js
index 1d8bb17..c6c0500 100644
--- a/test/adaptation/AdaptationTest.js
+++ b/test/adaptation/AdaptationTest.js
@@ -1,7 +1,6 @@
 'use strict';
 
-var fs = require( 'fs' ),
-       assert = require( '../utils/assert.js' ),
+var assert = require( '../utils/assert.js' ),
        server = require( '../utils/server.js' ),
        LinearDoc = require( '../../lib/lineardoc' ),
        async = require( 'async' ),
@@ -16,20 +15,17 @@
 }
 
 describe( 'Adaptation tests', function () {
-       before( function () {
-               return server.start();
-       } );
-
        async.forEach( tests, function ( test ) {
                var expectedResultData, adapter;
 
-               adapter = new Adapter( test.from, test.to, server );
-               adapter.adapt( test.source ).then( function( result ) {
-                       result = normalize( result.getHtml() );
-                       expectedResultData = normalize( test.result );
-                       it( 'should not have any errors when: ' + test.desc, 
function () {
+               adapter = new Adapter( test.from, test.to, server.config );
+               it( 'should not have any errors when: ' + test.desc, function 
() {
+                       return adapter.adapt( test.source ).then( function( 
result ) {
+                               result = normalize( result.getHtml() );
+                               expectedResultData = normalize( test.result );
                                assert.deepEqual( result, expectedResultData, 
test.source + ': ' + test.desc || '' );
                        } );
                } );
        } );
+
 } );
diff --git a/test/adaptation/AdaptationTests.json 
b/test/adaptation/AdaptationTests.json
index acb6e29..a96d619 100644
--- a/test/adaptation/AdaptationTests.json
+++ b/test/adaptation/AdaptationTests.json
@@ -4,6 +4,13 @@
                "from": "en",
                "to": "es",
                "source": "<p><a rel='mw:WikiLink' href='Oxygen'>Oxygen</a> is a chemical element with symbol O and <a rel='mw:WikiLink' href='Atomic number'>atomic number</a> 8.</p>",
-               "result": "<p><a adapted='true' href='Oxygen' rel='mw:WikiLink'>Oxygen</a> is a chemical element with symbol O and <a adapted='true' href='Atomic number' rel='mw:WikiLink'>atomic number</a> 8.</p>"
+               "result": "<p><a href='Oxígeno' rel='mw:WikiLink'>Oxygen</a> is a chemical element with symbol O and <a href='Número atómico' rel='mw:WikiLink'>atomic number</a> 8.</p>"
+       },
+       {
+               "desc": "Link adaptation - Full paragraph and relative links",
+               "from": "en",
+               "to": "es",
+               "source": "<p id='mwBg'><b id='mwBw'>Oxygen</b> is a <a 
rel='mw:WikiLink' href='./Chemical_element' title='Chemical element' 
id='mwCA'>chemical element</a> with symbol<span typeof='mw:Entity' id='mwCQ'> 
</span><b id='mwCg'>O</b> and <a rel='mw:WikiLink' href='./Atomic_number' 
title='Atomic number' id='mwCw'>atomic number</a> 8. It is a member of the <a 
rel='mw:WikiLink' href='./Chalcogen' title='Chalcogen' id='mwDA'>chalcogen</a> 
<a rel='mw:WikiLink' href='./Group_(periodic_table)' title='Group (periodic 
table)' id='mwDQ'>group</a> on the <a rel='mw:WikiLink' href='./Periodic_table' 
title='Periodic table' id='mwDg'>periodic table</a> and is a highly <a 
rel='mw:WikiLink' href='./Chemical_reaction' title='Chemical reaction' 
id='mwDw'>reactive</a> <a rel='mw:WikiLink' href='./Nonmetal' title='Nonmetal' 
id='mwEA'>nonmetal</a> and <a rel='mw:WikiLink' href='./Oxidizing_agent' 
title='Oxidizing agent' id='mwEQ'>oxidizing agent</a> that readily forms <a 
rel='mw:WikiLink' href='./Oxide' title='Oxide' id='mwEg'>oxides</a> with most 
elements as well as other <a rel='mw:WikiLink' href='./Chemical_compound' 
title='Chemical compound' id='mwEw'>compounds</a>. By mass, oxygen is the 
third-<a rel='mw:WikiLink' href='./Abundance_of_the_chemical_elements' 
title='Abundance of the chemical elements' id='mwFA'>most abundant element</a> 
in the universe, after <a rel='mw:WikiLink' href='./Hydrogen' title='Hydrogen' 
id='mwFQ'>hydrogen</a> and <a rel='mw:WikiLink' href='./Helium' title='Helium' 
id='mwFg'>helium</a>. At <a rel='mw:WikiLink' 
href='./Standard_temperature_and_pressure' title='Standard temperature and 
pressure' id='mwFw' class='mw-redirect'>standard temperature and pressure</a>, 
two atoms of the element <a rel='mw:WikiLink' href='./Chemical_bond' 
title='Chemical bond' id='mwGA'>bind</a> to form <a rel='mw:WikiLink' 
href='./Allotropes_of_oxygen#Dioxygen' title='Allotropes of oxygen' 
id='mwGQ'>dioxygen</a>, a colorless and odorless <a rel='mw:WikiLink' 
href='./Diatomic_molecule' title='Diatomic molecule' id='mwGg'>diatomic</a> <a 
rel='mw:WikiLink' href='./Gas' title='Gas' id='mwGw'>gas</a> with the formula. 
This is an important part of the <a rel='mw:WikiLink' 
href='./Atmosphere_of_Earth' title='Atmosphere of Earth' 
id='mwHQ'>atmosphere</a> and diatomic oxygen gas constitutes 20.8% of the <a 
rel='mw:WikiLink' href='./Earth's_atmosphere' title='Earth's atmosphere' 
id='mwHg' class='mw-redirect'>Earth's atmosphere</a>. Additionally, as oxides 
the element makes up almost half of the <a rel='mw:WikiLink' 
href='./Earth's_crust' title='Earth's crust' id='mwHw' 
class='mw-redirect'>Earth's crust</a>.</p>",
+               "result": "<p id='mwBg'><b id='mwBw'>Oxygen</b> is a <a 
href='Elemento químico' id='mwCA' rel='mw:WikiLink' title='Chemical 
element'>chemical element</a> with symbol<span id='mwCQ' typeof='mw:Entity'> 
</span><b id='mwCg'>O</b> and <a href='Número atómico' id='mwCw' 
rel='mw:WikiLink' title='Atomic number'>atomic number</a> 8. It is a member of 
the <a href='Anfígeno' id='mwDA' rel='mw:WikiLink' 
title='Chalcogen'>chalcogen</a> <a 
data-cx='{&#34;adapted&#34;:false,&#34;sourceTitle&#34;:&#34;Group (periodic 
table)&#34;}' href='./Group_(periodic_table)' id='mwDQ' rel='mw:WikiLink' 
title='Group (periodic table)'>group</a> on the <a href='Tabla periódica de los 
elementos' id='mwDg' rel='mw:WikiLink' title='Periodic table'>periodic 
table</a> and is a highly <a href='Reacción química' id='mwDw' 
rel='mw:WikiLink' title='Chemical reaction'>reactive</a> <a href='No metal' 
id='mwEA' rel='mw:WikiLink' title='Nonmetal'>nonmetal</a> and <a 
href='Oxidante' id='mwEQ' rel='mw:WikiLink' title='Oxidizing agent'>oxidizing 
agent</a> that readily forms <a href='Óxido' id='mwEg' rel='mw:WikiLink' 
title='Oxide'>oxides</a> with most elements as well as other <a href='Compuesto 
químico' id='mwEw' rel='mw:WikiLink' title='Chemical compound'>compounds</a>. 
By mass, oxygen is the third-<a href='Abundancia de los elementos químicos' 
id='mwFA' rel='mw:WikiLink' title='Abundance of the chemical elements'>most 
abundant element</a> in the universe, after <a href='Hidrógeno' id='mwFQ' 
rel='mw:WikiLink' title='Hydrogen'>hydrogen</a> and <a href='Helio' id='mwFg' 
rel='mw:WikiLink' title='Helium'>helium</a>. At <a class='mw-redirect' 
href='Condiciones normalizadas de presión y temperatura' id='mwFw' 
rel='mw:WikiLink' title='Standard temperature and pressure'>standard 
temperature and pressure</a>, two atoms of the element <a href='Enlace químico' 
id='mwGA' rel='mw:WikiLink' title='Chemical bond'>bind</a> to form <a 
href='Alótropos del oxígeno' id='mwGQ' rel='mw:WikiLink' title='Allotropes of 
oxygen'>dioxygen</a>, a colorless and odorless <a href='Molécula diatómica' 
id='mwGg' rel='mw:WikiLink' title='Diatomic molecule'>diatomic</a> <a 
href='Gas' id='mwGw' rel='mw:WikiLink' title='Gas'>gas</a> with the formula. 
This is an important part of the <a href='Atmósfera terrestre' id='mwHQ' 
rel='mw:WikiLink' title='Atmosphere of Earth'>atmosphere</a> and diatomic 
oxygen gas constitutes 20.8% of the <a atmosphere='' class='mw-redirect' 
href='Tierra' id='mwHg' rel='mw:WikiLink' s='' s_atmosphere='' 
title='Earth'>Earth's atmosphere</a>. Additionally, as oxides the element makes 
up almost half of the <a class='mw-redirect' crust='' href='Tierra' id='mwHw' 
rel='mw:WikiLink' s='' s_crust='' title='Earth'>Earth's crust</a>.</p>"
        }
 ]
diff --git a/test/index.js b/test/index.js
index 8776d2c..b6feadd 100644
--- a/test/index.js
+++ b/test/index.js
@@ -9,6 +9,7 @@
        'lib/dictionary',
        'lib/lineardoc',
        'lib/mt',
+       'lib/mw',
        'lib/pageloader',
        'lib/routes',
        'lib/segmentation',
diff --git a/test/mw/TitlePairRequest.test.js b/test/mw/TitlePairRequest.test.js
new file mode 100644
index 0000000..5cc4e34
--- /dev/null
+++ b/test/mw/TitlePairRequest.test.js
@@ -0,0 +1,87 @@
+'use strict';
+
+var tests,
+       assert = require( '../utils/assert.js' ),
+       server = require( '../utils/server.js' ),
+       async = require( 'async' ),
+       TitlePairRequest = require( '../../lib/mw/TitlePairRequest' );
+
+tests = [
+       {
+               source: 'Kerala',
+               result: 'കേരളം',
+               sourceLanguage: 'en',
+               targetLanguage: 'ml',
+               desc: 'Corresponding title exist in target language'
+       },
+       {
+               source: 'Sea',
+               result: 'Mar',
+               sourceLanguage: 'en',
+               targetLanguage: 'es',
+               desc: 'Corresponding title exist in target language'
+       },
+       {
+               source: 'Atomic number',
+               result: 'Número atómico',
+               sourceLanguage: 'en',
+               targetLanguage: 'es',
+               desc: 'Corresponding title exist in target language and given title need normalization'
+       },
+       {
+               source: 'This title does not exist in English wikipedia',
+               result: undefined,
+               sourceLanguage: 'en',
+               targetLanguage: 'es',
+               desc: 'Corresponding title does not exist in target language and given title need normalization'
+       },
+       {
+               source: 'Group_(periodic_table)',
+               result: 'ଶ୍ରେଣୀ (ପର୍ଯ୍ୟାୟ ସାରଣୀ)',
+               sourceLanguage: 'en',
+               targetLanguage: 'or',
+               desc: 'Corresponding title exist in target language and given title need normalization, has parenthesis'
+       }
+];
+
+describe( 'Title pair tests', function () {
+       async.forEach( tests, function ( test ) {
+               var request;
+
+               request = new TitlePairRequest( {
+                       sourceLanguage: test.sourceLanguage,
+                       targetLanguage: test.targetLanguage,
+                       context: server.config
+               } );
+               it( 'should adapt the title when: ' + test.desc, function () {
+                       return request.get( test.source ).then( function( result ) {
+                               assert.deepEqual( result.targetTitle, test.result );
+                       } );
+               } );
+       } );
+} );
+
+describe( 'Title pair tests - batching', function () {
+       var oldGetRequestPromise;
+
+       it( 'should have the queue size 50', function () {
+               var i, titlePairRequest;
+               oldGetRequestPromise = TitlePairRequest.prototype.getRequestPromise;
+               TitlePairRequest.prototype.getRequestPromise = function( subqueue ) {
+                       assert.deepEqual( subqueue.length, 50 );
+                       return Promise.resolve( {} );
+               };
+               titlePairRequest = new TitlePairRequest( {
+                       sourceLanguage: 'en',
+                       targetLanguage: 'es',
+                       context: server.config
+               } );
+               for ( i = 0; i < 50; i++ ) {
+                       titlePairRequest.get( 'Title' + i );
+               }
+               return Promise.all( titlePairRequest.promises );
+       } );
+       after( function () {
+               TitlePairRequest.prototype.getRequestPromise = oldGetRequestPromise;
+       } );
+} );

-- 
To view, visit https://gerrit.wikimedia.org/r/366263
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I26354761ab2db816801b8247f81799989fa71184
Gerrit-PatchSet: 12
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <[email protected]>
Gerrit-Reviewer: Catrope <[email protected]>
Gerrit-Reviewer: Santhosh <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to