jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/366263 )
Change subject: MediaWiki api request interface with batching
......................................................................
MediaWiki api request interface with batching
* Batched mw api request interface for titles. We will need
this for namespaces, images, categories, templates etc. This is
an adaptation of ApiRequestCache in VE to nodejs and es6.
* MediaWiki title normalization, language code to domain name mapping
included.
* MWLink now adapts links based on this interface.
* Tests for title adaptation and link adaptations
* ES6 classes are liberally used since minimum nodejs dependency is v6.9.0
Bug: T170674
Change-Id: I26354761ab2db816801b8247f81799989fa71184
---
M .eslintrc.json
M lib/api-util.js
A lib/language-domain-mapping.json
M lib/lineardoc/TextBlock.js
A lib/mw/APIRequestManager.js
A lib/mw/ApiRequest.js
A lib/mw/BatchedAPIRequest.js
A lib/mw/TitlePairRequest.js
M lib/translationunits/MWLink.js
M lib/translationunits/TranslationUnit.js
M lib/util.js
M package.json
M test/adaptation/AdaptationTest.js
M test/adaptation/AdaptationTests.json
M test/index.js
A test/mw/TitlePairRequest.test.js
16 files changed, 613 insertions(+), 115 deletions(-)
Approvals:
Catrope: Looks good to me, approved
jenkins-bot: Verified
diff --git a/.eslintrc.json b/.eslintrc.json
index dbb2dc5..12375f6 100644
--- a/.eslintrc.json
+++ b/.eslintrc.json
@@ -5,7 +5,8 @@
"browser": true,
"jquery": true,
"qunit": true,
- "node": true
+ "node": true,
+ "mocha": true
},
"globals": {
"require": false
diff --git a/lib/api-util.js b/lib/api-util.js
index 68183f6..840ce2f 100644
--- a/lib/api-util.js
+++ b/lib/api-util.js
@@ -1,48 +1,7 @@
'use strict';
var preq = require( 'preq' ),
- sUtil = require( './util' ),
- Template = require( 'swagger-router' ).Template,
- HTTPError = sUtil.HTTPError;
-
-/**
- * Calls the MW API with the supplied query as its body
- *
- * @param {Object} app the application object
- * @param {string} domain the domain to issue the request to
- * @param {Object} query an object with all the query parameters for the MW API
- * @return {Promise} a promise resolving as the response object from the MW API
- */
-function mwApiGet( app, domain, query ) {
- var request;
- query = query || {};
- query.continue = query.continue || '';
-
- request = app.mwapi_tpl.expand( {
- request: {
- params: {
- domain: domain
- },
- headers: {
- 'user-agent': app.conf.user_agent
- },
- query: query
- }
- } );
-
- return preq( request ).then( function ( response ) {
- if ( response.status < 200 || response.status > 399 ) {
- // there was an error when calling the upstream
service, propagate that
- throw new HTTPError( {
- status: response.status,
- type: 'api_error',
- title: 'MW API error',
- detail: response.body
- } );
- }
- return response;
- } );
-}
+ Template = require( 'swagger-router' ).Template;
/**
* Calls the REST API with the supplied domain, path and request parameters
@@ -91,11 +50,12 @@
// set up the MW API request template
if ( !app.conf.mwapi_req ) {
app.conf.mwapi_req = {
- uri: 'http://{{domain}}/w/api.php',
+ uri: 'https://{{domain}}/w/api.php',
headers: {
'user-agent': '{{user-agent}}'
},
- body: '{{ default(request.query, {}) }}'
+ query: '{{ default(request.query, {}) }}',
+ body: '{{request.body}}'
};
}
app.mwapi_tpl = new Template( app.conf.mwapi_req );
@@ -115,7 +75,6 @@
}
module.exports = {
- mwApiGet: mwApiGet,
restApiGet: restApiGet,
setupApiTemplates: setupApiTemplates
};
diff --git a/lib/language-domain-mapping.json b/lib/language-domain-mapping.json
new file mode 100644
index 0000000..bc90164
--- /dev/null
+++ b/lib/language-domain-mapping.json
@@ -0,0 +1,13 @@
+{
+ "be-tarask": "be-x-old",
+ "bho": "bh",
+ "crh-latn": "crh",
+ "gsw": "als",
+ "lzh": "zh-classical",
+ "nan": "zh-min-nan",
+ "nb": "no",
+ "rup": "roa-rup",
+ "sgs": "bat-smg",
+ "vro": "fiu-vro",
+ "yue": "zh-yue"
+}
diff --git a/lib/lineardoc/TextBlock.js b/lib/lineardoc/TextBlock.js
index 264da1c..3f9ece6 100644
--- a/lib/lineardoc/TextBlock.js
+++ b/lib/lineardoc/TextBlock.js
@@ -1,8 +1,8 @@
'use strict';
var TextChunk = require( './TextChunk.js' ),
- Utils = require( './Utils.js' ),
- cxutil = require( './../util.js' );
+ Utils = require( './Utils.js' );
+
/**
* A block of annotated inline text
*
@@ -378,23 +378,46 @@
return new TextBlock( allTextChunks );
};
-TextBlock.prototype.adapt = cxutil.async( function* ( getAdapter ) {
- var i, j, chunk, tags, len, adapter;
- for ( i = 0, len = this.textChunks.length; i < len; i++ ) {
- chunk = this.textChunks[ i ];
- tags = chunk.tags;
- for ( j = 0; j < tags.length; j++ ) {
- adapter = getAdapter( tags[ j ] );
+/**
+ * Adapt a text block.
+ * @param {Function} getAdapter A function that returns an adapter for the
given node item
+ * @return {Promise} Promise that resolves the adapted TextBlock instance
+ */
+TextBlock.prototype.adapt = function ( getAdapter ) {
+ var textChunkPromises = [];
+
+ // Note that we deliberately avoid yield here, although it would read better.
Yield will pause
+ // the execution till the async call is resolved. For us, while looping
over these text
+ // chunks and tags, this will create a problem. Adaptations often
perform asynchronous API
+ // calls to a MediaWiki instance. If we do API calls for each and every
item like a link
+ // title, it is inefficient. The API accepts a batched list of titles.
We do have a batched
+ // API mechanism in cxserver, but that works by debouncing the incoming
requests with a
+ // timeout. Pausing execution here will cause that debounce handler to
be called.
+ // So we avoid that pausing by just using an array of promises.
+ this.textChunks.forEach( ( chunk ) => {
+ var tagPromises = [],
+ tags = chunk.tags;
+
+ tags.forEach( ( tag ) => {
+ const adapter = getAdapter( tag );
if ( adapter ) {
- tags[ j ] = yield adapter.adapt();
+ tagPromises.push( adapter.adapt() );
}
- }
+ } );
+
+ textChunkPromises.push( Promise.all( tagPromises ) );
+
if ( chunk.inlineContent && chunk.inlineContent.adapt ) {
- chunk.inlineContent = yield chunk.inlineContent.adapt(
getAdapter );
+ textChunkPromises.push( ( ( chunk ) =>
chunk.inlineContent.adapt( getAdapter )
+ .then( ( adaptedInlineContent ) => {
+ chunk.inlineContent =
adaptedInlineContent;
+ } ) )( chunk )
+ );
}
- }
- return this;
-} );
+ } );
+
+ return Promise.all( textChunkPromises ).then( () => this );
+};
/**
* Dump an XML Array version of the linear representation, for debugging
diff --git a/lib/mw/APIRequestManager.js b/lib/mw/APIRequestManager.js
new file mode 100644
index 0000000..1e8f673
--- /dev/null
+++ b/lib/mw/APIRequestManager.js
@@ -0,0 +1,40 @@
+const TitlePairRequest = require( './TitlePairRequest.js' );
+
+class MWAPIRequestManager {
+ constructor( appContext ) {
+ this.context = appContext;
+ }
+
+ /**
+ * Creates a title pair request for a given title between a given
language pair
+ * @param {string} title Source title for which we want to know the
target title in the target language
+ * @param {string} sourceLanguage Source language code
+ * @param {string} targetLanguage Target language code
+ * @return {Promise}
+ */
+ titlePairRequest( title, sourceLanguage, targetLanguage ) {
+ let instance;
+
+ if ( !MWAPIRequestManager.titlePairCache[ sourceLanguage ] ) {
+ MWAPIRequestManager.titlePairCache[ sourceLanguage ] =
new Map();
+ }
+
+ instance = MWAPIRequestManager.titlePairCache[ sourceLanguage
][ targetLanguage ];
+ if ( !instance ) {
+ instance = new TitlePairRequest( { sourceLanguage,
targetLanguage, context: this.context } );
+ MWAPIRequestManager.titlePairCache[ sourceLanguage ][
targetLanguage ] = instance;
+ }
+
+ return instance.get( title );
+ }
+}
+
+/**
+ * MediaWiki API request manager cache instance. We cache the request manager
instances for each
+ * source language, target language pair. Theoretically this can grow up to
300x300 = 9K items.
+ * The cached instances help to batch the API requests. Also future-ready for
API response cache.
+ * @type {Map}
+ */
+MWAPIRequestManager.titlePairCache = new Map();
+
+module.exports = MWAPIRequestManager;
diff --git a/lib/mw/ApiRequest.js b/lib/mw/ApiRequest.js
new file mode 100644
index 0000000..845d5d0
--- /dev/null
+++ b/lib/mw/ApiRequest.js
@@ -0,0 +1,150 @@
+const apiUtil = require( '../api-util.js' ),
+ preq = require( 'preq' ),
+ cxUtil = require( '../util.js' ),
+ Title = require( 'mediawiki-title' ).Title,
+ cxutil = require( '../util.js' ),
+ languageDomainNameMapping = require(
'./../language-domain-mapping.json' ),
+ HTTPError = cxUtil.HTTPError;
+
+class MWApiRequest {
+ /**
+ * @param {Object} config Configuration options
+ * @cfg {Object} context Application context
+ * @cfg {string} sourceLanguage Source language
+ * @cfg {string} targetLanguage target language
+ */
+ constructor( config ) {
+ this.context = config.context;
+ // Source and target languages
+ this.sourceLanguage = config.sourceLanguage;
+ this.targetLanguage = config.targetLanguage;
+ apiUtil.setupApiTemplates( config.context );
+ }
+
+ /**
+ * Calls the MW API with the supplied query as its body
+ *
+ * @param {string} domain the domain to issue the request to
+ * @param {Object} query an object with all the query parameters for
the MW API
+ * @param {string} method The HTTP method to use - get or post
+ * @return {Promise} a promise resolving as the response object from
the MW API
+ */
+ mwRequest( domain, query, method ) {
+ var request;
+ query = query || {};
+ query.continue = query.continue || '';
+ query.format = 'json';
+ request = this.context.mwapi_tpl.expand( {
+ request: {
+ params: {
+ domain: domain,
+ origin: '*'
+ },
+ headers: {
+ 'user-agent':
this.context.conf.user_agent
+ }
+ }
+ } );
+ if ( method === 'get' ) {
+ request.query = query;
+ } else if ( method === 'post' ) {
+ request.body = query;
+ request.headers[ 'content-type' ] =
'application/x-www-form-urlencoded';
+ }
+ return preq[ method ]( request ).then( ( response ) => {
+ if ( response.status < 200 || response.status > 399 ) {
+ // there was an error when calling the upstream
service, propagate that
+ throw new HTTPError( {
+ status: response.status,
+ type: 'api_error',
+ title: 'MW API error',
+ detail: response.body
+ } );
+ }
+ return response.body;
+ } );
+ }
+
+ /**
+ * Calls the MW API with the supplied query as its body
+ *
+ * @param {string} domain the domain to issue the request to
+ * @param {Object} query an object with all the query parameters for
the MW API
+ * @return {Promise} a promise resolving as the response object from
the MW API
+ */
+ mwPost( domain, query ) {
+ return this.mwRequest( domain, query, 'post' );
+ }
+
+ /**
+ * Calls the MW API with the supplied query as its query string
+ *
+ * @param {string} domain the domain to issue the request to
+ * @param {Object} query an object with all the query parameters for
the MW API
+ * @return {Promise} a promise resolving as the response object from
the MW API
+ */
+ mwGet( domain, query ) {
+ return this.mwRequest( domain, query, 'get' );
+ }
+
+ getDomain( language ) {
+ return this.getSiteCode( language ) + '.wikipedia.org';
+ }
+
+ /**
+ * Resolve non-standard wikimedia site codes
+ * @param {string} language Language code
+ * @return {string} Wikipedia site code corresponding to the language
code.
+ */
+ getSiteCode( language ) {
+ return languageDomainNameMapping[ language ] || language;
+ }
+
+ /**
+ * Fetch the site information for a given language
+ * @param {string} language
+ * @return {Promise}
+ */
+ getSiteInfo( language ) {
+ var query,
+ domain = this.getDomain( language );
+ if ( MWApiRequest.siteInfoCache[ domain ] ) {
+ return MWApiRequest.siteInfoCache[ domain ];
+ }
+
+ query = {
+ action: 'query',
+ meta: 'siteinfo',
+ siprop:
'general|namespaces|namespacealiases|specialpagealiases',
+ format: 'json',
+ formatversion: 2
+ };
+
+ MWApiRequest.siteInfoCache[ domain ] = this.mwGet( domain,
query )
+ .then( ( res ) => res.query );
+ return MWApiRequest.siteInfoCache[ domain ];
+ }
+}
+
+/**
+ * Normalize the title of the response
+ *
+ * @param {string} title Title
+ * @param {string} language language
+ * @return {Promise} Promise resolved with the normalized title
+ */
+MWApiRequest.prototype.normalizeTitle = cxutil.async( function* ( title,
language ) {
+ var titleObj, siteInfo;
+ siteInfo = yield this.getSiteInfo( language );
+ // Remove prefixes like './'
+ title = title.replace( /^\.*\//, '' );
+ titleObj = Title.newFromText( title, siteInfo );
+ if ( !titleObj ) {
+ return title;
+ }
+ return titleObj.getPrefixedDBKey();
+} );
+
+MWApiRequest.siteInfoCache = new Map();
+
+module.exports = MWApiRequest;
diff --git a/lib/mw/BatchedAPIRequest.js b/lib/mw/BatchedAPIRequest.js
new file mode 100644
index 0000000..e66a5fe
--- /dev/null
+++ b/lib/mw/BatchedAPIRequest.js
@@ -0,0 +1,161 @@
+const cxutil = require( '../util.js' ),
+ MWApiRequest = require( './ApiRequest.js' );
+
+/**
+ * MediaWiki API batch queue.
+ *
+ * Used to queue up lists of items centrally to get information about in
batches of requests.
+ *
+ * @class
+ * @extends MWApiRequest
+ * @constructor
+ * @param {Object} config Configuration
+ */
+class BatchedAPIRequest extends MWApiRequest {
+ constructor( config ) {
+ super( config );
+ // Keys are titles, values are promises
+ this.promises = new Map();
+
+ // Array of page titles queued to be looked up
+ this.queue = [];
+ this.dispatchTimer = null;
+ }
+
+ /**
+ * Process each page in the response of an API request
+ *
+ * @abstract
+ * @method
+ * @param {Object} page The page object
+ * @return {Object|undefined} Any relevant info that we want to cache
and return.
+ */
+ processPage() {
+ throw new Error( 'Not implemented!' );
+ }
+
+ /**
+ * Get an API request promise to deal with a list of titles
+ *
+ * @abstract
+ * @return {Promise}
+ */
+ getRequestPromise() {
+ throw new Error( 'Not implemented!' );
+ }
+
+ /**
+ * Perform any scheduled API requests.
+ *
+ * @private
+ * @fires add
+ */
+ processQueue() {
+ var subqueue, queue, processResult,
+ batchRequest = this;
+
+ function rejectSubqueue( rejectQueue ) {
+ var i, len;
+ for ( i = 0, len = rejectQueue.length; i < len; i++ ) {
+ batchRequest.promises[ rejectQueue[ i ]
].reject();
+ }
+ }
+
+ processResult = cxutil.async( function*( data ) {
+ var pageid, page, i, processedPage,
+ pages = ( data.query && data.query.pages ) ||
data.pages,
+ redirects,
+ processed = {};
+
+ redirects = ( data.query && data.query.redirects ) ||
{};
+ if ( pages ) {
+ for ( pageid in pages ) {
+ page = pages[ pageid ];
+ processedPage =
batchRequest.processPage( page, redirects );
+ if ( processedPage !== undefined ) {
+ processed[ page.title ] =
processedPage;
+ }
+ for ( i in redirects ) {
+ // Locate the title in
redirects, if any.
+ if ( redirects[ i ].to ===
page.title ) {
+ processed[ redirects[ i
].from ] = processedPage;
+ break;
+ }
+ }
+ }
+ yield batchRequest.set( processed );
+ }
+ } );
+
+ queue = this.queue;
+ this.queue = [];
+ while ( queue.length ) {
+ subqueue = queue.splice( 0, 50 );
+ this.getRequestPromise( subqueue )
+ .then( processResult )
+
+ // Reject everything in subqueue; this will
only reject the ones
+ // that weren't already resolved above, because
.reject() on an
+ // already resolved Deferred is a no-op.
+ .then( rejectSubqueue.bind( null, subqueue ) );
+ }
+ }
+
+ /**
+ * Dispatch the queue for processing when there is a gap in the arrival
of requests,
+ * or when the queue size exceeds a given size.
+ */
+ dispatch() {
+ if ( this.queue.length >= 100 ) {
+ // Process the queue immediately.
+ this.processQueue();
+ }
+ if ( this.dispatchTimer ) {
+ clearTimeout( this.dispatchTimer );
+ }
+ this.dispatchTimer = setTimeout( this.processQueue.bind( this
), 10 );
+ }
+}
+
+/**
+ * Look up data about a title. If the data about this title is already in the
cache, this
+ * returns an already-resolved promise. Otherwise, it returns a pending
promise and schedules
+ * a request to retrieve the data.
+ *
+ * @param {string} title Title
+ * @return {Promise} Promise that gets resolved when data is available
+ */
+BatchedAPIRequest.prototype.get = cxutil.async( function* ( title ) {
+ var normalizedTitle;
+ if ( typeof title !== 'string' ) {
+ // Don't bother letting things like undefined or null make it
all the way through,
+ // just reject them here. Otherwise they'll cause problems or
exceptions at random
+ // other points in this file.
+ return Promise.reject( 'Invalid title' );
+ }
+ normalizedTitle = yield this.normalizeTitle( title, this.sourceLanguage
);
+ if ( !Object.prototype.hasOwnProperty.call( this.promises,
normalizedTitle ) ) {
+ this.promises[ normalizedTitle ] = new cxutil.Deferred();
+ this.queue.push( normalizedTitle );
+ this.dispatch();
+ }
+ return this.promises[ normalizedTitle ];
+} );
+
+/**
+ * Add entries to the cache. Does not overwrite already-set entries.
+ *
+ * @param {Object} entries Object keyed by page title, with the values being
data objects
+ */
+BatchedAPIRequest.prototype.set = cxutil.async( function* ( entries ) {
+ var normalizedTitle, title;
+ for ( title in entries ) {
+ normalizedTitle = yield this.normalizeTitle( title,
this.sourceLanguage );
+ if ( !Object.prototype.hasOwnProperty.call( this.promises,
normalizedTitle ) ) {
+ this.promises[ normalizedTitle ] = new
cxutil.Deferred();
+ }
+ this.promises[ normalizedTitle ].resolve( entries[ title ] );
+ }
+} );
+
+module.exports = BatchedAPIRequest;
diff --git a/lib/mw/TitlePairRequest.js b/lib/mw/TitlePairRequest.js
new file mode 100644
index 0000000..349c566
--- /dev/null
+++ b/lib/mw/TitlePairRequest.js
@@ -0,0 +1,46 @@
+/**
+ * ContentTranslation Title pair request
+ *
+ */
+const BatchedAPIRequest = require( './BatchedAPIRequest.js' );
+
+/**
+ * Fetches information about title pairs in batches.
+ *
+ * @class
+ * @extends BatchedAPIRequest
+ * @constructor
+ * @param {Object} config Configuration
+ */
+class TitlePairRequest extends BatchedAPIRequest {
+ constructor( config ) {
+ super( config );
+ }
+
+ processPage( page ) {
+ return {
+ sourceTitle: page.title,
+ targetTitle: page.langlinks && page.langlinks[ 0 ] &&
page.langlinks[ 0 ][ '*' ],
+ missing: page.langlinks && page.langlinks[ 0 ] &&
page.langlinks[ 0 ][ '*' ] === undefined
+ };
+ }
+
+ getRequestPromise( subqueue ) {
+ var domain, query;
+ query = {
+ action: 'query',
+ prop: 'langlinks',
+ lllimit: subqueue.length,
+ lllang: this.getSiteCode( this.targetLanguage ),
+ titles: subqueue.join( '|' ),
+ redirects: true,
+ 'continue': ''
+ };
+ domain = this.getDomain( this.sourceLanguage );
+ // We use POST here because the titles when joined will result
in a longer query string
+ // that GET requests cannot process sometimes.
+ return this.mwPost( domain, query );
+ }
+}
+
+module.exports = TitlePairRequest;
diff --git a/lib/translationunits/MWLink.js b/lib/translationunits/MWLink.js
index c344e55..b2dff27 100644
--- a/lib/translationunits/MWLink.js
+++ b/lib/translationunits/MWLink.js
@@ -1,42 +1,37 @@
-var util = require( 'util' ),
- cxutil = require( '../util.js' ),
- TranslationUnit = require( './TranslationUnit.js' );
+var cxutil = require( '../util.js' ),
+ TranslationUnit = require( './TranslationUnit.js' ),
+ MWAPIRequestManager = require( '../mw/APIRequestManager.js' );
-function MWLink( node, sourceLanguage, targetLanguage, context ) {
- this.node = node;
- this.sourceLanguage = sourceLanguage;
- this.targetLanguage = targetLanguage;
- this.context = context;
+class MWLink extends TranslationUnit {
+ constructor( node, sourceLanguage, targetLanguage, context ) {
+ super( node, sourceLanguage, targetLanguage, context );
+ // Nothing else?
+ }
}
-
-util.inherits( MWLink, TranslationUnit );
MWLink.name = 'link';
MWLink.matchTagNames = [ 'a' ];
MWLink.matchRdfaTypes = [ 'mw:WikiLink' ];
MWLink.prototype.adapt = cxutil.async( function* () {
- // XXX: Just a marker for now. To be removed
- this.node.attributes[ 'adapted' ] = 'true';
- this.node.attributes[ 'href' ] = yield this.findLinkTarget(
- this.sourceLanguage,
- this.node.attributes.href,
- this.targetLanguage
- );
+ var linkPairInfo;
+
+ linkPairInfo = yield new MWAPIRequestManager( this.context )
+ .titlePairRequest( this.node.attributes.href,
this.sourceLanguage, this.targetLanguage );
+
+ if ( linkPairInfo.targetTitle ) {
+ // NOTE: The titles we are setting here are not relative
titles.
+ this.node.attributes[ 'href' ] = linkPairInfo.targetTitle;
+ } else {
+ // TODO: This format is not decided yet. We do need to inform
client about failed
+ // adaptations somehow.
+ this.node.attributes[ 'data-cx' ] = JSON.stringify( {
+ adapted: false,
+ sourceTitle: linkPairInfo.sourceTitle
+ } );
+ }
return this.node;
} );
-
-/**
- * Find link target for the given source title
- * @param {string} sourceLanguage
- * @param {string} sourceTitle
- * @param {string} targetLanguage
- * @return {Promise}
- */
-MWLink.prototype.findLinkTarget = function ( sourceLanguage, sourceTitle,
targetLanguage ) {
- console.log( 'Adapting from ' + sourceLanguage + ' to ' +
targetLanguage );
- return Promise.resolve( sourceTitle );
-};
module.exports = MWLink;
diff --git a/lib/translationunits/TranslationUnit.js
b/lib/translationunits/TranslationUnit.js
index 2fa4423..edec850 100644
--- a/lib/translationunits/TranslationUnit.js
+++ b/lib/translationunits/TranslationUnit.js
@@ -1,14 +1,21 @@
-function TranslationUnit( node, context ) {
- this.node = node;
- this.context = context;
+/*
+ * @abstract
+ */
+class TranslationUnit {
+ constructor( node, sourceLanguage, targetLanguage, context ) {
+ this.node = node;
+ this.sourceLanguage = sourceLanguage;
+ this.targetLanguage = targetLanguage;
+ this.context = context;
+ }
+
+ adapt() {
+ return this.node;
+ }
}
TranslationUnit.name = null;
TranslationUnit.matchTagNames = null;
TranslationUnit.matchRdfaTypes = null;
-
-TranslationUnit.prototype.adapt = function() {
- return this.node;
-};
module.exports = TranslationUnit;
diff --git a/lib/util.js b/lib/util.js
index 2e55577..3196571 100644
--- a/lib/util.js
+++ b/lib/util.js
@@ -310,12 +310,23 @@
};
}
+function Deferred() {
+ this.promise = new Promise( ( function( resolve, reject ) {
+ this.resolve = resolve;
+ this.reject = reject;
+ } ).bind( this ) );
+
+ this.then = this.promise.then.bind( this.promise );
+ this.catch = this.promise.catch.bind( this.promise );
+}
+
module.exports = {
- HTTPError: HTTPError,
- initAndLogRequest: initAndLogRequest,
- wrapRouteHandlers: wrapRouteHandlers,
- setErrorHandler: setErrorHandler,
+ HTTPError,
+ initAndLogRequest,
+ wrapRouteHandlers,
+ setErrorHandler,
router: createRouter,
- spawn: spawn,
- async: async
+ spawn,
+ async,
+ Deferred
};
diff --git a/package.json b/package.json
index deb7a8f..03ce75d 100644
--- a/package.json
+++ b/package.json
@@ -27,6 +27,7 @@
"domino": "^1.0.25",
"express": "^4.14.0",
"js-yaml": "^3.6.1",
+ "mediawiki-title": "^0.6.3",
"preq": "^0.5.2",
"service-runner": "^2.2.5",
"swagger-router": "^0.4.6",
diff --git a/test/adaptation/AdaptationTest.js
b/test/adaptation/AdaptationTest.js
index 1d8bb17..c6c0500 100644
--- a/test/adaptation/AdaptationTest.js
+++ b/test/adaptation/AdaptationTest.js
@@ -1,7 +1,6 @@
'use strict';
-var fs = require( 'fs' ),
- assert = require( '../utils/assert.js' ),
+var assert = require( '../utils/assert.js' ),
server = require( '../utils/server.js' ),
LinearDoc = require( '../../lib/lineardoc' ),
async = require( 'async' ),
@@ -16,20 +15,17 @@
}
describe( 'Adaptation tests', function () {
- before( function () {
- return server.start();
- } );
-
async.forEach( tests, function ( test ) {
var expectedResultData, adapter;
- adapter = new Adapter( test.from, test.to, server );
- adapter.adapt( test.source ).then( function( result ) {
- result = normalize( result.getHtml() );
- expectedResultData = normalize( test.result );
- it( 'should not have any errors when: ' + test.desc,
function () {
+ adapter = new Adapter( test.from, test.to, server.config );
+ it( 'should not have any errors when: ' + test.desc, function
() {
+ return adapter.adapt( test.source ).then( function(
result ) {
+ result = normalize( result.getHtml() );
+ expectedResultData = normalize( test.result );
assert.deepEqual( result, expectedResultData,
test.source + ': ' + test.desc || '' );
} );
} );
} );
+
} );
diff --git a/test/adaptation/AdaptationTests.json
b/test/adaptation/AdaptationTests.json
index acb6e29..a96d619 100644
--- a/test/adaptation/AdaptationTests.json
+++ b/test/adaptation/AdaptationTests.json
@@ -4,6 +4,13 @@
"from": "en",
"to": "es",
"source": "<p><a rel='mw:WikiLink' href='Oxygen'>Oxygen</a> is
a chemical element with symbol O and <a rel='mw:WikiLink' href='Atomic
number'>atomic number</a> 8.</p>",
- "result": "<p><a adapted='true' href='Oxygen'
rel='mw:WikiLink'>Oxygen</a> is a chemical element with symbol O and <a
adapted='true' href='Atomic number' rel='mw:WikiLink'>atomic number</a> 8.</p>"
+ "result": "<p><a href='Oxígeno' rel='mw:WikiLink'>Oxygen</a> is
a chemical element with symbol O and <a href='Número atómico'
rel='mw:WikiLink'>atomic number</a> 8.</p>"
+ },
+ {
+ "desc": "Link adaptation - Full paragraph and relative links",
+ "from": "en",
+ "to": "es",
+ "source": "<p id='mwBg'><b id='mwBw'>Oxygen</b> is a <a
rel='mw:WikiLink' href='./Chemical_element' title='Chemical element'
id='mwCA'>chemical element</a> with symbol<span typeof='mw:Entity' id='mwCQ'>
</span><b id='mwCg'>O</b> and <a rel='mw:WikiLink' href='./Atomic_number'
title='Atomic number' id='mwCw'>atomic number</a> 8. It is a member of the <a
rel='mw:WikiLink' href='./Chalcogen' title='Chalcogen' id='mwDA'>chalcogen</a>
<a rel='mw:WikiLink' href='./Group_(periodic_table)' title='Group (periodic
table)' id='mwDQ'>group</a> on the <a rel='mw:WikiLink' href='./Periodic_table'
title='Periodic table' id='mwDg'>periodic table</a> and is a highly <a
rel='mw:WikiLink' href='./Chemical_reaction' title='Chemical reaction'
id='mwDw'>reactive</a> <a rel='mw:WikiLink' href='./Nonmetal' title='Nonmetal'
id='mwEA'>nonmetal</a> and <a rel='mw:WikiLink' href='./Oxidizing_agent'
title='Oxidizing agent' id='mwEQ'>oxidizing agent</a> that readily forms <a
rel='mw:WikiLink' href='./Oxide' title='Oxide' id='mwEg'>oxides</a> with most
elements as well as other <a rel='mw:WikiLink' href='./Chemical_compound'
title='Chemical compound' id='mwEw'>compounds</a>. By mass, oxygen is the
third-<a rel='mw:WikiLink' href='./Abundance_of_the_chemical_elements'
title='Abundance of the chemical elements' id='mwFA'>most abundant element</a>
in the universe, after <a rel='mw:WikiLink' href='./Hydrogen' title='Hydrogen'
id='mwFQ'>hydrogen</a> and <a rel='mw:WikiLink' href='./Helium' title='Helium'
id='mwFg'>helium</a>. At <a rel='mw:WikiLink'
href='./Standard_temperature_and_pressure' title='Standard temperature and
pressure' id='mwFw' class='mw-redirect'>standard temperature and pressure</a>,
two atoms of the element <a rel='mw:WikiLink' href='./Chemical_bond'
title='Chemical bond' id='mwGA'>bind</a> to form <a rel='mw:WikiLink'
href='./Allotropes_of_oxygen#Dioxygen' title='Allotropes of oxygen'
id='mwGQ'>dioxygen</a>, a colorless and odorless <a rel='mw:WikiLink'
href='./Diatomic_molecule' title='Diatomic molecule' id='mwGg'>diatomic</a> <a
rel='mw:WikiLink' href='./Gas' title='Gas' id='mwGw'>gas</a> with the formula.
This is an important part of the <a rel='mw:WikiLink'
href='./Atmosphere_of_Earth' title='Atmosphere of Earth'
id='mwHQ'>atmosphere</a> and diatomic oxygen gas constitutes 20.8% of the <a
rel='mw:WikiLink' href='./Earth's_atmosphere' title='Earth's atmosphere'
id='mwHg' class='mw-redirect'>Earth's atmosphere</a>. Additionally, as oxides
the element makes up almost half of the <a rel='mw:WikiLink'
href='./Earth's_crust' title='Earth's crust' id='mwHw'
class='mw-redirect'>Earth's crust</a>.</p>",
+ "result": "<p id='mwBg'><b id='mwBw'>Oxygen</b> is a <a
href='Elemento químico' id='mwCA' rel='mw:WikiLink' title='Chemical
element'>chemical element</a> with symbol<span id='mwCQ' typeof='mw:Entity'>
</span><b id='mwCg'>O</b> and <a href='Número atómico' id='mwCw'
rel='mw:WikiLink' title='Atomic number'>atomic number</a> 8. It is a member of
the <a href='Anfígeno' id='mwDA' rel='mw:WikiLink'
title='Chalcogen'>chalcogen</a> <a
data-cx='{"adapted":false,"sourceTitle":"Group (periodic
table)"}' href='./Group_(periodic_table)' id='mwDQ' rel='mw:WikiLink'
title='Group (periodic table)'>group</a> on the <a href='Tabla periódica de los
elementos' id='mwDg' rel='mw:WikiLink' title='Periodic table'>periodic
table</a> and is a highly <a href='Reacción química' id='mwDw'
rel='mw:WikiLink' title='Chemical reaction'>reactive</a> <a href='No metal'
id='mwEA' rel='mw:WikiLink' title='Nonmetal'>nonmetal</a> and <a
href='Oxidante' id='mwEQ' rel='mw:WikiLink' title='Oxidizing agent'>oxidizing
agent</a> that readily forms <a href='Óxido' id='mwEg' rel='mw:WikiLink'
title='Oxide'>oxides</a> with most elements as well as other <a href='Compuesto
químico' id='mwEw' rel='mw:WikiLink' title='Chemical compound'>compounds</a>.
By mass, oxygen is the third-<a href='Abundancia de los elementos químicos'
id='mwFA' rel='mw:WikiLink' title='Abundance of the chemical elements'>most
abundant element</a> in the universe, after <a href='Hidrógeno' id='mwFQ'
rel='mw:WikiLink' title='Hydrogen'>hydrogen</a> and <a href='Helio' id='mwFg'
rel='mw:WikiLink' title='Helium'>helium</a>. At <a class='mw-redirect'
href='Condiciones normalizadas de presión y temperatura' id='mwFw'
rel='mw:WikiLink' title='Standard temperature and pressure'>standard
temperature and pressure</a>, two atoms of the element <a href='Enlace químico'
id='mwGA' rel='mw:WikiLink' title='Chemical bond'>bind</a> to form <a
href='Alótropos del oxígeno' id='mwGQ' rel='mw:WikiLink' title='Allotropes of
oxygen'>dioxygen</a>, a colorless and odorless <a href='Molécula diatómica'
id='mwGg' rel='mw:WikiLink' title='Diatomic molecule'>diatomic</a> <a
href='Gas' id='mwGw' rel='mw:WikiLink' title='Gas'>gas</a> with the formula.
This is an important part of the <a href='Atmósfera terrestre' id='mwHQ'
rel='mw:WikiLink' title='Atmosphere of Earth'>atmosphere</a> and diatomic
oxygen gas constitutes 20.8% of the <a atmosphere='' class='mw-redirect'
href='Tierra' id='mwHg' rel='mw:WikiLink' s='' s_atmosphere=''
title='Earth'>Earth's atmosphere</a>. Additionally, as oxides the element makes
up almost half of the <a class='mw-redirect' crust='' href='Tierra' id='mwHw'
rel='mw:WikiLink' s='' s_crust='' title='Earth'>Earth's crust</a>.</p>"
}
]
diff --git a/test/index.js b/test/index.js
index 8776d2c..b6feadd 100644
--- a/test/index.js
+++ b/test/index.js
@@ -9,6 +9,7 @@
'lib/dictionary',
'lib/lineardoc',
'lib/mt',
+ 'lib/mw',
'lib/pageloader',
'lib/routes',
'lib/segmentation',
diff --git a/test/mw/TitlePairRequest.test.js b/test/mw/TitlePairRequest.test.js
new file mode 100644
index 0000000..5cc4e34
--- /dev/null
+++ b/test/mw/TitlePairRequest.test.js
@@ -0,0 +1,87 @@
+'use strict';
+
+var tests,
+ assert = require( '../utils/assert.js' ),
+ server = require( '../utils/server.js' ),
+ async = require( 'async' ),
+ TitlePairRequest = require( '../../lib/mw/TitlePairRequest' );
+
+tests = [
+ {
+ source: 'Kerala',
+ result: 'കേരളം',
+ sourceLanguage: 'en',
+ targetLanguage: 'ml',
+ desc: 'Corresponding title exist in target language'
+ },
+ {
+ source: 'Sea',
+ result: 'Mar',
+ sourceLanguage: 'en',
+ targetLanguage: 'es',
+ desc: 'Corresponding title exist in target language'
+ },
+ {
+ source: 'Atomic number',
+ result: 'Número atómico',
+ sourceLanguage: 'en',
+ targetLanguage: 'es',
+ desc: 'Corresponding title exist in target language and given
title need normalization'
+ },
+ {
+ source: 'This title does not exist in English wikipedia',
+ result: undefined,
+ sourceLanguage: 'en',
+ targetLanguage: 'es',
+ desc: 'Corresponding title does not exist in target language
and given title need normalization'
+ },
+ {
+ source: 'Group_(periodic_table)',
+ result: 'ଶ୍ରେଣୀ (ପର୍ଯ୍ୟାୟ ସାରଣୀ)',
+ sourceLanguage: 'en',
+ targetLanguage: 'or',
+ desc: 'Corresponding title exist in target language and given
title need normalization, has parenthesis'
+ }
+];
+
+describe( 'Title pair tests', function () {
+ async.forEach( tests, function ( test ) {
+ var request;
+
+ request = new TitlePairRequest( {
+ sourceLanguage: test.sourceLanguage,
+ targetLanguage: test.targetLanguage,
+ context: server.config
+ } );
+ it( 'should adapt the title when: ' + test.desc, function () {
+ return request.get( test.source ).then( function(
result ) {
+ assert.deepEqual( result.targetTitle,
test.result );
+ } );
+ } );
+ } );
+} );
+
+describe( 'Title pair tests - batching', function () {
+ var oldGetRequestPromise;
+
+ it( 'should have the queue size 50', function () {
+ var i, titlePairRequest;
+ oldGetRequestPromise =
TitlePairRequest.prototype.getRequestPromise;
+ TitlePairRequest.prototype.getRequestPromise = function(
subqueue ) {
+ assert.deepEqual( subqueue.length, 50 );
+ return Promise.resolve( {} );
+ };
+ titlePairRequest = new TitlePairRequest( {
+ sourceLanguage: 'en',
+ targetLanguage: 'es',
+ context: server.config
+ } );
+ for ( i = 0; i < 50; i++ ) {
+ titlePairRequest.get( 'Title' + i );
+ }
+ return Promise.all( titlePairRequest.promises );
+ } );
+ after( function () {
+ TitlePairRequest.prototype.getRequestPromise =
oldGetRequestPromise;
+ } );
+} );
--
To view, visit https://gerrit.wikimedia.org/r/366263
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I26354761ab2db816801b8247f81799989fa71184
Gerrit-PatchSet: 12
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <[email protected]>
Gerrit-Reviewer: Catrope <[email protected]>
Gerrit-Reviewer: Santhosh <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits