Santhosh has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/372503 )

Change subject: Refresh MT client modules as classes - ES6 upgrade
......................................................................

Refresh MT client modules as classes - ES6 upgrade

Change-Id: I0a9aaf9b71ee53d093956995cbe3073001f62081
---
M lib/mt/Apertium.js
M lib/mt/MTClient.js
M lib/mt/Yandex.js
M lib/mt/Youdao.js
4 files changed, 547 insertions(+), 589 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver 
refs/changes/03/372503/1

diff --git a/lib/mt/Apertium.js b/lib/mt/Apertium.js
index 89f8575..af9af5d 100644
--- a/lib/mt/Apertium.js
+++ b/lib/mt/Apertium.js
@@ -1,47 +1,39 @@
 'use strict';
 
 var preq = require( 'preq' ),
-       util = require( 'util' ),
        MTClient = require( './MTClient.js' ),
-       apertiumLangMapping = require( './Apertium.languagenames.json' ),
-       postData;
+       apertiumLangMapping = require( './Apertium.languagenames.json' );
 
-function Apertium( options ) {
-       this.logger = options.logger;
-       this.conf = options.conf;
+class Apertium extends MTClient {
+       /**
+        * Translate plain text with Apertium API
+        * Apertium is not capable of HTML translation with all annotation
+        * mapping. For translating HTML, It use CX's annotation mapping on top
+        * of the plaintext translation. Hence it inherits translateHTML method
+        * of MTClient.
+        *
+        * @param {string} sourceLang Source language code
+        * @param {string} targetLang Target language code
+        * @param {string} sourceText Source language text
+        * @return {BBPromise} promise: Target language text
+        */
+       translateText( sourceLang, targetLang, sourceText ) {
+               const postData = {
+                       uri: this.conf.mt.apertium.api + '/translate',
+                       body: {
+                               markUnknown: 0,
+                               langpair: apertiumLangMapping[ sourceLang ] + 
'|' + apertiumLangMapping[ targetLang ],
+                               format: 'txt',
+                               q: sourceText
+                       }
+               };
+
+               return preq.post( postData )
+                       .then( ( response ) => 
response.body.responseData.translatedText )
+                       .catch( function () {
+                               throw new Error( 'Translation with Apertium 
failed: ' + sourceLang + '-' + targetLang );
+                       } );
+       }
 }
-
-util.inherits( Apertium, MTClient );
-
-/**
- * Translate plain text with Apertium API
- * Apertium is not capable of HTML translation with all annotation
- * mapping. For translating HTML, It use CX's annotation mapping on top
- * of the plaintext translation. Hence it inherits translateHTML method
- * of MTClient.
- *
- * @param {string} sourceLang Source language code
- * @param {string} targetLang Target language code
- * @param {string} sourceText Source language text
- * @return {BBPromise} promise: Target language text
- */
-Apertium.prototype.translateText = function ( sourceLang, targetLang, 
sourceText ) {
-       postData = {
-               uri: this.conf.mt.apertium.api + '/translate',
-               body: {
-                       markUnknown: 0,
-                       langpair: apertiumLangMapping[ sourceLang ] + '|' + 
apertiumLangMapping[ targetLang ],
-                       format: 'txt',
-                       q: sourceText
-               }
-       };
-
-       return preq.post( postData ).then( function ( response ) {
-               return response.body.responseData.translatedText;
-       } ).catch( function () {
-               throw new Error( 'Translation with Apertium failed: ' +
-                       sourceLang + '-' + targetLang );
-       } );
-};
 
 module.exports = Apertium;
diff --git a/lib/mt/MTClient.js b/lib/mt/MTClient.js
index 1190ff4..1cce712 100644
--- a/lib/mt/MTClient.js
+++ b/lib/mt/MTClient.js
@@ -1,80 +1,61 @@
 'use strict';
 
-var LinearDoc = require( __dirname + '/../lineardoc' ),
-       BBPromise = require( 'bluebird' ),
+const LinearDoc = require( __dirname + '/../lineardoc' ),
        SubSequenceMatcher = require( 
'./annotationmapper/SubsequenceMatcher.js' ),
        createDOMPurify = require( 'dompurify' ),
        jsdom = require( 'jsdom' );
 
 /**
  * MTClient - Generic machine translation client.
- *
- * @class
- *
- * @param {Object} options
  */
-function MTClient( options ) {
-       this.logger = options.logger;
-       this.conf = options.conf;
-       this.sourceDoc = null;
-       this.sourceHTML = null;
-}
-
-MTClient.prototype.log = function ( level, info ) {
-       if ( this.logger && this.logger.log ) {
-               this.logger.log( level, info );
+class MTClient {
+       /**
+        * @param {Object} options
+        */
+       constructor( options ) {
+               this.logger = options.logger;
+               this.conf = options.conf;
+               this.sourceDoc = null;
+               this.sourceHTML = null;
        }
-};
 
-/**
- * Translate the given content between the language pairs.
- *
- * @param {string} sourceLang Source language code
- * @param {string} targetLang Target language code
- * @param {string} content Content to translate
- * @param {string} [format="html"] Format of the content- html or text
- * @return {Object} Deferred promise: Target language text
- */
-MTClient.prototype.translate = function ( sourceLang, targetLang, content, 
format ) {
-       if ( format === 'text' ) {
-               return this.translateText( sourceLang, targetLang, content );
-       } else {
-               return this.translateHtml( sourceLang, targetLang, content );
+       log( level, info ) {
+               if ( this.logger && this.logger.log ) {
+                       this.logger.log( level, info );
+               }
        }
-};
 
-/**
- * Translate marked-up text
- *
- * @param {string} sourceLang Source language code
- * @param {string} targetLang Target language code
- * @param {string} sourceHtml Source html
- * @return {Promise} promise: Translated html
- */
-MTClient.prototype.translateHtml = function ( sourceLang, targetLang, 
sourceHtml ) {
-       var i, len, targetDoc, chain = [],
-               self = this;
+       /**
+        * Translate the given content between the language pairs.
+        *
+        * @param {string} sourceLang Source language code
+        * @param {string} targetLang Target language code
+        * @param {string} content Content to translate
+        * @param {string} [format="html"] Format of the content- html or text
+        * @return {Object} Deferred promise: Target language text
+        */
+       translate( sourceLang, targetLang, content, format ) {
+               if ( format === 'text' ) {
+                       return this.translateText( sourceLang, targetLang, 
content );
+               } else {
+                       return this.translateHtml( sourceLang, targetLang, 
content );
+               }
+       }
 
-       this.buildSourceDoc( sourceHtml );
-       // Clone and adapt sourceDoc
-       targetDoc = new LinearDoc.Doc( this.sourceDoc.wrapperTag );
-
-       function translateItemDeferred( item ) {
+       translateItemDeferred( item, sourceLang, targetLang ) {
                if ( item.type !== 'textblock' ) {
-                       return BBPromise.resolve( item );
+                       return Promise.resolve( item );
                }
 
-               return self.translateTextWithTagOffsets(
+               return this.translateTextWithTagOffsets(
                        sourceLang,
                        targetLang,
                        item.item.getPlainText(),
                        item.item.getTagOffsets()
-               ).then( function ( translated ) {
+               ).then( ( translated ) => {
                        var newTextBlock;
 
-                       newTextBlock = item.item.translateTags(
-                               translated.text, translated.rangeMappings
-                       );
+                       newTextBlock = item.item.translateTags( 
translated.text, translated.rangeMappings );
 
                        return {
                                type: 'textblock',
@@ -83,310 +64,319 @@
                } );
        }
 
-       for ( i = 0, len = this.sourceDoc.items.length; i < len; i++ ) {
-               chain.push( translateItemDeferred( this.sourceDoc.items[ i ] ) 
);
+       /**
+        * Translate marked-up text
+        *
+        * @param {string} sourceLang Source language code
+        * @param {string} targetLang Target language code
+        * @param {string} sourceHtml Source html
+        * @return {Promise} Promise that resolves Translated html
+        */
+       translateHtml( sourceLang, targetLang, sourceHtml ) {
+               var targetDoc, chain = [];
+
+               this.buildSourceDoc( sourceHtml );
+               // Clone and adapt sourceDoc
+               targetDoc = new LinearDoc.Doc( this.sourceDoc.wrapperTag );
+
+               for ( let i = 0, len = this.sourceDoc.items.length; i < len; 
i++ ) {
+                       chain.push( this.translateItemDeferred( 
this.sourceDoc.items[ i ], sourceLang, targetLang ) );
+               }
+
+               return Promise.all( chain ).then( ( results ) => {
+                       targetDoc.items = results;
+                       // Return sanitized HTML output
+                       return this.sanitize( targetDoc.getHtml() );
+               } );
        }
 
-       return BBPromise.all( chain ).then( function ( results ) {
-               targetDoc.items = results;
-               // Return sanitized HTML output
-               return self.sanitize( targetDoc.getHtml() );
-       } );
-};
-
-/**
- * Sanitize given HTML using DOMPurify
- * @param {string} html Dirty HTML
- * @return {string} sanitized HTML output
- */
-MTClient.prototype.sanitize = function ( html ) {
-       if ( !this.DOMPurify ) {
+       /**
+        * Sanitize given HTML using DOMPurify
+        * @param {string} html Dirty HTML
+        * @return {string} sanitized HTML output
+        */
+       sanitize( html ) {
+               if ( !this.DOMPurify ) {
                // Lazy initialize DOMPurify
-               this.DOMPurify = createDOMPurify( ( new jsdom.JSDOM( '' ) 
).window );
+                       this.DOMPurify = createDOMPurify( ( new jsdom.JSDOM( '' 
) ).window );
+               }
+
+               if ( !this.DOMPurify.isSupported ) {
+                       throw new Error( 'DOMPurify not suppported in the DOM 
environment provided by JSDOM' );
+               }
+
+               return this.DOMPurify.sanitize( html, {
+                       ADD_ATTR: [ 'typeof' ], // typeof is not a known 
attribute for DOMPurify
+                       ADD_URI_SAFE_ATTR: [ 'rel', 'typeof' ] // Without this 
rel="mw:WikiLink" attributes will be removed.
+               } );
        }
 
-       if ( !this.DOMPurify.isSupported ) {
-               throw new Error( 'DOMPurify not suppported in the DOM 
environment provided by JSDOM' );
-       }
+       /**
+        * Translate text, using case variants to map tag offsets
+        *
+        * @param {string} sourceLang Source language code
+        * @param {string} targetLang Target language code
+        * @param {string} sourceText Source plain text
+        * @param {Object[]} tagOffsets start and length for each annotation 
chunk
+        * @return {Object} Deferred promise: Translated plain text and range 
mappings
+        */
+       translateTextWithTagOffsets( sourceLang, targetLang, sourceText, 
tagOffsets ) {
+               var subSequences, sourceLines, i, m, preSpaces, postSpaces, 
trimmedSourceLines;
 
-       return this.DOMPurify.sanitize( html, {
-               ADD_ATTR: [ 'typeof' ], // typeof is not a known attribute for 
DOMPurify
-               ADD_URI_SAFE_ATTR: [ 'rel', 'typeof' ] // Without this 
rel="mw:WikiLink" attributes will be removed.
-       } );
-};
+               subSequences = this.getSubSequences( sourceLang, sourceText, 
tagOffsets );
+               sourceLines = subSequences.map( ( variant ) => variant.text );
+               sourceLines.splice( 0, 0, sourceText );
 
-/**
- * Translate text, using case variants to map tag offsets
- *
- * @param {string} sourceLang Source language code
- * @param {string} targetLang Target language code
- * @param {string} sourceText Source plain text
- * @param {Object[]} tagOffsets start and length for each annotation chunk
- * @return {Object} Deferred promise: Translated plain text and range mappings
- */
-MTClient.prototype.translateTextWithTagOffsets = function ( sourceLang, 
targetLang, sourceText, tagOffsets ) {
-       var subSequences, sourceLines, i, m, preSpaces, postSpaces, 
trimmedSourceLines,
-               self = this;
-
-       subSequences = this.getSubSequences( sourceLang, sourceText, tagOffsets 
);
-       sourceLines = subSequences.map( function ( variant ) {
-               return variant.text;
-       } );
-       sourceLines.splice( 0, 0, sourceText );
-
-       // Strip and store leading/trailing whitespace before sending text to 
MT server
-       preSpaces = [];
-       postSpaces = [];
-       trimmedSourceLines = [];
-       for ( i = 0; i < sourceLines.length; i++ ) {
+               // Strip and store leading/trailing whitespace before sending 
text to MT server
+               preSpaces = [];
+               postSpaces = [];
+               trimmedSourceLines = [];
+               for ( i = 0; i < sourceLines.length; i++ ) {
                // Search for zero or more leading and trailing spaces. This 
will always match.
-               m = sourceLines[ i ].match( /^(\s*)([\s\S]*?)(\s*)$/ );
-               if ( !m ) {
+                       m = sourceLines[ i ].match( /^(\s*)([\s\S]*?)(\s*)$/ );
+                       if ( !m ) {
                        // See https://phabricator.wikimedia.org/T86625. This 
not supposed to happen.
-                       this.log( 'error', 'Regex to extract trailing and 
leading space failed for ' + sourceLines[ i ] );
-                       m = [ '', '', sourceLines[ i ], '' ];
+                               this.log( 'error', 'Regex to extract trailing 
and leading space failed for ' + sourceLines[ i ] );
+                               m = [ '', '', sourceLines[ i ], '' ];
+                       }
+                       preSpaces[ i ] = m[ 1 ];
+                       trimmedSourceLines[ i ] = m[ 2 ];
+                       postSpaces[ i ] = m[ 3 ];
                }
-               preSpaces[ i ] = m[ 1 ];
-               trimmedSourceLines[ i ] = m[ 2 ];
-               postSpaces[ i ] = m[ 3 ];
-       }
 
-       // Join segments with a string that will definitely break sentences and 
be preserved
-       return self.translateLines(
-               sourceLang,
-               targetLang,
-               trimmedSourceLines
-       ).then( function ( unnormalizedTargetLines ) {
-               var targetLines, targetText, rangeMappings;
+               // Join segments with a string that will definitely break 
sentences and be preserved
+               return this.translateLines(
+                       sourceLang,
+                       targetLang,
+                       trimmedSourceLines
+               ).then( ( unnormalizedTargetLines ) => {
+                       var targetLines, targetText, rangeMappings;
 
-               // Restore leading/trailing whitespace from source
-               targetLines = unnormalizedTargetLines.map( function ( line, i ) 
{
-                       return preSpaces[ i ] + line.replace( /^\s+|\s+$/g, '' 
) + postSpaces[ i ];
+                       // Restore leading/trailing whitespace from source
+                       targetLines = unnormalizedTargetLines
+                               .map( ( line, i ) => preSpaces[ i ] + 
line.replace( /^\s+|\s+$/g, '' ) + postSpaces[ i ] );
+
+                       try {
+                               targetText = targetLines.splice( 0, 1 )[ 0 ];
+                               rangeMappings = this.getSequenceMappings(
+                                       targetLang,
+                                       subSequences,
+                                       targetText,
+                                       targetLines
+                               );
+                       } catch ( ex ) {
+                               // If annotation mapping fails for any reason, 
return translated text
+                               // without annotations.
+                               this.log( 'debug/mt', 'Error while mapping 
annotations ' + ex.stack );
+                               rangeMappings = {};
+                       }
+                       return {
+                               text: targetText,
+                               rangeMappings: rangeMappings
+                       };
                } );
-               try {
-                       targetText = targetLines.splice( 0, 1 )[ 0 ];
-                       rangeMappings = self.getSequenceMappings(
-                               targetLang,
-                               subSequences,
-                               targetText,
-                               targetLines
-                       );
-               } catch ( ex ) {
-                       // If annotation mapping fails for any reason, return 
translated text
-                       // without annotations.
-                       self.log( 'debug/mt', 'Error while mapping annotations 
' + ex.stack );
-                       rangeMappings = {};
-               }
-               return {
-                       text: targetText,
-                       rangeMappings: rangeMappings
-               };
-       } );
 
-};
-
-/**
- * Translate multiple lines of plaintext
- *
- * The output may need normalizing for leading/trailing whitespace etc.
- *
- * @param {string} sourceLang Source language code
- * @param {string} targetLang Target language code
- * @param {string[]} sourceLines Source plaintext lines
- * @return {Promise} Translated plaintext lines
- */
-MTClient.prototype.translateLines = function ( sourceLang, targetLang, 
sourceLines ) {
-       var sourceLinesText;
-
-       // Join lines into single string. Separator must break sentences and 
pass through unchanged
-       // Using Devangari separator Double Danda twice.
-       sourceLinesText = sourceLines.join( '.॥॥.' );
-
-       return this.translateText(
-               sourceLang,
-               targetLang,
-               sourceLinesText
-       ).then( function ( targetLinesText ) {
-               var targetText = targetLinesText.split( /\.॥॥\./g );
-               return targetText;
-       } );
-};
-
-/**
- * Create variants of the text, with a different annotation uppercased in each.
- *
- * @param {string} lang Language code
- * @param {string} sourceText Text
- * @param {Object[]} annotationOffsets start and length of each annotation
- * @return {Object[]}
- * @return {number} Object.start Start offset of uppercasing
- * @return {number} Object.length Length of uppercasing
- * @return {string} Object.text Text variant with uppercasing
- */
-MTClient.prototype.getSubSequences = function ( lang, sourceText, 
annotationOffsets ) {
-       var i, len, offset, subSequences = [];
-
-       for ( i = 0, len = annotationOffsets.length; i < len; i++ ) {
-               offset = annotationOffsets[ i ];
-               subSequences.push( {
-                       start: offset.start,
-                       length: offset.length,
-                       text: sourceText.slice( offset.start, offset.start + 
offset.length )
-               } );
-       }
-       return subSequences;
-};
-
-/**
- * Check if a range already exist in the array of ranges already located.
- * A range is start position and length indicating position of certain text
- * in a bigger text.
- * This is not just a membership check. If the range we are checking
- * falls under the start and end position of an already existing range, then 
also
- * we consider it as an overlapping range.
- * For example [start:5, length:4] and [start:6, length:3] overlaps.
- *
- * @param {Object} range
- * @param {Object[]} rangeArray
- * @return {boolean} Whether the range overlap or exist in any range in the 
given
- *   range array
- */
-function isOverlappingRange( range, rangeArray ) {
-       var i, rangeStart, rangeEnd, start, end;
-
-       rangeStart = range.start;
-       rangeEnd = range.start + range.length;
-       for ( i = 0; i < rangeArray.length; i++ ) {
-               start = rangeArray[ i ].start;
-               end = start + rangeArray[ i ].length;
-               if ( start >= rangeStart && end <= rangeEnd ||
-                       start <= rangeStart && end >= rangeEnd ) {
-                       return true;
-               }
        }
 
-       return false;
-}
+       /**
+        * Translate multiple lines of plaintext
+        *
+        * The output may need normalizing for leading/trailing whitespace etc.
+        *
+        * @param {string} sourceLang Source language code
+        * @param {string} targetLang Target language code
+        * @param {string[]} sourceLines Source plaintext lines
+        * @return {Promise} Translated plaintext lines
+        */
+       translateLines( sourceLang, targetLang, sourceLines ) {
+               var sourceLinesText;
 
-/**
- * Calculate range mappings based on the target text variants.
- *
- * @param {string} targetLang The target language.
- * @param {Object[]} subSequences The start and length of each subsequence.
- * @param {string} targetText The translated text.
- * @param {Object} targetLines Translation of each subsequences.
- * @return {Object[]} The location of source and translation sequences in the 
text.
- * @return {number} Object.source.start {number} Start position of source 
subSequence in the text.
- * @return {number} Object.source.length {number} Length of source subSequence 
in the text.
- * @return {number} Object.target.start {number} Start position of sequence in 
the text.
- * @return {number} Object.target.length {number} Length of matched sequence 
in the text.
- */
-MTClient.prototype.getSequenceMappings = function ( targetLang, subSequences, 
targetText, targetLines ) {
-       var i, iLen, targetRange, sourceRange, subSequence,
-               rangeMappings = [],
-               targetRanges = [],
-               occurrences = {};
+               // Join lines into single string. Separator must break 
sentences and pass through unchanged
+               // Using Devangari separator Double Danda twice.
+               sourceLinesText = sourceLines.join( '.॥॥.' );
 
-       if ( subSequences.length !== targetLines.length ) {
-               // We must have translation for all subSequences.
-               throw new Error( 'Translation variants length mismatch' );
+               return this.translateText(
+                       sourceLang,
+                       targetLang,
+                       sourceLinesText
+               ).then( ( targetLinesText ) => targetLinesText.split( /\.॥॥\./g 
) );
        }
 
-       for ( i = 0, iLen = subSequences.length; i < iLen; i++ ) {
-               subSequence = subSequences[ i ];
-               sourceRange = {
-                       start: subSequence.start,
-                       length: subSequence.length
-               };
-               // Keep track of repeated occurrences of a subsequence in the 
text. A word can repeat
-               // in a translation block.
-               occurrences[ subSequence.text ] =
-                       occurrences[ subSequence.text ] === undefined ? 0 : 
occurrences[ subSequence.text ] + 1;
-               // Find the position of the translated subsequence in 
translated text.
-               // This involves a non-trivial fuzzy matching algorithm
-               targetRange = this.findSubSequence(
-                       targetText, targetLines[ i ], targetLang, occurrences[ 
subSequence.text ]
-               );
+       /**
+        * Create variants of the text, with a different annotation uppercased 
in each.
+        *
+        * @param {string} lang Language code
+        * @param {string} sourceText Text
+        * @param {Object[]} annotationOffsets start and length of each 
annotation
+        * @return {Object[]}
+        * @return {number} Object.start Start offset of uppercasing
+        * @return {number} Object.length Length of uppercasing
+        * @return {string} Object.text Text variant with uppercasing
+        */
+       getSubSequences( lang, sourceText, annotationOffsets ) {
+               var offset, subSequences = [];
 
-               if ( targetRange && !isOverlappingRange( targetRange, 
targetRanges ) ) {
-                       // targetRanges keep track of all ranges we located. 
Used for overlap
-                       // detection.
-                       targetRanges.push( targetRange );
-                       rangeMappings.push( {
-                               source: sourceRange,
-                               target: targetRange
+               for ( let i = 0, len = annotationOffsets.length; i < len; i++ ) 
{
+                       offset = annotationOffsets[ i ];
+                       subSequences.push( {
+                               start: offset.start,
+                               length: offset.length,
+                               text: sourceText.slice( offset.start, 
offset.start + offset.length )
                        } );
                }
-       }
-       return rangeMappings;
-};
-
-/**
- * Locate the given sequence in the translated text.
- * Example:
- *   Searching  'tropical' in 'They are subtropical and tropical flowers.', 
'tropical',
- *   returns { start: 12, length: 8 }
- *
- * @param {string} text The translated text.
- * @param {string} sequence The search string.
- * @param {string} language Language of the text. Used for language specific 
matching.
- * @param {number} occurrence Pass 1 for first occurrence, 2 for second 
occurrence, so on.
- * @return {null|Object} The location of the sequence in the text.
- * @return {null|number} Object.start {number} Start position of sequence in 
the text.
- * @return {null|number} Object.lengthLength of matched sequence in the text.
- */
-MTClient.prototype.findSubSequence = function ( text, sequence, language, 
occurrence ) {
-       var indices, matcher;
-
-       matcher = new SubSequenceMatcher( language );
-       indices = matcher.findFuzzyMatch( text, sequence );
-       // Find the nth occurrence position
-
-       if ( !indices || indices.length < occurrence ) {
-               return null;
-       }
-       if ( occurrence === 0 ) {
-               return matcher.bestMatch( indices );
-       }
-       return indices[ occurrence ];
-};
-
-/**
- * Build the LinearDoc for the given source html
- *
- * @param {string} sourceHtml The html content
- */
-MTClient.prototype.buildSourceDoc = function ( sourceHtml ) {
-       var parser;
-
-       if ( this.sourceDoc ) {
-               return;
+               return subSequences;
        }
 
-       if ( !sourceHtml ) {
-               throw new Error( 'Invalid sourceHtml' );
+       /**
+        * Check if a range already exist in the array of ranges already 
located.
+        * A range is start position and length indicating position of certain 
text
+        * in a bigger text.
+        * This is not just a membership check. If the range we are checking
+        * falls under the start and end position of an already existing range, 
then also
+        * we consider it as an overlapping range.
+        * For example [start:5, length:4] and [start:6, length:3] overlaps.
+        *
+        * @param {Object} range
+        * @param {Object[]} rangeArray
+        * @return {boolean} Whether the range overlap or exist in any range in 
the given
+        *   range array
+        */
+       isOverlappingRange( range, rangeArray ) {
+               var rangeStart, rangeEnd, start, end;
+
+               rangeStart = range.start;
+               rangeEnd = range.start + range.length;
+               for ( let i = 0; i < rangeArray.length; i++ ) {
+                       start = rangeArray[ i ].start;
+                       end = start + rangeArray[ i ].length;
+                       if ( start >= rangeStart && end <= rangeEnd ||
+                       start <= rangeStart && end >= rangeEnd ) {
+                               return true;
+                       }
+               }
+
+               return false;
        }
 
-       parser = new LinearDoc.Parser( {
-               // For the proper annotation mapping between source and 
translated content,
-               // we need to treat each sentence as isolated.
-               // In other words, trying to find mappings in a sentence 
context has better results
-               // compared to the mapping done in a whole paragraph content.
-               isolateSegments: true
-       } );
-       parser.init();
-       parser.write( sourceHtml );
-       this.sourceHTML = sourceHtml;
-       this.sourceDoc = parser.builder.doc;
-};
+       /**
+        * Calculate range mappings based on the target text variants.
+        *
+        * @param {string} targetLang The target language.
+        * @param {Object[]} subSequences The start and length of each 
subsequence.
+        * @param {string} targetText The translated text.
+        * @param {Object} targetLines Translation of each subsequences.
+        * @return {Object[]} The location of source and translation sequences 
in the text.
+        * @return {number} Object.source.start {number} Start position of 
source subSequence in the text.
+        * @return {number} Object.source.length {number} Length of source 
subSequence in the text.
+        * @return {number} Object.target.start {number} Start position of 
sequence in the text.
+        * @return {number} Object.target.length {number} Length of matched 
sequence in the text.
+        */
+       getSequenceMappings( targetLang, subSequences, targetText, targetLines 
) {
+               var targetRange, sourceRange, subSequence,
+                       rangeMappings = [],
+                       targetRanges = [],
+                       occurrences = {};
 
-/**
- * Whether this engine needs authentication with JWT
- *
- * @return {boolean}
- */
-MTClient.prototype.requiresAuthorization = function () {
-       return false;
-};
+               if ( subSequences.length !== targetLines.length ) {
+               // We must have translation for all subSequences.
+                       throw new Error( 'Translation variants length mismatch' 
);
+               }
+
+               for ( let i = 0, iLen = subSequences.length; i < iLen; i++ ) {
+                       subSequence = subSequences[ i ];
+                       sourceRange = {
+                               start: subSequence.start,
+                               length: subSequence.length
+                       };
+                       // Keep track of repeated occurrences of a subsequence 
in the text. A word can repeat
+                       // in a translation block.
+                       occurrences[ subSequence.text ] =
+                       occurrences[ subSequence.text ] === undefined ? 0 : 
occurrences[ subSequence.text ] + 1;
+                       // Find the position of the translated subsequence in 
translated text.
+                       // This involves a non-trivial fuzzy matching algorithm
+                       targetRange = this.findSubSequence(
+                               targetText, targetLines[ i ], targetLang, 
occurrences[ subSequence.text ]
+                       );
+
+                       if ( targetRange && !this.isOverlappingRange( 
targetRange, targetRanges ) ) {
+                       // targetRanges keep track of all ranges we located. 
Used for overlap
+                       // detection.
+                               targetRanges.push( targetRange );
+                               rangeMappings.push( {
+                                       source: sourceRange,
+                                       target: targetRange
+                               } );
+                       }
+               }
+               return rangeMappings;
+       }
+
+       /**
+        * Locate the given sequence in the translated text.
+        * Example:
+        *   Searching  'tropical' in 'They are subtropical and tropical 
flowers.', 'tropical',
+        *   returns { start: 12, length: 8 }
+        *
+        * @param {string} text The translated text.
+        * @param {string} sequence The search string.
+        * @param {string} language Language of the text. Used for language 
specific matching.
+        * @param {number} occurrence Pass 1 for first occurrence, 2 for second 
occurrence, so on.
+        * @return {null|Object} The location of the sequence in the text.
+        * @return {null|number} Object.start {number} Start position of 
sequence in the text.
+        * @return {null|number} Object.lengthLength of matched sequence in the 
text.
+        */
+       findSubSequence( text, sequence, language, occurrence ) {
+               var indices, matcher;
+
+               matcher = new SubSequenceMatcher( language );
+               indices = matcher.findFuzzyMatch( text, sequence );
+               // Find the nth occurrence position
+               if ( !indices || indices.length < occurrence ) {
+                       return null;
+               }
+               if ( occurrence === 0 ) {
+                       return matcher.bestMatch( indices );
+               }
+               return indices[ occurrence ];
+       }
+
+       /**
+        * Build the LinearDoc for the given source html
+        *
+        * @param {string} sourceHtml The html content
+        */
+       buildSourceDoc( sourceHtml ) {
+               var parser;
+
+               if ( this.sourceDoc ) {
+                       return;
+               }
+
+               if ( !sourceHtml ) {
+                       throw new Error( 'Invalid sourceHtml' );
+               }
+
+               parser = new LinearDoc.Parser( {
+                       // For the proper annotation mapping between source and 
translated content,
+                       // we need to treat each sentence as isolated.
+                       // In other words, trying to find mappings in a 
sentence context has better results
+                       // compared to the mapping done in a whole paragraph 
content.
+                       isolateSegments: true
+               } );
+               parser.init();
+               parser.write( sourceHtml );
+               this.sourceHTML = sourceHtml;
+               this.sourceDoc = parser.builder.doc;
+       }
+
+       /**
+        * Whether this engine needs authentication with JWT
+        *
+        * @return {boolean}
+        */
+       requiresAuthorization() {
+               return false;
+       }
+}
 
 module.exports = MTClient;
diff --git a/lib/mt/Yandex.js b/lib/mt/Yandex.js
index 00beeb4..53305dc 100644
--- a/lib/mt/Yandex.js
+++ b/lib/mt/Yandex.js
@@ -1,100 +1,89 @@
 'use strict';
 
-var
-       util = require( 'util' ),
-       preq = require( 'preq' ),
-       BBPromise = require( 'bluebird' ),
+const preq = require( 'preq' ),
        MTClient = require( './MTClient.js' ),
-       yandexLanguageNameMap;
+       yandexLanguageNameMap = {
+               'be-tarask': 'be', // T122033
+               nb: 'no' // T132217
+       };
 
-// Yandex language codes can differ from the language codes that
-// we use.
-yandexLanguageNameMap = {
-       'be-tarask': 'be', // T122033
-       nb: 'no' // T132217
-};
+class Yandex extends MTClient {
 
-function Yandex( options ) {
-       this.logger = options.logger;
-       this.conf = options.conf;
-}
+       /**
+        * Translate html or plain text content with Yandex.
+        * Yandex is capable of translating plain text and html with
+        * annotations mapping (keeps markup retained in translated content).
+        * Hence overriding translate method of MTClient.
+        *
+        * @param {string} sourceLang Source language code
+        * @param {string} targetLang Target language code
+        * @param {string} sourceText Source language text
+        * @return {Q.Promise} Target language text
+        */
+       translate( sourceLang, targetLang, sourceText ) {
+               var key, postData;
 
-util.inherits( Yandex, MTClient );
-
-/**
- * Translate html or plain text content with Yandex.
- * Yandex is capable of translating plain text and html with
- * annotations mapping (keeps markup retained in translated content).
- * Hence overriding translate method of MTClient.
- *
- * @param {string} sourceLang Source language code
- * @param {string} targetLang Target language code
- * @param {string} sourceText Source language text
- * @return {Q.Promise} Target language text
- */
-Yandex.prototype.translate = function ( sourceLang, targetLang, sourceText ) {
-       var key, postData, self = this;
-
-       key = this.conf.mt.yandex.key;
-       if ( key === null ) {
-               return BBPromise.reject( new Error( 'Yandex service is 
misconfigured' ) );
-       }
-
-       if ( sourceText.length > 10000 ) {
-               return BBPromise.reject( new Error( 'Source text too long: ' +
-                       sourceLang + '-' + targetLang ) );
-       }
-
-       sourceLang = yandexLanguageNameMap[ sourceLang ] || sourceLang;
-       targetLang = yandexLanguageNameMap[ targetLang ] || targetLang;
-
-       postData = {
-               uri: this.conf.mt.yandex.api + '/api/v1.5/tr.json/translate',
-               proxy: this.conf.proxy,
-               body: {
-                       key: key,
-                       lang: sourceLang + '-' + targetLang,
-                       format: 'html',
-                       text: sourceText
+               key = this.conf.mt.yandex.key;
+               if ( key === null ) {
+                       return Promise.reject( new Error( 'Yandex service is 
misconfigured' ) );
                }
-       };
 
-       return preq.post( postData ).then( function ( response ) {
-               return response.body.text[ 0 ];
-       } ).catch( function ( response ) {
-               throw new Error( 'Translation with Yandex failed. Error: ' +
-                       self.getErrorName( response.body.code ) + ' ' + 
sourceLang + '-' + targetLang );
-       } );
-};
+               if ( sourceText.length > 10000 ) {
+                       return Promise.reject( new Error( 'Source text too 
long: ' +
+                               sourceLang + '-' + targetLang ) );
+               }
 
-/**
- * Returns error name from error code.
- *
- * @param {number} code Error code
- * @return {string}
- */
-Yandex.prototype.getErrorName = function ( code ) {
-       // http://api.yandex.com/translate/doc/dg/reference/translate.xml
-       var errormap = {
-               200: 'ERR_OK',
-               401: 'ERR_KEY_INVALID',
-               402: 'ERR_KEY_BLOCKED',
-               403: 'ERR_DAILY_REQ_LIMIT_EXCEEDED',
-               404: 'ERR_DAILY_CHAR_LIMIT_EXCEEDED',
-               413: 'ERR_TEXT_TOO_LONG',
-               422: 'ERR_UNPROCESSABLE_TEXT',
-               501: 'ERR_LANG_NOT_SUPPORTED'
-       };
+               sourceLang = yandexLanguageNameMap[ sourceLang ] || sourceLang;
+               targetLang = yandexLanguageNameMap[ targetLang ] || targetLang;
 
-       if ( code in errormap ) {
-               return errormap[ code ];
+               postData = {
+                       uri: this.conf.mt.yandex.api + 
'/api/v1.5/tr.json/translate',
+                       proxy: this.conf.proxy,
+                       body: {
+                               key,
+                               lang: sourceLang + '-' + targetLang,
+                               format: 'html',
+                               text: sourceText
+                       }
+               };
+
+               return preq.post( postData )
+                       .then( ( response ) => response.body.text[ 0 ] )
+                       .catch( ( response ) => {
+                               throw new Error( 'Translation with Yandex 
failed. Error: ' +
+                                       this.getErrorName( response.body.code ) 
+ ' ' + sourceLang + '-' + targetLang );
+                       } );
        }
 
-       return 'Unknown error';
-};
+       /**
+        * Returns error name from error code.
+        *
+        * @param {number} code Error code
+        * @return {string}
+        */
+       getErrorName( code ) {
+               // 
http://api.yandex.com/translate/doc/dg/reference/translate.xml
+               var errormap = {
+                       200: 'ERR_OK',
+                       401: 'ERR_KEY_INVALID',
+                       402: 'ERR_KEY_BLOCKED',
+                       403: 'ERR_DAILY_REQ_LIMIT_EXCEEDED',
+                       404: 'ERR_DAILY_CHAR_LIMIT_EXCEEDED',
+                       413: 'ERR_TEXT_TOO_LONG',
+                       422: 'ERR_UNPROCESSABLE_TEXT',
+                       501: 'ERR_LANG_NOT_SUPPORTED'
+               };
 
-Yandex.prototype.requiresAuthorization = function () {
-       return true;
-};
+               if ( code in errormap ) {
+                       return errormap[ code ];
+               }
+
+               return 'Unknown error';
+       }
+
+       requiresAuthorization() {
+               return true;
+       }
+}
 
 module.exports = Yandex;
diff --git a/lib/mt/Youdao.js b/lib/mt/Youdao.js
index 94cb4da..14ecc40 100644
--- a/lib/mt/Youdao.js
+++ b/lib/mt/Youdao.js
@@ -1,160 +1,147 @@
 'use strict';
 
-var
-       util = require( 'util' ),
-       preq = require( 'preq' ),
+const preq = require( 'preq' ),
        LinearDoc = require( './../lineardoc' ),
-       BBPromise = require( 'bluebird' ),
        MTClient = require( './MTClient.js' ),
-       youdaoLanguageNameMap;
+       youdaoLanguageNameMap = {
+               'en>zh': 'EN2ZH_CN', // English to Chinese Simplified
+               'simple>zh': 'EN2ZH_CN', // English to Chinese Simplified
+               'en>zh-cn': 'EN2ZH_CN', // English to Chinese Simplified
+               'simple>zh-cn': 'EN2ZH_CN', // English to Chinese Simplified
+               'ja>zh-cn': 'JA2ZH_CN', // Japanese to Chinese Simplified,
+               'ja>zh': 'JA2ZH_CN', // Japanese to Chinese Simplified,
+               'ko>zh-cn': 'KR2ZH_CN', // Korean to Chinese Simplified
+               'fr>zh-cn': 'FR2ZH_CN', // Korean to Chinese Simplified
+               'ru>zh-cn': 'RU2ZH_CN', // Russian to Chinese Simplified
+               'es>zh-cn': 'SP2ZH_CN', // Spanish to Chinese Simplified
+               'zh>en': 'ZH_CN2EN', // Chinese Simplified/Traditional to 
English
+               'zh>simple': 'ZH_CN2EN', // Chinese Simplified/Traditional to 
Simple English
+               'zh>ja': 'ZH_CN2JA', // Chinese Simplified/Traditional to 
Japanese
+               'zh>ko': 'ZH_CN2KR', // Chinese Simplified/Traditional to Korean
+               'zh>fr': 'ZH_CN2FR', // Chinese Simplified/Traditional to French
+               'zh>ru': 'ZH_CN2RU', // Chinese Simplified/Traditional to 
Russian
+               'zh>es': 'ZH_CN2SP' // Chinese Simplified/Traditional to Spanish
+       };
 
-// Youdao language codes differ from the language codes that we use.
-youdaoLanguageNameMap = {
-       'en>zh': 'EN2ZH_CN', // English to Chinese Simplified
-       'simple>zh': 'EN2ZH_CN', // English to Chinese Simplified
-       'en>zh-cn': 'EN2ZH_CN', // English to Chinese Simplified
-       'simple>zh-cn': 'EN2ZH_CN', // English to Chinese Simplified
-       'ja>zh-cn': 'JA2ZH_CN', // Japanese to Chinese Simplified,
-       'ja>zh': 'JA2ZH_CN', // Japanese to Chinese Simplified,
-       'ko>zh-cn': 'KR2ZH_CN', // Korean to Chinese Simplified
-       'fr>zh-cn': 'FR2ZH_CN', // Korean to Chinese Simplified
-       'ru>zh-cn': 'RU2ZH_CN', // Russian to Chinese Simplified
-       'es>zh-cn': 'SP2ZH_CN', // Spanish to Chinese Simplified
-       'zh>en': 'ZH_CN2EN', // Chinese Simplified/Traditional to English
-       'zh>simple': 'ZH_CN2EN', // Chinese Simplified/Traditional to Simple 
English
-       'zh>ja': 'ZH_CN2JA', // Chinese Simplified/Traditional to Japanese
-       'zh>ko': 'ZH_CN2KR', // Chinese Simplified/Traditional to Korean
-       'zh>fr': 'ZH_CN2FR', // Chinese Simplified/Traditional to French
-       'zh>ru': 'ZH_CN2RU', // Chinese Simplified/Traditional to Russian
-       'zh>es': 'ZH_CN2SP' // Chinese Simplified/Traditional to Spanish
-};
+class Youdao extends MTClient {
+       /**
+        * Translate marked-up text
+        * Youdao does not support HTML translation. So we need to pass the 
plain text
+        * version. We are not piping this to translateText because we want to 
preseve
+        * the textblocks. But we cannot do annotation mapping because of 
complexity of
+        * segmentation for CJK languages.
+        *
+        * @param {string} sourceLang Source language code
+        * @param {string} targetLang Target language code
+        * @param {string} sourceHtml Source html
+        * @return {Promise} promise: Translated html
+        */
+       translateHtml( sourceLang, targetLang, sourceHtml ) {
+               var i, len, targetDoc, chain = [],
+                       self = this;
 
-function Youdao( options ) {
-       this.logger = options.logger;
-       this.conf = options.conf;
-}
+               this.buildSourceDoc( sourceHtml );
+               // Clone and adapt sourceDoc
+               targetDoc = new LinearDoc.Doc( this.sourceDoc.wrapperTag );
 
-util.inherits( Youdao, MTClient );
+               function translateItemDeferred( item ) {
+                       if ( item.type !== 'textblock' ) {
+                               return Promise.resolve( item );
+                       }
 
-/**
- * Translate marked-up text
- * Youdao does not support HTML translation. So we need to pass the plain text
- * version. We are not piping this to translateText because we want to preseve
- * the textblocks. But we cannot do annotation mapping because of complexity of
- * segmentation for CJK languages.
- *
- * @param {string} sourceLang Source language code
- * @param {string} targetLang Target language code
- * @param {string} sourceHtml Source html
- * @return {Promise} promise: Translated html
- */
-Youdao.prototype.translateHtml = function ( sourceLang, targetLang, sourceHtml 
) {
-       var i, len, targetDoc, chain = [],
-               self = this;
+                       return self.translateText(
+                               sourceLang,
+                               targetLang,
+                               item.item.getPlainText()
+                       ).then( function ( translated ) {
+                               var newTextBlock;
 
-       this.buildSourceDoc( sourceHtml );
-       // Clone and adapt sourceDoc
-       targetDoc = new LinearDoc.Doc( this.sourceDoc.wrapperTag );
+                               newTextBlock = item.item.translateTags(
+                                       translated, {} // Range mapping is 
empty. We dont do annotation mapping.
+                               );
 
-       function translateItemDeferred( item ) {
-               if ( item.type !== 'textblock' ) {
-                       return BBPromise.resolve( item );
+                               return {
+                                       type: 'textblock',
+                                       item: newTextBlock
+                               };
+                       } );
                }
 
-               return self.translateText(
-                       sourceLang,
-                       targetLang,
-                       item.item.getPlainText()
-               ).then( function ( translated ) {
-                       var newTextBlock;
+               for ( i = 0, len = this.sourceDoc.items.length; i < len; i++ ) {
+                       chain.push( translateItemDeferred( 
this.sourceDoc.items[ i ] ) );
+               }
 
-                       newTextBlock = item.item.translateTags(
-                               translated, {} // Range mapping is empty. We 
dont do annotation mapping.
-                       );
-
-                       return {
-                               type: 'textblock',
-                               item: newTextBlock
-                       };
+               return Promise.all( chain ).then( ( results ) => {
+                       targetDoc.items = results;
+                       return targetDoc.getHtml();
                } );
        }
 
-       for ( i = 0, len = this.sourceDoc.items.length; i < len; i++ ) {
-               chain.push( translateItemDeferred( this.sourceDoc.items[ i ] ) 
);
-       }
+       translateText( sourceLang, targetLang, sourceText ) {
+               var key, postData;
 
-       return BBPromise.all( chain ).then( function ( results ) {
-               targetDoc.items = results;
-               return targetDoc.getHtml();
-       } );
-};
+               key = this.conf.mt.youdao.key;
+               if ( key === null ) {
+                       return Promise.reject( new Error( 'Youdao service is 
misconfigured' ) );
+               }
 
-Youdao.prototype.translateText = function ( sourceLang, targetLang, sourceText 
) {
-       var self = this,
-               key, postData;
-
-       key = this.conf.mt.youdao.key;
-       if ( key === null ) {
-               return BBPromise.reject( new Error( 'Youdao service is 
misconfigured' ) );
-       }
-
-       if ( sourceText.length > 10000 ) {
-               return BBPromise.reject( new Error( 'Source text too long: ' +
+               if ( sourceText.length > 10000 ) {
+                       return Promise.reject( new Error( 'Source text too 
long: ' +
                        sourceLang + '-' + targetLang ) );
+               }
+
+               postData = {
+                       uri: this.conf.mt.youdao.api,
+                       proxy: this.conf.proxy,
+                       body: {
+                               key: key,
+                               type: 'data',
+                               doctype: 'json',
+                               q: sourceText,
+                               l: youdaoLanguageNameMap[ sourceLang + '>' + 
targetLang ],
+                               transtype: 'translate'
+                       }
+               };
+
+               return preq.post( postData ).then( ( response ) => {
+                       if ( response.body.errorCode === 0 ) {
+                               return response.body.translation[ 0 ];
+                       } else {
+                               throw new Error( 'Translation with Youdao 
failed. Error: ' +
+                                       this.getErrorName( 
response.body.errorCode ) +
+                                               ' ' + sourceLang + '>' + 
targetLang );
+                       }
+               } );
        }
 
-       postData = {
-               uri: this.conf.mt.youdao.api,
-               proxy: this.conf.proxy,
-               body: {
-                       key: key,
-                       type: 'data',
-                       doctype: 'json',
-                       q: sourceText,
-                       l: youdaoLanguageNameMap[ sourceLang + '>' + targetLang 
],
-                       transtype: 'translate'
+       /**
+        * Returns error name from error code.
+        *
+        * @param {number} code Error code
+        * @return {string}
+        */
+       getErrorName( code ) {
+               var errormap = {
+                       10: 'Some sentence in source text is too long',
+                       11: 'No dictionay result',
+                       20: 'Source text too long',
+                       30: 'Server down',
+                       40: 'Unsupported language code',
+                       50: 'Invalid key',
+                       52: 'IP of the request is invalid',
+                       60: 'Reaching the spending limit for today',
+                       70: 'Insufficinent balance'
+               };
+
+               if ( code in errormap ) {
+                       return errormap[ code ];
                }
-       };
 
-       return preq.post( postData ).then( function ( response ) {
-               if ( response.body.errorCode === 0 ) {
-                       return response.body.translation[ 0 ];
-               } else {
-                       throw new Error( 'Translation with Youdao failed. 
Error: ' +
-                               self.getErrorName( response.body.errorCode ) +
-                                       ' ' + sourceLang + '>' + targetLang );
-               }
-       } );
-};
-
-/**
- * Returns error name from error code.
- *
- * @param {number} code Error code
- * @return {string}
- */
-Youdao.prototype.getErrorName = function ( code ) {
-       var errormap = {
-               10: 'Some sentence in source text is too long',
-               11: 'No dictionay result',
-               20: 'Source text too long',
-               30: 'Server down',
-               40: 'Unsupported language code',
-               50: 'Invalid key',
-               52: 'IP of the request is invalid',
-               60: 'Reaching the spending limit for today',
-               70: 'Insufficinent balance'
-       };
-
-       if ( code in errormap ) {
-               return errormap[ code ];
+               return 'Unknown error';
        }
 
-       return 'Unknown error';
-};
-
-Youdao.prototype.requiresAuthorization = function () {
-       return true;
-};
-
+       requiresAuthorization() {
+               return true;
+       }
+}
 module.exports = Youdao;

-- 
To view, visit https://gerrit.wikimedia.org/r/372503
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0a9aaf9b71ee53d093956995cbe3073001f62081
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <santhosh.thottin...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to