jenkins-bot has submitted this change and it was merged.

Change subject: Apertium: Use the APY API running on apertium.wmflabs.org
......................................................................


Apertium: Use the APY API running on apertium.wmflabs.org

Change-Id: I6518a891034122646e4f0e19110eae74ba7182a8
---
M ContentTranslationService.js
M config.example.js
M mt/Apertium.js
M tests/apertium/Apertium.test.js
4 files changed, 50 insertions(+), 32 deletions(-)

Approvals:
  Divec: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/ContentTranslationService.js b/ContentTranslationService.js
index 6e7a5eb..a7e97bb 100644
--- a/ContentTranslationService.js
+++ b/ContentTranslationService.js
@@ -119,7 +119,7 @@
        req.on( 'end', function () {
                sourceHtmlChunks.push( '</div>' );
                sourceHtml = sourceHtmlChunks.join( '' );
-               mtClient.translateHtmlWithNativeMarkup( sourceLang, targetLang, sourceHtml ).then(
+               mtClient.translate( sourceLang, targetLang, sourceHtml ).then(
                        function ( data ) {
                                res.send( data );
                        },
diff --git a/config.example.js b/config.example.js
index bfb6268..4c25ec2 100644
--- a/config.example.js
+++ b/config.example.js
@@ -5,5 +5,6 @@
        logDir: 'log',
        parsoid: {
                api: 'http://parsoid-lb.eqiad.wikimedia.org'
-       }
+       },
+       'mt.apertium.api': 'http://apertium.wmflabs.org'
 };
diff --git a/mt/Apertium.js b/mt/Apertium.js
index 602e11a..b2479e1 100644
--- a/mt/Apertium.js
+++ b/mt/Apertium.js
@@ -1,4 +1,7 @@
-var Q = require( 'q' ),
+var config,
+       Q = require( 'q' ),
+       apertiumLangMapping,
+       request = require( 'request' ),
        LinearDoc = require( '../lineardoc/LinearDoc' ),
        Entities = require( 'html-entities' ).AllHtmlEntities,
        logger = require( '../utils/Logger.js' ),
@@ -6,6 +9,23 @@
        // TODO: Tokenize properly. These work for English/Spanish/Catalan
        TOKENS = /[\wáàçéèíïóòúüñÁÀÇÉÈÍÏÓÒÚÜÑ]+(?:[·'][\wáàçéèíïóòúüñÁÀÇÉÈÍÏÓÒÚÜÑ]+)?|[^\wáàçéèíïóòúüñÁÀÇÉÈÍÏÓÒÚÜÑ]+/g,
        IS_WORD = /^[\wáàçéèíïóòúüñÁÀÇÉÈÍÏÓÒÚÜÑ]+(?:[·'][\wáàçéèíïóòúüñÁÀÇÉÈÍÏÓÒÚÜÑ]+)?$/;
+
+try {
+       config = require( __dirname + '/../config.js' );
+} catch ( e ) {
+       config = {
+               'mt.apertium.api': 'http://apertium.wmflabs.org'
+       };
+}
+
+apertiumLangMapping = {
+       es: 'spa',
+       en: 'eng',
+       ca: 'cat',
+       pt: 'por',
+       it: 'ita',
+       kk: 'kaz'
+};
 
 function getTokens( text ) {
        // TODO: implement for other languages than English/Spanish/Catalan
@@ -90,7 +110,7 @@
                deferred = Q.defer();
        rangedSourceText = getRangedText( sourceText );
        apertium = spawn(
-               'python', [ 'mt/apertium.py', sourceLang + '-' + targetLang, '-u', '-f', 'html' ], {
+               'python', [ 'mt/apertium.py', sourceLang + '-' + targetLang, '-u' ], {
                        stdio: 'pipe',
                        env: {
                                PATH: process.env.PATH,
@@ -124,34 +144,31 @@
  * @param {string} sourceHtml Source rich text
  * @return {Object} Deferred promise: Translated rich text
  */
-function translateHtmlWithNativeMarkup( sourceLang, targetLang, sourceHtml ) {
-       var apertium,
-               translation = '',
-               deferred = Q.defer();
-       apertium = spawn(
-               'apertium', [ sourceLang + '-' + targetLang, '-u', '-f', 'html' ], {
-                       stdio: 'pipe',
-                       env: {
-                               PATH: process.env.PATH,
-                               LC_ALL: 'en_US.utf8'
+function translate( sourceLang, targetLang, sourceHtml ) {
+       var deferred = Q.defer(),
+               postData;
+
+       postData = {
+               url: config['mt.apertium.api'] + '/translate',
+               form: {
+                       markUnknown: 0,
+                       langpair: apertiumLangMapping[ sourceLang ] + '|' + apertiumLangMapping[ targetLang ],
+                       q: sourceHtml
+               }
+       };
+       request.post( postData,
+               function ( error, response, body ) {
+                       if ( error ) {
+                               deferred.reject( new Error( error ) );
+                               return;
                        }
+                       if ( response.statusCode !== 200 ) {
+                               deferred.reject( new Error( 'Error while translating content using apertium' ) );
+                               return;
+                       }
+                       deferred.resolve( JSON.parse( body ).responseData.translatedText );
                }
        );
-       apertium.stderr.on( 'data', function ( data ) {
-               logger.error( data );
-       } );
-       apertium.stdout.on( 'data', function ( data ) {
-               translation += data;
-       } );
-       apertium.on( 'close', function ( code ) {
-               if ( code !== 0 ) {
-                       deferred.reject( new Error( '' + code ) );
-                       return;
-               }
-               deferred.resolve( translation );
-       } );
-       apertium.stdin.write( sourceHtml );
-       apertium.stdin.end();
        return deferred.promise;
 }
 
@@ -204,7 +221,7 @@
 }
 
 module.exports = {
-       translateHtmlWithNativeMarkup: translateHtmlWithNativeMarkup,
+       translate: translate,
        translateHtml: translateHtml,
        translateText: translateText
 };
diff --git a/tests/apertium/Apertium.test.js b/tests/apertium/Apertium.test.js
index 053b7fc..ff9ae93 100644
--- a/tests/apertium/Apertium.test.js
+++ b/tests/apertium/Apertium.test.js
@@ -1,8 +1,8 @@
-var caHtml, Apertium = require( '../mt/Apertium' );
+var caHtml, Apertium = require( '../../mt/Apertium' );
 
 caHtml = '<p>un dos <b><a href="3">tres</a> quatre <i><a href="5">cinc</a> sis set</i> vuit nou</b> deu</p>.';
 
-Apertium.translateHtmlWithNativeMarkup( 'ca', 'es', caHtml ).then( function ( esHtml ) {
+Apertium.translate( 'ca', 'es', caHtml ).then( function ( esHtml ) {
        console.log( 'caHtml:', caHtml );
        console.log( 'esHtml:', esHtml );
 } );

-- 
To view, visit https://gerrit.wikimedia.org/r/154758
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I6518a891034122646e4f0e19110eae74ba7182a8
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <[email protected]>
Gerrit-Reviewer: Divec <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to