jenkins-bot has submitted this change and it was merged.
Change subject: Apertium: Use the APY API running on apertium.wmflabs.org
......................................................................
Apertium: Use the APY API running on apertium.wmflabs.org
Change-Id: I6518a891034122646e4f0e19110eae74ba7182a8
---
M ContentTranslationService.js
M config.example.js
M mt/Apertium.js
M tests/apertium/Apertium.test.js
4 files changed, 50 insertions(+), 32 deletions(-)
Approvals:
Divec: Looks good to me, approved
jenkins-bot: Verified
diff --git a/ContentTranslationService.js b/ContentTranslationService.js
index 6e7a5eb..a7e97bb 100644
--- a/ContentTranslationService.js
+++ b/ContentTranslationService.js
@@ -119,7 +119,7 @@
req.on( 'end', function () {
sourceHtmlChunks.push( '</div>' );
sourceHtml = sourceHtmlChunks.join( '' );
- mtClient.translateHtmlWithNativeMarkup( sourceLang, targetLang, sourceHtml ).then(
+ mtClient.translate( sourceLang, targetLang, sourceHtml ).then(
function ( data ) {
res.send( data );
},
diff --git a/config.example.js b/config.example.js
index bfb6268..4c25ec2 100644
--- a/config.example.js
+++ b/config.example.js
@@ -5,5 +5,6 @@
logDir: 'log',
parsoid: {
api: 'http://parsoid-lb.eqiad.wikimedia.org'
- }
+ },
+ 'mt.apertium.api': 'http://apertium.wmflabs.org'
};
diff --git a/mt/Apertium.js b/mt/Apertium.js
index 602e11a..b2479e1 100644
--- a/mt/Apertium.js
+++ b/mt/Apertium.js
@@ -1,4 +1,7 @@
-var Q = require( 'q' ),
+var config,
+ Q = require( 'q' ),
+ apertiumLangMapping,
+ request = require( 'request' ),
LinearDoc = require( '../lineardoc/LinearDoc' ),
Entities = require( 'html-entities' ).AllHtmlEntities,
logger = require( '../utils/Logger.js' ),
@@ -6,6 +9,23 @@
// TODO: Tokenize properly. These work for English/Spanish/Catalan
TOKENS = /[\wáàçéèíïóòúüñÁÀÇÉÈÍÏÓÒÚÜÑ]+(?:[·'][\wáàçéèíïóòúüñÁÀÇÉÈÍÏÓÒÚÜÑ]+)?|[^\wáàçéèíïóòúüñÁÀÇÉÈÍÏÓÒÚÜÑ]+/g,
IS_WORD = /^[\wáàçéèíïóòúüñÁÀÇÉÈÍÏÓÒÚÜÑ]+(?:[·'][\wáàçéèíïóòúüñÁÀÇÉÈÍÏÓÒÚÜÑ]+)?$/;
+
+try {
+ config = require( __dirname + '/../config.js' );
+} catch ( e ) {
+ config = {
+ 'mt.apertium.api': 'http://apertium.wmflabs.org'
+ };
+}
+
+apertiumLangMapping = {
+ es: 'spa',
+ en: 'eng',
+ ca: 'cat',
+ pt: 'por',
+ it: 'ita',
+ kk: 'kaz'
+};
function getTokens( text ) {
// TODO: implement for other languages than English/Spanish/Catalan
@@ -90,7 +110,7 @@
deferred = Q.defer();
rangedSourceText = getRangedText( sourceText );
apertium = spawn(
- 'python', [ 'mt/apertium.py', sourceLang + '-' + targetLang, '-u', '-f', 'html' ], {
+ 'python', [ 'mt/apertium.py', sourceLang + '-' + targetLang, '-u' ], {
stdio: 'pipe',
env: {
PATH: process.env.PATH,
@@ -124,34 +144,31 @@
* @param {string} sourceHtml Source rich text
* @return {Object} Deferred promise: Translated rich text
*/
-function translateHtmlWithNativeMarkup( sourceLang, targetLang, sourceHtml ) {
- var apertium,
- translation = '',
- deferred = Q.defer();
- apertium = spawn(
- 'apertium', [ sourceLang + '-' + targetLang, '-u', '-f', 'html' ], {
- stdio: 'pipe',
- env: {
- PATH: process.env.PATH,
- LC_ALL: 'en_US.utf8'
+function translate( sourceLang, targetLang, sourceHtml ) {
+ var deferred = Q.defer(),
+ postData;
+
+ postData = {
+ url: config['mt.apertium.api'] + '/translate',
+ form: {
+ markUnknown: 0,
+ langpair: apertiumLangMapping[ sourceLang ] + '|' + apertiumLangMapping[ targetLang ],
+ q: sourceHtml
+ }
+ };
+ request.post( postData,
+ function ( error, response, body ) {
+ if ( error ) {
+ deferred.reject( new Error( error ) );
+ return;
}
+ if ( response.statusCode !== 200 ) {
+ deferred.reject( new Error( 'Error while translating content using apertium' ) );
+ return;
+ }
+ deferred.resolve( JSON.parse( body ).responseData.translatedText );
}
);
- apertium.stderr.on( 'data', function ( data ) {
- logger.error( data );
- } );
- apertium.stdout.on( 'data', function ( data ) {
- translation += data;
- } );
- apertium.on( 'close', function ( code ) {
- if ( code !== 0 ) {
- deferred.reject( new Error( '' + code ) );
- return;
- }
- deferred.resolve( translation );
- } );
- apertium.stdin.write( sourceHtml );
- apertium.stdin.end();
return deferred.promise;
}
@@ -204,7 +221,7 @@
}
module.exports = {
- translateHtmlWithNativeMarkup: translateHtmlWithNativeMarkup,
+ translate: translate,
translateHtml: translateHtml,
translateText: translateText
};
diff --git a/tests/apertium/Apertium.test.js b/tests/apertium/Apertium.test.js
index 053b7fc..ff9ae93 100644
--- a/tests/apertium/Apertium.test.js
+++ b/tests/apertium/Apertium.test.js
@@ -1,8 +1,8 @@
-var caHtml, Apertium = require( '../mt/Apertium' );
+var caHtml, Apertium = require( '../../mt/Apertium' );
caHtml = '<p>un dos <b><a href="3">tres</a> quatre <i><a href="5">cinc</a> sis set</i> vuit nou</b> deu</p>.';
-Apertium.translateHtmlWithNativeMarkup( 'ca', 'es', caHtml ).then( function ( esHtml ) {
+Apertium.translate( 'ca', 'es', caHtml ).then( function ( esHtml ) {
console.log( 'caHtml:', caHtml );
console.log( 'esHtml:', esHtml );
} );
--
To view, visit https://gerrit.wikimedia.org/r/154758
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I6518a891034122646e4f0e19110eae74ba7182a8
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <[email protected]>
Gerrit-Reviewer: Divec <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits