jenkins-bot has submitted this change and it was merged.
( https://gerrit.wikimedia.org/r/370185 )

Change subject: Gallery adaptation and tests
......................................................................


Gallery adaptation and tests

Bug: T170674
Change-Id: Ifd79a2f754af442483ef5b6c661be82d2fea648e
---
M lib/mt/MTClient.js
M lib/mw/ApiRequest.js
M lib/routes/v1.js
A lib/translationunits/MWGallery.js
M lib/translationunits/MWImage.js
M lib/translationunits/TranslationUnit.js
M lib/translationunits/index.js
M test/adaptation/AdaptationTest.js
M test/adaptation/AdaptationTests.json
9 files changed, 139 insertions(+), 9 deletions(-)

Approvals:
  jenkins-bot: Verified
  Nikerabbit: Looks good to me, approved



diff --git a/lib/mt/MTClient.js b/lib/mt/MTClient.js
index 582b8e5..c1caf51 100644
--- a/lib/mt/MTClient.js
+++ b/lib/mt/MTClient.js
@@ -357,10 +357,6 @@
 MTClient.prototype.buildSourceDoc = function ( sourceHtml ) {
        var parser;
 
-       if ( this.sourceDoc ) {
-               return;
-       }
-
        if ( !sourceHtml ) {
                throw new Error( 'Invalid sourceHtml' );
        }
diff --git a/lib/mw/ApiRequest.js b/lib/mw/ApiRequest.js
index 343cc7c..7175157 100644
--- a/lib/mw/ApiRequest.js
+++ b/lib/mw/ApiRequest.js
@@ -126,6 +126,53 @@
                        .then( ( res ) => res.query );
                return MWApiRequest.siteInfoCache[ domain ];
        }
+
+       htmlToWikiText( html, language ) {
+               var restReq, path, domain = this.getDomain( language );
+
+               if ( !html || !html.trim() || !/<[a-zA-Z][\s\S]*>/i.test( html 
) ) {
+                       // Does not contain HTML elements. Save api call.
+                       return Promise.resolve( html );
+               }
+
+               restReq = {
+                       method: 'post',
+                       body: { html },
+                       headers: {
+                               // See 
https://www.mediawiki.org/wiki/Specs/HTML/1.5.0
+                               accept: 'text/html; charset=utf-8; 
profile="https://www.mediawiki.org/wiki/Specs/HTML/1.5.0";'
+                       }
+               };
+               path = '/transform/html/to/wikitext';
+
+               return apiUtil.restApiGet( this.context, domain, path, restReq )
+                       .then( ( response ) => response.body );
+       }
+
+       wikitextToHTML( wikitext, language ) {
+               var restReq, path, domain = this.getDomain( language );
+
+               if ( !wikitext || !wikitext.trim() ) {
+                       // Save api call.
+                       return Promise.resolve( wikitext );
+               }
+
+               restReq = {
+                       method: 'post',
+                       body: {
+                               wikitext,
+                               body_only: true, // eslint-disable-line
+                       },
+                       headers: {
+                               // See 
https://www.mediawiki.org/wiki/Specs/HTML/1.5.0
+                               accept: 'text/html; charset=utf-8; 
profile="https://www.mediawiki.org/wiki/Specs/HTML/1.5.0";'
+                       }
+               };
+               path = '/transform/wikitext/to/html';
+
+               return apiUtil.restApiGet( this.context, domain, path, restReq )
+                       .then( ( response ) => response.body );
+       }
 }
 
 /**
diff --git a/lib/routes/v1.js b/lib/routes/v1.js
index 7506a81..6a43dfc 100644
--- a/lib/routes/v1.js
+++ b/lib/routes/v1.js
@@ -247,6 +247,8 @@
        return machineTranslationRequest.then( ( translatedHTML ) => {
                var CXAdapter = require( __dirname + '/../Adapter' );
 
+               app.conf.mtClient = mtClient;
+
                return new CXAdapter( from, to, app )
                        .adapt( translatedHTML )
                        .then( ( adaptedDoc ) => {
diff --git a/lib/translationunits/MWGallery.js 
b/lib/translationunits/MWGallery.js
new file mode 100644
index 0000000..afb75c4
--- /dev/null
+++ b/lib/translationunits/MWGallery.js
@@ -0,0 +1,67 @@
+const cxutil = require( '../util.js' ),
+       TranslationUnit = require( './TranslationUnit.js' ),
+       MWApiRequest = require( '../mw/ApiRequest.js' ),
+       MWApiRequestManager = require( '../mw/ApiRequestManager.js' );
+
+class MWGallery extends TranslationUnit {}
+
+MWGallery.prototype.adapt = cxutil.async( function*() {
+       var imageItems, imageTitle, namespaceAlias, imageCaptionWikitext, 
requestManager,
+               imageCaptionHTML, imageCaptionHTMLTranslated;
+
+       this.galleryData = JSON.parse( this.node.attributes[ 'data-mw' ] );
+       if ( !this.galleryData ) {
+               this.log( 'error', 'Skipping invalid data-mw for gallery node: 
' + this.node.attributes.id );
+               return this.node;
+       }
+       // mw-data.body.extsrc has each image of the gallery separated by line 
breaks.
+       // The line breaks can be \r, \n or \r\n
+       imageItems = this.galleryData.body.extsrc.split( /[\r\n]+/ );
+
+       requestManager = new MWApiRequestManager( this.context );
+
+       for ( let i = 0; i < imageItems.length; i++ ) {
+               if ( !imageItems[ i ].trim() ) {
+                       continue;
+               }
+               // Each item has image title and caption separated by '|'. But 
the caption itself
+               // can have | for MW links, split by first occurrence alone.
+               [ imageTitle, imageCaptionWikitext ] = imageItems[ i 
].trim().split( /\|(.+)/ );
+               // Adapt namespace alias of title
+               namespaceAlias = yield requestManager.getNamespaceAlias( 
'File', this.targetLanguage );
+               imageTitle = imageTitle.replace( /^(\.\.?\/)*(.+)(:)/, '$1' + 
namespaceAlias + '$3' );
+
+               // Translate image caption if there is MT client for the 
adaptation context
+               if ( this.context.conf.mtClient ) {
+                       // Convert the current image caption to html
+                       imageCaptionHTML = yield new MWApiRequest( {
+                               context: this.context,
+                               sourceLanguage: this.sourceLanguage,
+                               targetLanguage: this.targetLanguage
+                       } ).wikitextToHTML( imageCaptionWikitext, 
this.sourceLanguage );
+
+                       // Machine translate the HTML caption
+                       imageCaptionHTMLTranslated = yield 
this.context.conf.mtClient.translate(
+                               this.sourceLanguage, this.targetLanguage, 
imageCaptionHTML
+                       );
+
+                       // Convert the machine translated html caption to 
wikitext.
+                       imageCaptionWikitext = yield new MWApiRequest( {
+                               context: this.context,
+                               sourceLanguage: this.sourceLanguage,
+                               targetLanguage: this.targetLanguage
+                       } ).htmlToWikiText( imageCaptionHTMLTranslated, 
this.targetLanguage );
+               }
+
+               imageItems[ i ] = [ imageTitle, imageCaptionWikitext ].join( 
'|' ).replace( '\n', '' );
+       }
+
+       this.galleryData.body.extsrc = imageItems.join( '\n' );
+       this.node.attributes[ 'data-mw' ] = JSON.stringify( this.galleryData );
+
+       return this.node;
+} );
+
+MWGallery.matchRdfaTypes = [ 'mw:Extension/gallery' ];
+
+module.exports = MWGallery;
diff --git a/lib/translationunits/MWImage.js b/lib/translationunits/MWImage.js
index f3052d4..9ae5704 100644
--- a/lib/translationunits/MWImage.js
+++ b/lib/translationunits/MWImage.js
@@ -74,7 +74,7 @@
 
        if ( this.isCommonsImage( sourceImage.attributes[ 'src' ] ) ) {
                namespaceAlias = yield new MWApiRequestManager( this.context 
).getNamespaceAlias( 'File', this.targetLanguage );
-               targetResource = this.sourceResource.replace( 
/^(\.\.?\/)*(.+)(:)/, '$1' + namespaceAlias + '$3' );
+               targetResource = this.sourceResource.replace( 
/^(\.\.?\/)*([^:]+)(:)/, '$1' + namespaceAlias + '$3' );
                sourceImage.attributes[ 'resource' ] = imageLink.attributes[ 
'href' ] = targetResource;
        } else {
                // TODO: This format is not decided yet. We do need to inform 
client about failed
diff --git a/lib/translationunits/TranslationUnit.js 
b/lib/translationunits/TranslationUnit.js
index 89f218a..58bc94a 100644
--- a/lib/translationunits/TranslationUnit.js
+++ b/lib/translationunits/TranslationUnit.js
@@ -9,6 +9,13 @@
                this.sourceLanguage = sourceLanguage;
                this.targetLanguage = targetLanguage;
                this.context = context;
+               this.logger = context.logger;
+       }
+
+       log( level, info ) {
+               if ( this.logger && this.logger.log ) {
+                       this.logger.log( level, info );
+               }
        }
 
        adapt() {
diff --git a/lib/translationunits/index.js b/lib/translationunits/index.js
index c8ddd27..050f7a1 100644
--- a/lib/translationunits/index.js
+++ b/lib/translationunits/index.js
@@ -2,5 +2,6 @@
 
 module.exports = {
        MWLink: require( './MWLink.js' ),
-       MWImage: require( './MWImage.js' )
+       MWImage: require( './MWImage.js' ),
+       MWGallery: require( './MWGallery.js' )
 };
diff --git a/test/adaptation/AdaptationTest.js 
b/test/adaptation/AdaptationTest.js
index c6c0500..1d61e8f 100644
--- a/test/adaptation/AdaptationTest.js
+++ b/test/adaptation/AdaptationTest.js
@@ -4,6 +4,7 @@
        server = require( '../utils/server.js' ),
        LinearDoc = require( '../../lib/lineardoc' ),
        async = require( 'async' ),
+       Apertium = require( '../../lib/mt' ).Apertium,
        Adapter = require( '../../lib/Adapter' ),
        tests = require( './AdaptationTests.json' );
 
@@ -16,11 +17,13 @@
 
 describe( 'Adaptation tests', function () {
        async.forEach( tests, function ( test ) {
-               var expectedResultData, adapter;
+               var expectedResultData, adapter, cxserver;
 
-               adapter = new Adapter( test.from, test.to, server.config );
+               cxserver = server.config.conf.services[ 
server.config.conf.services.length - 1 ];
+               cxserver.conf.mtClient = new Apertium( cxserver );
+               adapter = new Adapter( test.from, test.to, cxserver );
                it( 'should not have any errors when: ' + test.desc, function 
() {
-                       return adapter.adapt( test.source ).then( function( 
result ) {
+                       return adapter.adapt( test.source ).then( ( result ) => 
{
                                result = normalize( result.getHtml() );
                                expectedResultData = normalize( test.result );
                                assert.deepEqual( result, expectedResultData, 
test.source + ': ' + test.desc || '' );
diff --git a/test/adaptation/AdaptationTests.json 
b/test/adaptation/AdaptationTests.json
index a97d3d6..9fb8671 100644
--- a/test/adaptation/AdaptationTests.json
+++ b/test/adaptation/AdaptationTests.json
@@ -33,5 +33,12 @@
                "to": "ml",
                "source": "<figure class='mw-default-size mw-halign-left' 
typeof='mw:Image/Frame' id='mwUQ'><a 
href='./File:Philos_experiment_of_the_burning_candle.PNG' id='mwUg'><img 
alt='Drawing of a burning candle enclosed in a glass bulb.' 
resource='./File:Philos_experiment_of_the_burning_candle.PNG' 
src='//upload.wikimedia.org/wikipedia/mediawiki/e/e6/Philos_experiment_of_the_burning_candle.PNG'
 data-file-width='125' data-file-height='248' data-file-type='bitmap' 
height='248' width='125' id='mwUw'/></a><figcaption id='mwVA'><a 
rel='mw:WikiLink' href='./Philo_of_Byzantium' title='Philo of Byzantium' 
id='mwVQ'>Philo's</a> experiment inspired later <a rel='mw:WikiLink' 
href='./Detective' title='Detective' 
id='mwVg'>investigators</a>.</figcaption></figure>",
                "result": "<figure class='mw-default-size mw-halign-left' 
data-cx='{&#34;adapted&#34;:false,&#34;resource&#34;:&#34;./File:Philos_experiment_of_the_burning_candle.PNG&#34;}'
 id='mwUQ' typeof='mw:Image/Frame'><a 
href='./File:Philos_experiment_of_the_burning_candle.PNG' id='mwUg'><img 
alt='Drawing of a burning candle enclosed in a glass bulb.' 
data-file-height='248' data-file-type='bitmap' data-file-width='125' 
height='248' id='mwUw' 
resource='./File:Philos_experiment_of_the_burning_candle.PNG' 
src='//upload.wikimedia.org/wikipedia/mediawiki/e/e6/Philos_experiment_of_the_burning_candle.PNG'
 width='125' /></a><figcaption id='mwVA'><a 
data-cx='{&#34;adapted&#34;:false,&#34;sourceTitle&#34;:&#34;Philo of 
Byzantium&#34;}' href='./Philo_of_Byzantium' id='mwVQ' rel='mw:WikiLink' 
title='Philo of Byzantium'>Philo's</a> experiment inspired later <a 
data-cx='{&#34;adapted&#34;:false,&#34;sourceTitle&#34;:&#34;Detective&#34;}' 
href='./Detective' id='mwVg' rel='mw:WikiLink' 
title='Detective'>investigators</a>.</figcaption></figure>"
+       },
+       {
+               "desc": "Gallery adaptation",
+               "from": "en",
+               "to": "es",
+               "source": "<ul class='gallery mw-gallery-packed' 
typeof='mw:Extension/gallery' about='#mwt2' 
data-mw='{\"name\":\"gallery\",\"attrs\":{\"mode\":\"packed\"},\"body\":{\"extsrc\":\"\\r\\nFile:Nilgiri
 Tahr 1.jpg|[[Nilgiri tahr]]\\r\\nFile:Malabar Giant Squirrel77.jpg|[[Malabar 
Giant Squirrel]], [[Silent Valley National Park]]\\r\\nFile:Periyar 
Thekkady.jpg|Elephants at Thekkady\\r\\nFile:Rhacophorus 
malabaricus.jpg|[[Rhacophorus malabaricus]]\\r\\nFile:Brahminy 
kite2.jpg|[[Brahminy kite]]\\r\\nFile:Goldenbacked woodpecker.jpg|[[Dinopium 
benghalense|Lesser golden-backed woodpecker]] water\\r\\n\"}}' id='mwAQ'></ul>",
+               "result": "<ul about=\"#mwt2\" class=\"gallery 
mw-gallery-packed\" 
data-mw=\"{&#34;name&#34;:&#34;gallery&#34;,&#34;attrs&#34;:{&#34;mode&#34;:&#34;packed&#34;},&#34;body&#34;:{&#34;extsrc&#34;:&#34;\\nArchivo:Nilgiri
 Tahr 1.jpg|[[Nilgiri tahr]]\\nArchivo:Malabar Giant Squirrel77.jpg|[[Malabar 
Giant Squirrel|Malabar Ardilla giganta]], [[Silent Valley National Park|Valle 
Silencioso Parque Nacional]]\\nArchivo:Periyar Thekkady.jpg|Elefantes en 
Thekkady\\nArchivo:Rhacophorus malabaricus.jpg|[[Rhacophorus 
malabaricus]]\\nArchivo:Brahminy kite2.jpg|[[Brahminy kite|Brahminy 
Cometa]]\\nArchivo:Goldenbacked woodpecker.jpg|[[Dinopium benghalense|Menor 
dorado-reculado woodpecker]] agua\\n&#34;}}\" id=\"mwAQ\" 
typeof=\"mw:Extension/gallery\"></ul>"
        }
 ]

-- 
To view, visit https://gerrit.wikimedia.org/r/370185
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ifd79a2f754af442483ef5b6c661be82d2fea648e
Gerrit-PatchSet: 12
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <[email protected]>
Gerrit-Reviewer: Catrope <[email protected]>
Gerrit-Reviewer: Nikerabbit <[email protected]>
Gerrit-Reviewer: Santhosh <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to