jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/370185 )
Change subject: Gallery adaptation and tests
......................................................................
Gallery adaptation and tests
Bug: T170674
Change-Id: Ifd79a2f754af442483ef5b6c661be82d2fea648e
---
M lib/mt/MTClient.js
M lib/mw/ApiRequest.js
M lib/routes/v1.js
A lib/translationunits/MWGallery.js
M lib/translationunits/MWImage.js
M lib/translationunits/TranslationUnit.js
M lib/translationunits/index.js
M test/adaptation/AdaptationTest.js
M test/adaptation/AdaptationTests.json
9 files changed, 139 insertions(+), 9 deletions(-)
Approvals:
jenkins-bot: Verified
Nikerabbit: Looks good to me, approved
diff --git a/lib/mt/MTClient.js b/lib/mt/MTClient.js
index 582b8e5..c1caf51 100644
--- a/lib/mt/MTClient.js
+++ b/lib/mt/MTClient.js
@@ -357,10 +357,6 @@
MTClient.prototype.buildSourceDoc = function ( sourceHtml ) {
var parser;
- if ( this.sourceDoc ) {
- return;
- }
-
if ( !sourceHtml ) {
throw new Error( 'Invalid sourceHtml' );
}
diff --git a/lib/mw/ApiRequest.js b/lib/mw/ApiRequest.js
index 343cc7c..7175157 100644
--- a/lib/mw/ApiRequest.js
+++ b/lib/mw/ApiRequest.js
@@ -126,6 +126,53 @@
.then( ( res ) => res.query );
return MWApiRequest.siteInfoCache[ domain ];
}
+
+ htmlToWikiText( html, language ) {
+ var restReq, path, domain = this.getDomain( language );
+
+ if ( !html || !html.trim() || !/<[a-zA-Z][\s\S]*>/i.test( html
) ) {
+ // Does not contain HTML elements. Save api call.
+ return Promise.resolve( html );
+ }
+
+ restReq = {
+ method: 'post',
+ body: { html },
+ headers: {
+ // See
https://www.mediawiki.org/wiki/Specs/HTML/1.5.0
+ accept: 'text/html; charset=utf-8;
profile="https://www.mediawiki.org/wiki/Specs/HTML/1.5.0"'
+ }
+ };
+ path = '/transform/html/to/wikitext';
+
+ return apiUtil.restApiGet( this.context, domain, path, restReq )
+ .then( ( response ) => response.body );
+ }
+
+ wikitextToHTML( wikitext, language ) {
+ var restReq, path, domain = this.getDomain( language );
+
+ if ( !wikitext || !wikitext.trim() ) {
+ // Save api call.
+ return Promise.resolve( wikitext );
+ }
+
+ restReq = {
+ method: 'post',
+ body: {
+ wikitext,
+ body_only: true, // eslint-disable-line
+ },
+ headers: {
+ // See
https://www.mediawiki.org/wiki/Specs/HTML/1.5.0
+ accept: 'text/html; charset=utf-8;
profile="https://www.mediawiki.org/wiki/Specs/HTML/1.5.0"'
+ }
+ };
+ path = '/transform/wikitext/to/html';
+
+ return apiUtil.restApiGet( this.context, domain, path, restReq )
+ .then( ( response ) => response.body );
+ }
}
/**
diff --git a/lib/routes/v1.js b/lib/routes/v1.js
index 7506a81..6a43dfc 100644
--- a/lib/routes/v1.js
+++ b/lib/routes/v1.js
@@ -247,6 +247,8 @@
return machineTranslationRequest.then( ( translatedHTML ) => {
var CXAdapter = require( __dirname + '/../Adapter' );
+ app.conf.mtClient = mtClient;
+
return new CXAdapter( from, to, app )
.adapt( translatedHTML )
.then( ( adaptedDoc ) => {
diff --git a/lib/translationunits/MWGallery.js
b/lib/translationunits/MWGallery.js
new file mode 100644
index 0000000..afb75c4
--- /dev/null
+++ b/lib/translationunits/MWGallery.js
@@ -0,0 +1,67 @@
+const cxutil = require( '../util.js' ),
+ TranslationUnit = require( './TranslationUnit.js' ),
+ MWApiRequest = require( '../mw/ApiRequest.js' ),
+ MWApiRequestManager = require( '../mw/ApiRequestManager.js' );
+
+class MWGallery extends TranslationUnit {}
+
+MWGallery.prototype.adapt = cxutil.async( function*() {
+ var imageItems, imageTitle, namespaceAlias, imageCaptionWikitext,
requestManager,
+ imageCaptionHTML, imageCaptionHTMLTranslated;
+
+ this.galleryData = JSON.parse( this.node.attributes[ 'data-mw' ] );
+ if ( !this.galleryData ) {
+ this.log( 'error', 'Skipping invalid data-mw for gallery node:
' + this.node.attributes.id );
+ return this.node;
+ }
+ // mw-data.body.extsrc has each image of the gallery separated by line
breaks.
+ // The line breaks can be \r, \n or \r\n
+ imageItems = this.galleryData.body.extsrc.split( /[\r\n]+/ );
+
+ requestManager = new MWApiRequestManager( this.context );
+
+ for ( let i = 0; i < imageItems.length; i++ ) {
+ if ( !imageItems[ i ].trim() ) {
+ continue;
+ }
+ // Each item has image title and caption separated by '|'. But
the caption itself
+ // can have | for MW links, split by first occurrence alone.
+ [ imageTitle, imageCaptionWikitext ] = imageItems[ i
].trim().split( /\|(.+)/ );
+ // Adapt namespace alias of title
+ namespaceAlias = yield requestManager.getNamespaceAlias(
'File', this.targetLanguage );
+ imageTitle = imageTitle.replace( /^(\.\.?\/)*(.+)(:)/, '$1' +
namespaceAlias + '$3' );
+
+ // Translate image caption if there is MT client for the
adaptation context
+ if ( this.context.conf.mtClient ) {
+ // Convert the current image caption to html
+ imageCaptionHTML = yield new MWApiRequest( {
+ context: this.context,
+ sourceLanguage: this.sourceLanguage,
+ targetLanguage: this.targetLanguage
+ } ).wikitextToHTML( imageCaptionWikitext,
this.sourceLanguage );
+
+ // Machine translate the HTML caption
+ imageCaptionHTMLTranslated = yield
this.context.conf.mtClient.translate(
+ this.sourceLanguage, this.targetLanguage,
imageCaptionHTML
+ );
+
+ // Convert the machine translated html caption to
wikitext.
+ imageCaptionWikitext = yield new MWApiRequest( {
+ context: this.context,
+ sourceLanguage: this.sourceLanguage,
+ targetLanguage: this.targetLanguage
+ } ).htmlToWikiText( imageCaptionHTMLTranslated,
this.targetLanguage );
+ }
+
+ imageItems[ i ] = [ imageTitle, imageCaptionWikitext ].join(
'|' ).replace( '\n', '' );
+ }
+
+ this.galleryData.body.extsrc = imageItems.join( '\n' );
+ this.node.attributes[ 'data-mw' ] = JSON.stringify( this.galleryData );
+
+ return this.node;
+} );
+
+MWGallery.matchRdfaTypes = [ 'mw:Extension/gallery' ];
+
+module.exports = MWGallery;
diff --git a/lib/translationunits/MWImage.js b/lib/translationunits/MWImage.js
index f3052d4..9ae5704 100644
--- a/lib/translationunits/MWImage.js
+++ b/lib/translationunits/MWImage.js
@@ -74,7 +74,7 @@
if ( this.isCommonsImage( sourceImage.attributes[ 'src' ] ) ) {
namespaceAlias = yield new MWApiRequestManager( this.context
).getNamespaceAlias( 'File', this.targetLanguage );
- targetResource = this.sourceResource.replace(
/^(\.\.?\/)*(.+)(:)/, '$1' + namespaceAlias + '$3' );
+ targetResource = this.sourceResource.replace(
/^(\.\.?\/)*([^:]+)(:)/, '$1' + namespaceAlias + '$3' );
sourceImage.attributes[ 'resource' ] = imageLink.attributes[
'href' ] = targetResource;
} else {
// TODO: This format is not decided yet. We do need to inform
client about failed
diff --git a/lib/translationunits/TranslationUnit.js
b/lib/translationunits/TranslationUnit.js
index 89f218a..58bc94a 100644
--- a/lib/translationunits/TranslationUnit.js
+++ b/lib/translationunits/TranslationUnit.js
@@ -9,6 +9,13 @@
this.sourceLanguage = sourceLanguage;
this.targetLanguage = targetLanguage;
this.context = context;
+ this.logger = context.logger;
+ }
+
+ log( level, info ) {
+ if ( this.logger && this.logger.log ) {
+ this.logger.log( level, info );
+ }
}
adapt() {
diff --git a/lib/translationunits/index.js b/lib/translationunits/index.js
index c8ddd27..050f7a1 100644
--- a/lib/translationunits/index.js
+++ b/lib/translationunits/index.js
@@ -2,5 +2,6 @@
module.exports = {
MWLink: require( './MWLink.js' ),
- MWImage: require( './MWImage.js' )
+ MWImage: require( './MWImage.js' ),
+ MWGallery: require( './MWGallery.js' )
};
diff --git a/test/adaptation/AdaptationTest.js
b/test/adaptation/AdaptationTest.js
index c6c0500..1d61e8f 100644
--- a/test/adaptation/AdaptationTest.js
+++ b/test/adaptation/AdaptationTest.js
@@ -4,6 +4,7 @@
server = require( '../utils/server.js' ),
LinearDoc = require( '../../lib/lineardoc' ),
async = require( 'async' ),
+ Apertium = require( '../../lib/mt' ).Apertium,
Adapter = require( '../../lib/Adapter' ),
tests = require( './AdaptationTests.json' );
@@ -16,11 +17,13 @@
describe( 'Adaptation tests', function () {
async.forEach( tests, function ( test ) {
- var expectedResultData, adapter;
+ var expectedResultData, adapter, cxserver;
- adapter = new Adapter( test.from, test.to, server.config );
+ cxserver = server.config.conf.services[
server.config.conf.services.length - 1 ];
+ cxserver.conf.mtClient = new Apertium( cxserver );
+ adapter = new Adapter( test.from, test.to, cxserver );
it( 'should not have any errors when: ' + test.desc, function
() {
- return adapter.adapt( test.source ).then( function(
result ) {
+ return adapter.adapt( test.source ).then( ( result ) =>
{
result = normalize( result.getHtml() );
expectedResultData = normalize( test.result );
assert.deepEqual( result, expectedResultData,
test.source + ': ' + test.desc || '' );
diff --git a/test/adaptation/AdaptationTests.json
b/test/adaptation/AdaptationTests.json
index a97d3d6..9fb8671 100644
--- a/test/adaptation/AdaptationTests.json
+++ b/test/adaptation/AdaptationTests.json
@@ -33,5 +33,12 @@
"to": "ml",
"source": "<figure class='mw-default-size mw-halign-left'
typeof='mw:Image/Frame' id='mwUQ'><a
href='./File:Philos_experiment_of_the_burning_candle.PNG' id='mwUg'><img
alt='Drawing of a burning candle enclosed in a glass bulb.'
resource='./File:Philos_experiment_of_the_burning_candle.PNG'
src='//upload.wikimedia.org/wikipedia/mediawiki/e/e6/Philos_experiment_of_the_burning_candle.PNG'
data-file-width='125' data-file-height='248' data-file-type='bitmap'
height='248' width='125' id='mwUw'/></a><figcaption id='mwVA'><a
rel='mw:WikiLink' href='./Philo_of_Byzantium' title='Philo of Byzantium'
id='mwVQ'>Philo's</a> experiment inspired later <a rel='mw:WikiLink'
href='./Detective' title='Detective'
id='mwVg'>investigators</a>.</figcaption></figure>",
"result": "<figure class='mw-default-size mw-halign-left'
data-cx='{"adapted":false,"resource":"./File:Philos_experiment_of_the_burning_candle.PNG"}'
id='mwUQ' typeof='mw:Image/Frame'><a
href='./File:Philos_experiment_of_the_burning_candle.PNG' id='mwUg'><img
alt='Drawing of a burning candle enclosed in a glass bulb.'
data-file-height='248' data-file-type='bitmap' data-file-width='125'
height='248' id='mwUw'
resource='./File:Philos_experiment_of_the_burning_candle.PNG'
src='//upload.wikimedia.org/wikipedia/mediawiki/e/e6/Philos_experiment_of_the_burning_candle.PNG'
width='125' /></a><figcaption id='mwVA'><a
data-cx='{"adapted":false,"sourceTitle":"Philo of
Byzantium"}' href='./Philo_of_Byzantium' id='mwVQ' rel='mw:WikiLink'
title='Philo of Byzantium'>Philo's</a> experiment inspired later <a
data-cx='{"adapted":false,"sourceTitle":"Detective"}'
href='./Detective' id='mwVg' rel='mw:WikiLink'
title='Detective'>investigators</a>.</figcaption></figure>"
+ },
+ {
+ "desc": "Gallery adaptation",
+ "from": "en",
+ "to": "es",
+ "source": "<ul class='gallery mw-gallery-packed'
typeof='mw:Extension/gallery' about='#mwt2'
data-mw='{\"name\":\"gallery\",\"attrs\":{\"mode\":\"packed\"},\"body\":{\"extsrc\":\"\\r\\nFile:Nilgiri
Tahr 1.jpg|[[Nilgiri tahr]]\\r\\nFile:Malabar Giant Squirrel77.jpg|[[Malabar
Giant Squirrel]], [[Silent Valley National Park]]\\r\\nFile:Periyar
Thekkady.jpg|Elephants at Thekkady\\r\\nFile:Rhacophorus
malabaricus.jpg|[[Rhacophorus malabaricus]]\\r\\nFile:Brahminy
kite2.jpg|[[Brahminy kite]]\\r\\nFile:Goldenbacked woodpecker.jpg|[[Dinopium
benghalense|Lesser golden-backed woodpecker]] water\\r\\n\"}}' id='mwAQ'></ul>",
+ "result": "<ul about=\"#mwt2\" class=\"gallery
mw-gallery-packed\"
data-mw=\"{"name":"gallery","attrs":{"mode":"packed"},"body":{"extsrc":"\\nArchivo:Nilgiri
Tahr 1.jpg|[[Nilgiri tahr]]\\nArchivo:Malabar Giant Squirrel77.jpg|[[Malabar
Giant Squirrel|Malabar Ardilla giganta]], [[Silent Valley National Park|Valle
Silencioso Parque Nacional]]\\nArchivo:Periyar Thekkady.jpg|Elefantes en
Thekkady\\nArchivo:Rhacophorus malabaricus.jpg|[[Rhacophorus
malabaricus]]\\nArchivo:Brahminy kite2.jpg|[[Brahminy kite|Brahminy
Cometa]]\\nArchivo:Goldenbacked woodpecker.jpg|[[Dinopium benghalense|Menor
dorado-reculado woodpecker]] agua\\n"}}\" id=\"mwAQ\"
typeof=\"mw:Extension/gallery\"></ul>"
}
]
--
To view, visit https://gerrit.wikimedia.org/r/370185
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ifd79a2f754af442483ef5b6c661be82d2fea648e
Gerrit-PatchSet: 12
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <[email protected]>
Gerrit-Reviewer: Catrope <[email protected]>
Gerrit-Reviewer: Nikerabbit <[email protected]>
Gerrit-Reviewer: Santhosh <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits