Santhosh has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/363804 )
Change subject: WIP: Adaptation in cxserver
......................................................................
WIP: Adaptation in cxserver
Change-Id: I8f748fb290767cf3ca913448d66695a0dec00736
---
A adapt/Adapter.js
A adapt/TranslationUnits/Image.js
A adapt/TranslationUnits/Link.js
A adapt/TranslationUnits/Reference.js
A adapt/TranslationUnits/index.js
M lineardoc/Doc.js
M lineardoc/TextBlock.js
7 files changed, 231 insertions(+), 0 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver
refs/changes/04/363804/1
diff --git a/adapt/Adapter.js b/adapt/Adapter.js
new file mode 100644
index 0000000..d71b5dc
--- /dev/null
+++ b/adapt/Adapter.js
@@ -0,0 +1,53 @@
+'use strict';
+
+var LinearDoc = require( '../lineardoc' );
+
+/**
+ * Drives the adaptation of a piece of content from a source language to a
+ * target language.
+ *
+ * @class
+ * @param {string} content Content to adapt; it is written to the LinearDoc
+ *  parser by adapt() (presumably an HTML string — confirm against the parser)
+ * @param {string} sourceLanguage Source language, handed to translation units
+ * @param {string} targetLanguage Target language, handed to translation units
+ */
+function Adapter( content, sourceLanguage, targetLanguage ) {
+	var parser = new LinearDoc.Parser();
+
+	parser.init();
+	this.parser = parser;
+	this.sourceLanguage = sourceLanguage;
+	this.targetLanguage = targetLanguage;
+	this.content = content;
+	// Both documents stay null until adapt() has run
+	this.originalDoc = null;
+	this.adaptedDoc = null;
+}
+
+/**
+ * Parse the content and adapt the resulting document.
+ *
+ * The parsed document is kept in `originalDoc` and the adapted one in
+ * `adaptedDoc`.
+ *
+ * @return {Object} The adapted document, as returned by the parsed document's adapt()
+ */
+Adapter.prototype.adapt = function () {
+	// The document calls the lookup as a plain function, so bind it to this instance
+	var lookupAdapter = this.getAdapter.bind( this );
+
+	this.parser.write( this.content );
+	this.originalDoc = this.parser.builder.doc;
+	this.adaptedDoc = this.originalDoc.adapt( lookupAdapter );
+	return this.adaptedDoc;
+};
+
+/**
+ * Find the translation unit that can adapt the given tag and instantiate it.
+ *
+ * Translation units are tried in the order in which the TranslationUnits
+ * module exports them; the first match wins. A unit matches when the tag
+ * satisfies every criterion the unit declares: the tag name is listed in
+ * `matchTagNames`, and the tag's `rel` or `typeof` attribute is listed in
+ * `matchRdfaTypes`. A unit declaring neither criterion never matches.
+ *
+ * @param {Object} element Tag to adapt; its `name` and `attributes` are read
+ * @return {Object|null} New translation unit instance for the tag, or null
+ *  if no translation unit matches
+ */
+Adapter.prototype.getAdapter = function ( element ) {
+	var i, unitNames, TranslationUnit, tagNames, rdfaTypes,
+		match = false,
+		// Tolerate tags without an attribute map
+		attributes = element.attributes || {},
+		translationUnits = require( __dirname + '/TranslationUnits/' );
+
+	unitNames = Object.keys( translationUnits );
+	for ( i = 0; i < unitNames.length; i++ ) {
+		TranslationUnit = translationUnits[ unitNames[ i ] ];
+		tagNames = TranslationUnit.matchTagNames;
+		rdfaTypes = TranslationUnit.matchRdfaTypes;
+
+		// Start from "declares at least one criterion", then require every
+		// declared criterion to hold. The RDFa check must not overwrite a
+		// failed tag name check.
+		match = !!( tagNames || rdfaTypes );
+		if ( match && tagNames ) {
+			match = tagNames.includes( element.name );
+		}
+		if ( match && rdfaTypes ) {
+			match = rdfaTypes.includes( attributes.rel ) ||
+				rdfaTypes.includes( attributes.typeof );
+		}
+		if ( match ) {
+			break;
+		}
+	}
+
+	if ( !match ) {
+		console.log( 'No adapter for' );
+		console.dir( element );
+		return null;
+	}
+
+	console.log( 'Adapting: ' + TranslationUnit.name );
+	return new TranslationUnit( element, {
+		sourceLanguage: this.sourceLanguage,
+		targetLanguage: this.targetLanguage
+	} );
+};
+
+module.exports.Adapter = Adapter;
diff --git a/adapt/TranslationUnits/Image.js b/adapt/TranslationUnits/Image.js
new file mode 100644
index 0000000..1715ee3
--- /dev/null
+++ b/adapt/TranslationUnits/Image.js
@@ -0,0 +1,78 @@
+/**
+ * Translation unit for images.
+ *
+ * @class
+ * @param {Object} node Tag to adapt
+ * @param {Object} context Adaptation context
+ * @param {string} context.sourceLanguage Source language
+ * @param {string} context.targetLanguage Target language
+ */
+function MWImage( node, context ) {
+	this.node = node;
+	this.context = context;
+}
+
+// Function#name is not writable: a plain `MWImage.name = 'image'` is silently
+// ignored in sloppy mode and throws in strict mode. It is configurable
+// though, so redefine it.
+Object.defineProperty( MWImage, 'name', { value: 'image' } );
+// Matching criteria: tag names and RDFa types (`rel` / `typeof` values)
+MWImage.matchTagNames = [ 'figure' ];
+MWImage.matchRdfaTypes = [ 'mw:Image/Thumb' ];
+
+/**
+ * Adapt the image for the target language.
+ *
+ * For now this only flags the node as adapted.
+ *
+ * TODO: Port the Commons check of the client-side implementation. There, an
+ * image whose `src` is not on Commons (see MWImage.isCommonsImage) was
+ * replaced by an empty paragraph and reported as not adaptable.
+ *
+ * @return {Object} The node given to the constructor, with its `adapted`
+ *  attribute set to 'true'
+ */
+MWImage.prototype.adapt = function () {
+	this.node.attributes.adapted = 'true';
+	return this.node;
+};
+
+/**
+ * Adapt the image namespace to the target language.
+ *
+ * Replaces the namespace part of the source resource with the target
+ * language's alias of the 'File' namespace, stores the result in
+ * `targetResource` and sets it as the `resource` attribute of the target image.
+ *
+ * NOTE(review): `requestManager`, `sourceResource` and `targetImage` are not
+ * set by the constructor; they have to be provided before this can be called.
+ *
+ * @return {Promise} Presumably a promise settled once the resource is updated:
+ *  whatever `requestManager.getNamespaceAlias( ... ).then()` returns — confirm
+ */
+MWImage.prototype.adaptNamespace = function () {
+	var self = this;
+
+	return this.requestManager.getNamespaceAlias(
+		// The languages are only available through the context given to the
+		// constructor; there is no `targetLanguage` property on the instance.
+		this.context.targetLanguage,
+		'File'
+	).then( function ( namespaceAlias ) {
+		self.targetResource = self.sourceResource.replace(
+			/(\.\/)*(.+)(:)/g,
+			'$1' + namespaceAlias + '$3'
+		);
+		self.targetImage.setAttribute( 'resource', self.targetResource );
+	} );
+};
+
+/**
+ * Tell whether an image comes from Commons, based on the URL pattern of the
+ * common file repository.
+ *
+ * @static
+ * @param {string} imageSrc Image URL
+ * @return {boolean} True if imageSrc starts with the protocol-relative
+ *  Commons upload path
+ */
+MWImage.isCommonsImage = function ( imageSrc ) {
+	var commonsPrefix = '//upload.wikimedia.org/wikipedia/commons/';
+
+	return imageSrc.startsWith( commonsPrefix );
+};
+
+/**
+ * Adapt the image's alignment settings for the target language.
+ *
+ * An explicit alignment class in the HTML means that the alignment was
+ * defined explicitly in wiki syntax, so it must be flipped when the target
+ * language's direction differs from the source language's. Figures without
+ * an explicit alignment class are left untouched.
+ *
+ * NOTE(review): relies on a global `$` providing `$.uls.data.getDir`, which
+ * this file does not require — confirm it is available where this runs.
+ *
+ * @param {Element} targetFigure
+ */
+MWImage.prototype.adaptImageAlignment = function ( targetFigure ) {
+	var alignedLeft, alignedRight,
+		// The languages live on the context given to the constructor
+		sourceDir = $.uls.data.getDir( this.context.sourceLanguage ),
+		targetDir = $.uls.data.getDir( this.context.targetLanguage );
+
+	if ( sourceDir === targetDir ) {
+		// If the target language's direction is the same, there's nothing to do
+		return;
+	}
+
+	alignedLeft = targetFigure.classList.contains( 'mw-halign-left' );
+	alignedRight = targetFigure.classList.contains( 'mw-halign-right' );
+	// Flip only an explicit alignment. Toggling both classes on a figure that
+	// has neither would add both of them.
+	if ( alignedLeft !== alignedRight ) {
+		targetFigure.classList.toggle( 'mw-halign-left' );
+		targetFigure.classList.toggle( 'mw-halign-right' );
+	}
+};
+
+module.exports = MWImage;
diff --git a/adapt/TranslationUnits/Link.js b/adapt/TranslationUnits/Link.js
new file mode 100644
index 0000000..d73092a
--- /dev/null
+++ b/adapt/TranslationUnits/Link.js
@@ -0,0 +1,15 @@
+/**
+ * Translation unit for wiki links.
+ *
+ * @class
+ * @param {Object} node Tag to adapt
+ * @param {Object} context Adaptation context
+ * @param {string} context.sourceLanguage Source language
+ * @param {string} context.targetLanguage Target language
+ */
+function MWLink( node, context ) {
+	this.node = node;
+	this.context = context;
+}
+
+// Function#name is not writable: a plain `MWLink.name = 'link'` is silently
+// ignored in sloppy mode and throws in strict mode. It is configurable
+// though, so redefine it.
+Object.defineProperty( MWLink, 'name', { value: 'link' } );
+// Matching criteria: tag names and RDFa types (`rel` / `typeof` values)
+MWLink.matchTagNames = [ 'a' ];
+MWLink.matchRdfaTypes = [ 'mw:WikiLink' ];
+
+/**
+ * Adapt the link for the target language.
+ *
+ * For now this only flags the node as adapted.
+ *
+ * @return {Object} The node given to the constructor, with its `adapted`
+ *  attribute set to 'true'
+ */
+MWLink.prototype.adapt = function () {
+	this.node.attributes.adapted = 'true';
+	return this.node;
+};
+
+module.exports = MWLink;
diff --git a/adapt/TranslationUnits/Reference.js
b/adapt/TranslationUnits/Reference.js
new file mode 100644
index 0000000..33e5c65
--- /dev/null
+++ b/adapt/TranslationUnits/Reference.js
@@ -0,0 +1,15 @@
+/**
+ * Translation unit for references.
+ *
+ * @class
+ * @param {Object} node Tag to adapt
+ * @param {Object} context Adaptation context
+ * @param {string} context.sourceLanguage Source language
+ * @param {string} context.targetLanguage Target language
+ */
+function MWReference( node, context ) {
+	this.node = node;
+	this.context = context;
+}
+
+// Function#name is not writable: a plain `MWReference.name = 'reference'` is
+// silently ignored in sloppy mode and throws in strict mode. It is
+// configurable though, so redefine it.
+Object.defineProperty( MWReference, 'name', { value: 'reference' } );
+// Matching criteria: tag names and RDFa types (`rel` / `typeof` values)
+MWReference.matchTagNames = [ 'span' ];
+MWReference.matchRdfaTypes = [ 'dc:references', 'mw:Extension/ref' ];
+
+/**
+ * Adapt the reference for the target language.
+ *
+ * For now this only flags the node as adapted.
+ *
+ * @return {Object} The node given to the constructor, with its `adapted`
+ *  attribute set to 'true'
+ */
+MWReference.prototype.adapt = function () {
+	this.node.attributes.adapted = 'true';
+	return this.node;
+};
+
+module.exports = MWReference;
diff --git a/adapt/TranslationUnits/index.js b/adapt/TranslationUnits/index.js
new file mode 100644
index 0000000..6a64596
--- /dev/null
+++ b/adapt/TranslationUnits/index.js
@@ -0,0 +1,5 @@
+var MWLink = require( './Link.js' ),
+	MWImage = require( './Image.js' ),
+	MWReference = require( './Reference.js' );
+
+// Registry of the available translation units, keyed by unit name.
+// Keep the key order: Adapter#getAdapter enumerates the keys and uses the
+// first unit that matches.
+module.exports = {
+	Link: MWLink,
+	Image: MWImage,
+	Reference: MWReference
+};
diff --git a/lineardoc/Doc.js b/lineardoc/Doc.js
index a5d1661..cc51ff9 100644
--- a/lineardoc/Doc.js
+++ b/lineardoc/Doc.js
@@ -131,6 +131,50 @@
};
/**
+ * Adapt the document: every open tag, including the wrapper tag, that has an
+ * adapter is replaced by its adapted version, and text blocks adapt themselves.
+ *
+ * @method
+ * @param {Function} getAdapter Function taking a tag, returning an adapter (an
+ *  object whose adapt() method returns the adapted tag), or a falsy value if
+ *  there is no adapter for the tag
+ * @return {Doc} Adapted version of document TODO: warning: *shallow copied*.
+ */
+Doc.prototype.adapt = function ( getAdapter ) {
+	var i, len, item, tag, adapter, newDoc;
+
+	if ( this.wrapperTag ) {
+		adapter = getAdapter( this.wrapperTag );
+		// Keep the original wrapper tag when there is no adapter for it,
+		// instead of silently dropping it
+		newDoc = new Doc( adapter ? adapter.adapt() : this.wrapperTag );
+	} else {
+		newDoc = new Doc();
+	}
+
+	for ( i = 0, len = this.items.length; i < len; i++ ) {
+		item = this.items[ i ];
+		if ( item.type === 'open' ) {
+			tag = Utils.cloneOpenTag( item.item );
+			if ( i + 1 < len && this.items[ i + 1 ].type === 'textblock' ) {
+				// Expose the text block that directly follows the tag to the adapter
+				tag.children = this.items[ i + 1 ].item;
+			}
+			// TODO: Make async
+			adapter = getAdapter( tag );
+			newDoc.addItem( item.type, adapter ? adapter.adapt() : tag );
+		} else if ( item.type === 'textblock' ) {
+			newDoc.addItem( 'textblock', item.item.adapt( getAdapter ) );
+		} else {
+			// Anything else is copied over as is
+			newDoc.addItem( item.type, item.item );
+		}
+	}
+	return newDoc;
+};
+
+/**
* Dump an XML version of the linear representation, for debugging
*
* @method
diff --git a/lineardoc/TextBlock.js b/lineardoc/TextBlock.js
index 0845095..1b77970 100644
--- a/lineardoc/TextBlock.js
+++ b/lineardoc/TextBlock.js
@@ -412,4 +412,25 @@
return dump;
};
+/**
+ * Adapt the tags and the inline content of every text chunk, in place.
+ *
+ * @method
+ * @param {Function} getAdapter Function taking a tag, returning an adapter (an
+ *  object whose adapt() method returns the adapted tag), or a falsy value if
+ *  there is no adapter for the tag
+ * @return {TextBlock} This text block, modified in place
+ */
+TextBlock.prototype.adapt = function ( getAdapter ) {
+	this.textChunks.forEach( function ( textChunk ) {
+		var tagIndex, tagAdapter,
+			chunkTags = textChunk.tags;
+
+		for ( tagIndex = 0; tagIndex < chunkTags.length; tagIndex++ ) {
+			tagAdapter = getAdapter( chunkTags[ tagIndex ] );
+			if ( tagAdapter ) {
+				chunkTags[ tagIndex ] = tagAdapter.adapt();
+			}
+		}
+
+		// Inline content that can adapt itself (e.g. a sub-document) is
+		// replaced by its adapted version
+		if ( textChunk.inlineContent && textChunk.inlineContent.adapt ) {
+			textChunk.inlineContent = textChunk.inlineContent.adapt( getAdapter );
+		}
+	} );
+	return this;
+};
+
module.exports = TextBlock;
--
To view, visit https://gerrit.wikimedia.org/r/363804
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I8f748fb290767cf3ca913448d66695a0dec00736
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits