[MediaWiki-commits] [Gerrit] mediawiki...cxserver[master]: MT: Abstract the wrapping of content from API usage

2017-10-05 Thread Nikerabbit (Code Review)
Nikerabbit has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/374520 )

Change subject: MT: Abstract the <div> wrapping of content from API usage
..


MT: Abstract the <div> wrapping of content from API usage

MTClient does a quick check whether the content is plaintext, if so,
translateText is used. Otherwise proceeds to translateHTML, but the
sourceDoc building step checks if we were able to build one. Without
a wrapper tag, the sourceDoc will be empty. At that point we will
re-attempt parsing with wrapper tag. For the result of this wrapped
input, unwrap it after MT is received.

The annoying <div> wrapper for plain text input will not occur now.

Depends-On: I014ced5bc306654053cf47c8cdfe4384ce90bdfb
Change-Id: If7d93a1f268d87b6980e022b107608cb6af8e559
---
M lib/mt/MTClient.js
M lib/routes/v1.js
M spec.yaml
3 files changed, 14 insertions(+), 3 deletions(-)

Approvals:
  Catrope: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/mt/MTClient.js b/lib/mt/MTClient.js
index 755283d..c99874a 100644
--- a/lib/mt/MTClient.js
+++ b/lib/mt/MTClient.js
@@ -3,6 +3,7 @@
 const LinearDoc = require( __dirname + '/../lineardoc' ),
SubSequenceMatcher = require( 
'./annotationmapper/SubsequenceMatcher.js' ),
createDOMPurify = require( 'dompurify' ),
+   cxUtil = require( '../util.js' ),
jsdom = require( 'jsdom' );
 
 /**
@@ -17,6 +18,7 @@
this.conf = options.conf;
this.sourceDoc = null;
this.sourceHTML = null;
+   this.contentWrapped = false;
}
 
log( level, info ) {
@@ -35,7 +37,7 @@
 * @return {Object} Deferred promise: Target language text
 */
translate( sourceLang, targetLang, content, format ) {
-   if ( format === 'text' ) {
+   if ( format === 'text' || cxUtil.isPlainText( content ) ) {
return this.translateText( sourceLang, targetLang, 
content );
} else {
return this.translateHtml( sourceLang, targetLang, 
content );
@@ -85,6 +87,10 @@
 
return Promise.all( chain ).then( ( results ) => {
targetDoc.items = results;
+   if ( this.contentWrapped ) {
+   // Unwrap
+   targetDoc = targetDoc.items[ 1 ].item;
+   }
// Return sanitized HTML output
return this.sanitize( targetDoc.getHtml() );
} );
@@ -360,6 +366,11 @@
parser.write( sourceHtml );
this.sourceHTML = sourceHtml;
this.sourceDoc = parser.builder.doc;
+   if ( !this.sourceDoc || !this.sourceDoc.items.length ) {
+   this.log( 'debug', 'Could not build a source doc. May 
be plain text? Re-attempting with  wrapper' );
+   this.contentWrapped = true;
+   this.buildSourceDoc( [ '', sourceHtml, '' 
].join( '' ) );
+   }
}
 
/**
diff --git a/lib/routes/v1.js b/lib/routes/v1.js
index d976697..715e59e 100644
--- a/lib/routes/v1.js
+++ b/lib/routes/v1.js
@@ -112,7 +112,7 @@
 
// We support setting html as body or as body.html. But body.html is 
the recommended way.
// The other way will be removed soon.
-   sourceHtml = [ '', req.body.html || req.rawBody, '' ].join( 
'' );
+   sourceHtml = req.body.html || req.rawBody;
return mtClient.translate( from, to, sourceHtml ).then(
function ( data ) {
res.json( {
diff --git a/spec.yaml b/spec.yaml
index df2f066..0e27708 100644
--- a/spec.yaml
+++ b/spec.yaml
@@ -182,7 +182,7 @@
- Matxin
 - name: html
   in: formData
-  description: The HTML content to translate
+  description: The HTML or plaintext content to translate
   type: string
   required: true
   x-textarea: true

-- 
To view, visit https://gerrit.wikimedia.org/r/374520
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: If7d93a1f268d87b6980e022b107608cb6af8e559
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh 
Gerrit-Reviewer: Catrope 
Gerrit-Reviewer: KartikMistry 
Gerrit-Reviewer: Nikerabbit 
Gerrit-Reviewer: Santhosh 
Gerrit-Reviewer: jenkins-bot <>

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] mediawiki...cxserver[master]: MT: Abstract the wrapping of content from API usage

2017-08-29 Thread Santhosh (Code Review)
Santhosh has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/374520 )

Change subject: MT: Abstract the <div> wrapping of content from API usage
..

MT: Abstract the <div> wrapping of content from API usage

MTClient does a quick check whether the content is plaintext, if so,
translateText is used. Otherwise proceeds to translateHTML, but the
sourceDoc building step checks if we were able to build one. Without
a wrapper tag, the sourceDoc will be empty. At that point we will
re-attempt parsing with wrapper tag. For the result of this wrapped
input, unwrap it after MT is received.

The annoying <div> wrapper for plain text input will not occur now.

Change-Id: If7d93a1f268d87b6980e022b107608cb6af8e559
---
M lib/mt/MTClient.js
M lib/routes/v1.js
M spec.yaml
3 files changed, 15 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver 
refs/changes/20/374520/1

diff --git a/lib/mt/MTClient.js b/lib/mt/MTClient.js
index 242c9c2..90b5e44 100644
--- a/lib/mt/MTClient.js
+++ b/lib/mt/MTClient.js
@@ -3,6 +3,7 @@
 const LinearDoc = require( __dirname + '/../lineardoc' ),
SubSequenceMatcher = require( 
'./annotationmapper/SubsequenceMatcher.js' ),
createDOMPurify = require( 'dompurify' ),
+   cxUtil = require( '../util.js' ),
jsdom = require( 'jsdom' );
 
 /**
@@ -17,6 +18,7 @@
this.conf = options.conf;
this.sourceDoc = null;
this.sourceHTML = null;
+   this.contentWrapped = false;
}
 
log( level, info ) {
@@ -35,7 +37,7 @@
 * @return {Object} Deferred promise: Target language text
 */
translate( sourceLang, targetLang, content, format ) {
-   if ( format === 'text' ) {
+   if ( format === 'text' || cxUtil.isPlainText( content ) ) {
return this.translateText( sourceLang, targetLang, 
content );
} else {
return this.translateHtml( sourceLang, targetLang, 
content );
@@ -85,8 +87,12 @@
 
return Promise.all( chain ).then( ( results ) => {
targetDoc.items = results;
+   if ( this.contentWrapped ) {
+   // Unwrap
+   targetDoc = targetDoc.items[ 1 ].item;
+   }
// Return sanitized HTML output
-   return this.sanitize( targetDoc.getHtml() );
+   { return this.sanitize( targetDoc.getHtml() ); }
} );
}
 
@@ -359,6 +365,11 @@
parser.write( sourceHtml );
this.sourceHTML = sourceHtml;
this.sourceDoc = parser.builder.doc;
+   if ( !this.sourceDoc || !this.sourceDoc.items.length ) {
+   this.log( 'debug', 'Could not build a source doc. May 
be plain text? Re-attempting with  wrapper' );
+   this.contentWrapped = true;
+   this.buildSourceDoc( [ '', sourceHtml, '' 
].join( '' ) );
+   }
}
 
/**
diff --git a/lib/routes/v1.js b/lib/routes/v1.js
index d976697..715e59e 100644
--- a/lib/routes/v1.js
+++ b/lib/routes/v1.js
@@ -112,7 +112,7 @@
 
// We support setting html as body or as body.html. But body.html is 
the recommended way.
// The other way will be removed soon.
-   sourceHtml = [ '', req.body.html || req.rawBody, '' ].join( 
'' );
+   sourceHtml = req.body.html || req.rawBody;
return mtClient.translate( from, to, sourceHtml ).then(
function ( data ) {
res.json( {
diff --git a/spec.yaml b/spec.yaml
index df2f066..0e27708 100644
--- a/spec.yaml
+++ b/spec.yaml
@@ -182,7 +182,7 @@
- Matxin
 - name: html
   in: formData
-  description: The HTML content to translate
+  description: The HTML or plaintext content to translate
   type: string
   required: true
   x-textarea: true

-- 
To view, visit https://gerrit.wikimedia.org/r/374520
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If7d93a1f268d87b6980e022b107608cb6af8e559
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/cxserver
Gerrit-Branch: master
Gerrit-Owner: Santhosh 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits