[MediaWiki-commits] [Gerrit] mediawiki...cxserver[master]: MT: Abstract the wrapping of content from API usage
Nikerabbit has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/374520 ) Change subject: MT: Abstract the wrapping of content from API usage .. MT: Abstract the wrapping of content from API usage MTClient does a quick check whether the content is plaintext, if so, translateText is used. Otherwise proceeds to translateHTML, but the sourceDoc building step checks if we were able to build one. Without a wrapper tag, the sourceDoc will be empty. At that point we will re-attempt parsing with wrapper tag. For the result of this wrapped input, unwrap it after MT is received. The annoying wrapper for plain text input will not occur now. Depends-On: I014ced5bc306654053cf47c8cdfe4384ce90bdfb Change-Id: If7d93a1f268d87b6980e022b107608cb6af8e559 --- M lib/mt/MTClient.js M lib/routes/v1.js M spec.yaml 3 files changed, 14 insertions(+), 3 deletions(-) Approvals: Catrope: Looks good to me, approved jenkins-bot: Verified diff --git a/lib/mt/MTClient.js b/lib/mt/MTClient.js index 755283d..c99874a 100644 --- a/lib/mt/MTClient.js +++ b/lib/mt/MTClient.js @@ -3,6 +3,7 @@ const LinearDoc = require( __dirname + '/../lineardoc' ), SubSequenceMatcher = require( './annotationmapper/SubsequenceMatcher.js' ), createDOMPurify = require( 'dompurify' ), + cxUtil = require( '../util.js' ), jsdom = require( 'jsdom' ); /** @@ -17,6 +18,7 @@ this.conf = options.conf; this.sourceDoc = null; this.sourceHTML = null; + this.contentWrapped = false; } log( level, info ) { @@ -35,7 +37,7 @@ * @return {Object} Deferred promise: Target language text */ translate( sourceLang, targetLang, content, format ) { - if ( format === 'text' ) { + if ( format === 'text' || cxUtil.isPlainText( content ) ) { return this.translateText( sourceLang, targetLang, content ); } else { return this.translateHtml( sourceLang, targetLang, content ); @@ -85,6 +87,10 @@ return Promise.all( chain ).then( ( results ) => { targetDoc.items = results; + if ( this.contentWrapped ) { + // Unwrap + targetDoc = 
targetDoc.items[ 1 ].item; + } // Return sanitized HTML output return this.sanitize( targetDoc.getHtml() ); } ); @@ -360,6 +366,11 @@ parser.write( sourceHtml ); this.sourceHTML = sourceHtml; this.sourceDoc = parser.builder.doc; + if ( !this.sourceDoc || !this.sourceDoc.items.length ) { + this.log( 'debug', 'Could not build a source doc. May be plain text? Re-attempting with wrapper' ); + this.contentWrapped = true; + this.buildSourceDoc( [ '', sourceHtml, '' ].join( '' ) ); + } } /** diff --git a/lib/routes/v1.js b/lib/routes/v1.js index d976697..715e59e 100644 --- a/lib/routes/v1.js +++ b/lib/routes/v1.js @@ -112,7 +112,7 @@ // We support setting html as body or as body.html. But body.html is the recommended way. // The other way will be removed soon. - sourceHtml = [ '', req.body.html || req.rawBody, '' ].join( '' ); + sourceHtml = req.body.html || req.rawBody; return mtClient.translate( from, to, sourceHtml ).then( function ( data ) { res.json( { diff --git a/spec.yaml b/spec.yaml index df2f066..0e27708 100644 --- a/spec.yaml +++ b/spec.yaml @@ -182,7 +182,7 @@ - Matxin - name: html in: formData - description: The HTML content to translate + description: The HTML or plaintext content to translate type: string required: true x-textarea: true -- To view, visit https://gerrit.wikimedia.org/r/374520 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: If7d93a1f268d87b6980e022b107608cb6af8e559 Gerrit-PatchSet: 4 Gerrit-Project: mediawiki/services/cxserver Gerrit-Branch: master Gerrit-Owner: Santhosh Gerrit-Reviewer: Catrope Gerrit-Reviewer: KartikMistry Gerrit-Reviewer: Nikerabbit Gerrit-Reviewer: Santhosh Gerrit-Reviewer: jenkins-bot <> ___ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
[MediaWiki-commits] [Gerrit] mediawiki...cxserver[master]: MT: Abstract the wrapping of content from API usage
Santhosh has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/374520 ) Change subject: MT: Abstract the wrapping of content from API usage .. MT: Abstract the wrapping of content from API usage MTClient does a quick check whether the content is plaintext, if so, translateText is used. Otherwise proceeds to translateHTML, but the sourceDoc building step checks if we were able to build one. Without a wrapper tag, the sourceDoc will be empty. At that point we will re-attempt parsing with wrapper tag. For the result of this wrapped input, unwrap it after MT is received. The annoying wrapper for plain text input will not occur now. Change-Id: If7d93a1f268d87b6980e022b107608cb6af8e559 --- M lib/mt/MTClient.js M lib/routes/v1.js M spec.yaml 3 files changed, 15 insertions(+), 4 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver refs/changes/20/374520/1 diff --git a/lib/mt/MTClient.js b/lib/mt/MTClient.js index 242c9c2..90b5e44 100644 --- a/lib/mt/MTClient.js +++ b/lib/mt/MTClient.js @@ -3,6 +3,7 @@ const LinearDoc = require( __dirname + '/../lineardoc' ), SubSequenceMatcher = require( './annotationmapper/SubsequenceMatcher.js' ), createDOMPurify = require( 'dompurify' ), + cxUtil = require( '../util.js' ), jsdom = require( 'jsdom' ); /** @@ -17,6 +18,7 @@ this.conf = options.conf; this.sourceDoc = null; this.sourceHTML = null; + this.contentWrapped = false; } log( level, info ) { @@ -35,7 +37,7 @@ * @return {Object} Deferred promise: Target language text */ translate( sourceLang, targetLang, content, format ) { - if ( format === 'text' ) { + if ( format === 'text' || cxUtil.isPlainText( content ) ) { return this.translateText( sourceLang, targetLang, content ); } else { return this.translateHtml( sourceLang, targetLang, content ); @@ -85,8 +87,12 @@ return Promise.all( chain ).then( ( results ) => { targetDoc.items = results; + if ( this.contentWrapped ) { + // Unwrap + targetDoc = targetDoc.items[ 1 ].item; + } // Return 
sanitized HTML output - return this.sanitize( targetDoc.getHtml() ); + { return this.sanitize( targetDoc.getHtml() ); } } ); } @@ -359,6 +365,11 @@ parser.write( sourceHtml ); this.sourceHTML = sourceHtml; this.sourceDoc = parser.builder.doc; + if ( !this.sourceDoc || !this.sourceDoc.items.length ) { + this.log( 'debug', 'Could not build a source doc. May be plain text? Re-attempting with wrapper' ); + this.contentWrapped = true; + this.buildSourceDoc( [ '', sourceHtml, '' ].join( '' ) ); + } } /** diff --git a/lib/routes/v1.js b/lib/routes/v1.js index d976697..715e59e 100644 --- a/lib/routes/v1.js +++ b/lib/routes/v1.js @@ -112,7 +112,7 @@ // We support setting html as body or as body.html. But body.html is the recommended way. // The other way will be removed soon. - sourceHtml = [ '', req.body.html || req.rawBody, '' ].join( '' ); + sourceHtml = req.body.html || req.rawBody; return mtClient.translate( from, to, sourceHtml ).then( function ( data ) { res.json( { diff --git a/spec.yaml b/spec.yaml index df2f066..0e27708 100644 --- a/spec.yaml +++ b/spec.yaml @@ -182,7 +182,7 @@ - Matxin - name: html in: formData - description: The HTML content to translate + description: The HTML or plaintext content to translate type: string required: true x-textarea: true -- To view, visit https://gerrit.wikimedia.org/r/374520 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: If7d93a1f268d87b6980e022b107608cb6af8e559 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/cxserver Gerrit-Branch: master Gerrit-Owner: Santhosh ___ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits