Santhosh has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/383788 )
Change subject: WIP: V2 apis ...................................................................... WIP: V2 apis Change-Id: I8ca37ce4884aa8d65c16b14ae682b395afb848ab --- M app.js D lib/routes/doc.js M lib/routes/v1.js A lib/routes/v2.js A lib/swagger-ui.js M spec.yaml 6 files changed, 533 insertions(+), 161 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/cxserver refs/changes/88/383788/1 diff --git a/app.js b/app.js index 1795d04..e18ac46 100644 --- a/app.js +++ b/app.js @@ -148,7 +148,7 @@ } // import the route file route = require( __dirname + '/lib/routes/' + fname ); - return route( app ); + return route.create ? route.create( app ) : route( app ); } ).then( function ( route ) { if ( route === undefined ) { return undefined; diff --git a/lib/routes/doc.js b/lib/routes/doc.js deleted file mode 100644 index b2be3b1..0000000 --- a/lib/routes/doc.js +++ /dev/null @@ -1,93 +0,0 @@ -'use strict'; - -var app, sUtil = require( '../util' ), - BBPromise = require( 'bluebird' ), - fs = BBPromise.promisifyAll( require( 'fs' ) ), - // Swagger-ui helpfully exports the absolute path of its dist directory - docRoot = require( 'swagger-ui' ).dist + '/', - router = sUtil.router(); - -function staticServe( req ) { - // Expand any relative paths for security - var filePath = req.query.path.replace( /\.\.\//g, '' ); - return fs.readFileAsync( docRoot + filePath, 'utf8' ) - .then( function ( body ) { - var contentType; - if ( filePath === '/index.html' ) { - // Rewrite the HTML to use a query string - body = body.replace( /((?:src|href)=['"])/g, '$1?doc=&path=' ) - // Some self-promotion - .replace( /<a id="logo".*?<\/a>/, - '<a id="logo" href="https://www.mediawiki.org/wiki/CX">' + app.conf.spec.info.title + '</a>' ) - .replace( /<title>[^<]*<\/title>/, - '<title>' + app.conf.spec.info.title + ' docs</title>' ) - // Replace the default url with ours, switch off validation & - // limit the size of documents to apply syntax highlighting to - .replace( /Sorter: "alpha"/, 'Sorter: "alpha", validatorUrl: null, ' + - 'highlightSizeThreshold: 10000, docExpansion: "list"' ) - .replace( / url: url,/, 'url: "?spec",' ); - } - - contentType = 'text/html'; - if ( /\.js$/.test( filePath ) ) { - contentType = 'text/javascript'; - } else if ( /\.png/.test( filePath ) ) { - contentType = 'image/png'; - } else if ( /\.css/.test( filePath ) ) { - contentType = 'text/css'; - body = body.replace( /\.\.\/(images|fonts)\//g, '?doc&path=$1/' ); - } - return BBPromise.resolve( { - status: 200, - headers: { - 'content-type': contentType, - 'content-security-policy': 'default-src \'none\'; ' + - 'script-src \'self\' \'unsafe-inline\'; connect-src \'self\'; ' + - 'style-src \'self\' \'unsafe-inline\'; img-src \'self\'; font-src \'self\';' - }, - body: body - } ); - } ); -} - -router.get( '/', function ( req, res, next ) { - var spec; - if ( req.query.spec !== undefined && app.conf.spec ) { - spec = Object.assign( {}, app.conf.spec, { - // Set the base path dynamically - basePath: req.path.toString().replace( /\/$/, '' ) - } ); - - if ( req.params.domain === req.headers.host.replace( /:[0-9]+$/, '' ) ) { - // This is a host-based request. Set an appropriate base path. - spec.basePath = spec[ 'x-host-basePath' ] || spec.basePath; - } - - res.send( spec ); - } else if ( req.query.doc !== undefined || - ( /\btext\/html\b/.test( req.headers.accept ) && req.url.length <= 2 ) - ) { - // Return swagger UI & load spec from /?spec - if ( !req.query.path ) { - req.query.path = '/index.html'; - } - return staticServe( req ).then( - function ( data ) { - res.set( data.headers ); - res.send( data.body ); - } ); - } else { - next(); - } -} ); - -module.exports = function ( appObj ) { - app = appObj; - return { - path: '/v1', - // eslint-disable-next-line camelcase - skip_domain: true, - router: router - }; - -}; diff --git a/lib/routes/v1.js b/lib/routes/v1.js index 29f729b..ef16537 100644 --- a/lib/routes/v1.js +++ b/lib/routes/v1.js @@ -4,9 +4,10 @@ languageData = require( 'language-data' ), jwt = require( 'jsonwebtoken' ), MWPageLoader = require( '../mw/MWPageLoader' ), - CXConfig = require( '../Config.js' ); + swaggerUi = require( '../swagger-ui' ), + CXConfig = require( '../Config' ); -class Routes { +class RoutesV1 { constructor( app, registry ) { this.app = app; this.registry = registry; @@ -25,6 +26,7 @@ */ get routes() { return { + '/': this.getSpec, '/page/:language/:title/:revision?': this.fetchPage, 'POST /mt/:from/:to/:provider?': this.machineTranslate, '/dictionary/:word/:from/:to/:provider?': this.dictionary, @@ -32,8 +34,7 @@ '/list/pair/:from/:to': this.listToolForLanguagePair, '/languagepairs': this.listLanguagePairs, '/list/languagepairs': this.listLanguagePairs, - '/list/:tool/:from?/:to?': this.listToolForLanguagePairsAndTool, - 'POST /translate/:from/:to/:provider?': this.translate + '/list/:tool/:from?/:to?': this.listToolForLanguagePairsAndTool }; } @@ -54,6 +55,25 @@ } this.router[ verb ]( parts[ 1 ] || parts[ 0 ], routes[ path ].bind( this ) ); } ); + } + + /** + * GET / + * Main entry point. Currently it only responds if the spec or doc query + * parameter is given, otherwise lets the next middleware handle it + * @param {Request} req request object + * @param {Response} res response object + * @param {Function} next Next handler + * @return {Promise|null} + */ + getSpec( req, res, next ) { + if ( {}.hasOwnProperty.call( req.query || {}, 'spec' ) ) { + res.json( this.app.conf.spec ); + } else if ( {}.hasOwnProperty.call( req.query || {}, 'doc' ) ) { + return swaggerUi.processRequest( this.app, req, res ); + } else { + next(); + } } fetchPage( req, res ) { @@ -323,17 +343,19 @@ } ); } ); } + + static create( appObj ) { + const registry = new CXConfig( appObj ); + const routes = new RoutesV1( appObj, registry ); + return { + path: '/v1/', + // eslint-disable-next-line camelcase + api_version: 1, + router: routes.router, + // eslint-disable-next-line camelcase + skip_domain: true + }; + } } -module.exports = ( appObj ) => { - const registry = new CXConfig( appObj ); - const routes = new Routes( appObj, registry ); - return { - path: '/v1/', - // eslint-disable-next-line camelcase - api_version: 1, - router: routes.router, - // eslint-disable-next-line camelcase - skip_domain: true - }; -}; +module.exports = RoutesV1; diff --git a/lib/routes/v2.js b/lib/routes/v2.js new file mode 100644 index 0000000..b97f35e --- /dev/null +++ b/lib/routes/v2.js @@ -0,0 +1,115 @@ +'use strict'; + +const languageData = require( 'language-data' ), + MWPageLoader = require( '../mw/MWPageLoader' ), + CXConfig = require( '../Config' ), + RoutesV1 = require( './v1' ); + +class RoutesV2 extends RoutesV1 { + + /** + * route definitions + */ + get routes() { + return Object.assign( { + 'POST /translate/:from/:to/:provider?': this.translate + }, super.routes ); + } + + fetchPage( req, res ) { + const title = req.params.title, + revision = req.params.revision; + + // In case of wikimedia service hosting, cxserver is configured behind + // xx.wikipedia.org/api/.. instead of language code, we get domain name. + // Split by . seems safe here. But note that this is not respecting + // domain pattern configured by mw_host + const sourceLanguageOrDomain = req.params.language; + const sourceLanguage = sourceLanguageOrDomain.split( '.' )[ 0 ]; + if ( !languageData.isKnown( sourceLanguageOrDomain ) ) { + return res.status( 400 ) + .end( `Invalid language code for page fetch: ${sourceLanguage}` ); + } + + const pageLoader = new MWPageLoader( { + context: this.app, + sourceLanguage + } ); + + this.app.logger.log( 'debug', `Getting page ${sourceLanguage}:${title}` ); + return pageLoader.getPage( title, revision ).then( + ( response ) => { + res.send( { + sourceLanguage, + title, + revision: response.revision, + segmentedContent: response.content + } ); + this.app.logger.log( 'debug', 'Page sent' ); + }, + ( error ) => { + res.status( 404 ) + .end( `Page ${sourceLanguage}:${title} could not be found. ` + error.toString() ); + } + ); + } + + /** + * @param {Request} req request object + * @param {Response} res response object + * @return {Promise} + */ + translate( req, res ) { + var mtClient, sourceHtml, machineTranslationRequest, + from = req.params.from, + to = req.params.to; + + if ( req.params.provider ) { + mtClient = this.getMTClient( req, res ); + if ( !mtClient ) { + // With explicit provider, if not MT Client found, it is an error. + return; + } + } + + sourceHtml = req.body.html; + + if ( !mtClient ) { + machineTranslationRequest = Promise.resolve( sourceHtml ); + } else { + machineTranslationRequest = mtClient.translate( from, to, sourceHtml ); + } + + return machineTranslationRequest.then( ( translatedHTML ) => { + var CXAdapter = require( __dirname + '/../Adapter' ); + + this.app.conf.mtClient = mtClient; + + return new CXAdapter( from, to, this.app ) + .adapt( translatedHTML ) + .then( ( adaptedDoc ) => { + res.json( { + contents: adaptedDoc.getHtml() + } ); + }, ( error ) => { + res.status( 500 ).end( error.stack ); + this.app.logger.log( 'error', 'MT processing error: ' + error.stack ); + } ); + } ); + } + + static create( appObj ) { + const registry = new CXConfig( appObj ); + const routes = new RoutesV2( appObj, registry ); + return { + path: '/v2/', + // eslint-disable-next-line camelcase + api_version: 2, + router: routes.router, + // eslint-disable-next-line camelcase + skip_domain: true + }; + } +} + +module.exports = RoutesV2; diff --git a/lib/swagger-ui.js b/lib/swagger-ui.js new file mode 100644 index 0000000..70055eb --- /dev/null +++ b/lib/swagger-ui.js @@ -0,0 +1,76 @@ +'use strict'; + +const BBPromise = require( 'bluebird' ); +const fs = BBPromise.promisifyAll( require( 'fs' ) ); +const path = require( 'path' ); +const HTTPError = require( '../lib/util.js' ).HTTPError; + +// Swagger-ui helpfully exporting the absolute path of its dist directory +const docRoot = `${require( 'swagger-ui' ).dist}/`; + +function processRequest( app, req, res ) { + + const reqPath = req.query.path || '/index.html'; + const filePath = path.join( docRoot, reqPath ); + + // Disallow relative paths. + // Test relies on docRoot ending on a slash. + if ( filePath.substring( 0, docRoot.length ) !== docRoot ) { + throw new HTTPError( { + status: 404, + type: 'not_found', + title: 'File not found', + detail: `${reqPath} could not be found.` + } ); + } + + return fs.readFileAsync( filePath ) + .then( ( body ) => { + if ( reqPath === '/index.html' ) { + body = body.toString() + .replace( /((?:src|href)=['"])/g, '$1?doc&path=' ) + // Some self-promotion + .replace( /<a id="logo".*?<\/a>/, + `<a id="logo" href="${app.info.homepage}">${app.info.name}</a>` ) + .replace( /<title>[^<]*<\/title>/, `<title>${app.info.name}</title>` ) + // Replace the default url with ours, switch off validation & + // limit the size of documents to apply syntax highlighting to + .replace( /docExpansion: "none"/, 'docExpansion: "list", ' + + 'validatorUrl: null, ' + + 'highlightSizeThreshold: 10000' ) + .replace( / url: url,/, 'url: "/?spec",' ); + } + + let contentType = 'text/html'; + if ( /\.js$/.test( reqPath ) ) { + contentType = 'text/javascript'; + body = body.toString() + .replace( /underscore-min\.map/, '?doc&path=lib/underscore-min.map' ); + } else if ( /\.png$/.test( reqPath ) ) { + contentType = 'image/png'; + } else if ( /\.map$/.test( reqPath ) ) { + contentType = 'application/json'; + } else if ( /\.ttf$/.test( reqPath ) ) { + contentType = 'application/x-font-ttf'; + } else if ( /\.css$/.test( reqPath ) ) { + contentType = 'text/css'; + body = body.toString().replace( /\.\.\/(images|fonts)\//g, '?doc&path=$1/' ); + } + + res.setHeader( 'Content-Type', contentType ); + res.setHeader( 'content-security-policy', 'default-src \'none\'; ' + + 'script-src \'self\' \'unsafe-inline\'; connect-src *; ' + + 'style-src \'self\' \'unsafe-inline\'; img-src \'self\'; font-src \'self\';' ); + res.send( body.toString() ); + } ) + .catch( { code: 'ENOENT' }, () => { + res.status( 404 ) + .type( 'not_found' ) + .send( 'not found' ); + } ); + +} + +module.exports = { + processRequest +}; diff --git a/spec.yaml b/spec.yaml index 4e4ac0f..0cc4320 100644 --- a/spec.yaml +++ b/spec.yaml @@ -201,54 +201,6 @@ contents: /.+/ headers: content-type: application/json - /v1/translate/{from}/{to}{/provider}: - post: - tags: - - Machine translation - description: Translate the given content from source language to target langauge. Also adapt the content for the target language wiki. Some machine translation providers require an authorization header and it is forbidden to use them outside the Content Translation tool. - consumes: - - application/x-www-form-urlencoded - produces: - - application/json - parameters: - - name: from - in: path - description: The source language code - type: string - required: true - - name: to - in: path - description: The target language code - type: string - required: true - - name: provider - in: path - description: The machine translation provider id - type: string - required: false - enum: - - Apertium - - name: html - in: formData - description: The HTML content to translate - type: string - required: true - x-textarea: true - x-amples: - - title: Machine translate an HTML fragment using Apertium, adapt the links to target language wiki. - request: - params: - from: en - to: es - provider: Apertium - body: - html: <p><a rel="mw:WikiLink" href='Oxygen'>Oxygen</a> is a chemical element with symbol O and <a rel="mw:WikiLink" href='Atomic number'>atomic number</a> 8.</p> - response: - status: 200 - body: - contents: /.+/ - headers: - content-type: application/json /v1/list/tool/{tool}: get: tags: @@ -355,6 +307,309 @@ status: 200 headers: content-type: application/json + # from routes/v2.js + /v2/page/{language}/{title}{/revision}: + get: + tags: + - Page content + description: Fetches segmented mediawiki page + produces: + - application/json + parameters: + - name: language + in: path + description: The language code or the domain of the wiki + type: string + required: true + - name: title + in: path + description: The page title + type: string + required: true + - name: revision + in: path + description: The page revision id + type: string + required: false + x-amples: + - title: Fetch enwiki Oxygen page + request: + params: + language: en + title: Oxygen + revision: 702870951 + response: + status: 200 + headers: + content-type: application/json + /v2/dictionary/{word}/{from}/{to}{/provider}: + get: + tags: + - Dictionary + description: Fetches the dictionary meaning of a word. + produces: + - application/json + parameters: + - name: word + in: path + description: The word to lookup + type: string + required: true + - name: from + in: path + description: The source language code + type: string + required: true + - name: to + in: path + description: The target language code + type: string + required: true + - name: provider + in: path + description: The dictionary provider id + type: string + required: false + enum: + - JsonDict + - Dictd + x-amples: + - title: Fetch dictionay meaning with a given provider + request: + params: + word: water + from: en + to: es + provider: JsonDict + response: + status: 200 + body: + source: water + translations: + - phrase: /.+/ + sources: + - fd-eng-spa + headers: + content-type: application/json + - title: Fetch dictionay meaning without specifying a provider + request: + params: + word: water + from: en + to: es + response: + status: 200 + body: + source: water + translations: + - phrase: /.+/ + sources: + - fd-eng-spa + headers: + content-type: application/json + /v2/mt/{from}/{to}{/provider}: + post: + tags: + - Machine translation + description: Fetches the machine translation. Some providers require an authorization header and it is forbidden to use them outside the Content Translation tool. + consumes: + - application/x-www-form-urlencoded + produces: + - application/json + parameters: + - name: from + in: path + description: The source language code + type: string + required: true + - name: to + in: path + description: The target language code + type: string + required: true + - name: provider + in: path + description: The machine translation provider id + type: string + required: false + enum: + - Apertium + - Matxin + - name: html + in: formData + description: The HTML or plaintext content to translate + type: string + required: true + x-textarea: true + x-amples: + - title: Machine translate an HTML fragment using Apertium. + request: + params: + from: en + to: es + provider: Apertium + body: + html: <p><a href='Oxygen'>Oxygen</a> is a chemical element with symbol O and <a href='Atomic number'>atomic number</a> 8.</p> + response: + status: 200 + body: + contents: /.+/ + headers: + content-type: application/json + /v2/translate/{from}/{to}{/provider}: + post: + tags: + - Machine translation + description: Translate the given content from source language to target langauge. Also adapt the content for the target language wiki. Some machine translation providers require an authorization header and it is forbidden to use them outside the Content Translation tool. + consumes: + - application/x-www-form-urlencoded + produces: + - application/json + parameters: + - name: from + in: path + description: The source language code + type: string + required: true + - name: to + in: path + description: The target language code + type: string + required: true + - name: provider + in: path + description: The machine translation provider id + type: string + required: false + enum: + - Apertium + - name: html + in: formData + description: The HTML content to translate + type: string + required: true + x-textarea: true + x-amples: + - title: Machine translate an HTML fragment using Apertium, adapt the links to target language wiki. + request: + params: + from: en + to: es + provider: Apertium + body: + html: <p><a rel="mw:WikiLink" href='Oxygen'>Oxygen</a> is a chemical element with symbol O and <a rel="mw:WikiLink" href='Atomic number'>atomic number</a> 8.</p> + response: + status: 200 + body: + contents: /.+/ + headers: + content-type: application/json + /v2/list/tool/{tool}: + get: + tags: + - Tools + - Service information + description: Lists the tools for all language pairs + produces: + - application/json + parameters: + - name: tool + in: path + description: The tool name + type: string + required: true + enum: + - mt + - dictionary + x-amples: + - title: Get the tools for all language pairs + request: + params: + tool: mt + response: + status: 200 + headers: + content-type: application/json + /v2/list/pair/{from}/{to}: + get: + tags: + - Tools + description: Lists the tools for a given language pair + produces: + - application/json + parameters: + - name: from + in: path + description: The source language code + type: string + required: true + - name: to + in: path + description: The target language code + type: string + required: true + x-amples: + - title: Get the tools between two language pairs + request: + params: + from: en + to: es + response: + status: 200 + headers: + content-type: application/json + /v2/list/languagepairs: + get: + tags: + - Languages + - Service information + description: Lists the language pairs supported by the server + produces: + - application/json + x-amples: + - title: Get all the language pairs + response: + status: 200 + headers: + content-type: application/json + /v2/list/{tool}{/from}{/to}: + get: + tags: + - Tools + - Service information + description: Lists all language pairs that tool supports + produces: + - application/json + parameters: + - name: tool + in: path + description: The tool name + type: string + required: true + enum: + - mt + - dictionary + - name: from + in: path + description: The source language code + type: string + required: false + - name: to + in: path + description: The target language code + type: string + required: false + x-amples: + - title: Get the MT tool between two language pairs + request: + params: + from: en + to: es + tool: mt + response: + status: 200 + headers: + content-type: application/json # from routes/info.js /_info: get: @@ -379,7 +634,6 @@ get: tags: - Service information - - Service name description: Gets the name of the service produces: - application/json @@ -396,7 +650,6 @@ get: tags: - Service information - - Service version description: Gets the running version of the service produces: - application/json @@ -413,7 +666,6 @@ get: tags: - Service information - - Service homepage description: Redirects to the home page x-amples: - title: redirect to the home page -- To view, visit https://gerrit.wikimedia.org/r/383788 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I8ca37ce4884aa8d65c16b14ae682b395afb848ab Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/cxserver Gerrit-Branch: master Gerrit-Owner: Santhosh <santhosh.thottin...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits