C. Scott Ananian has uploaded a new change for review. https://gerrit.wikimedia.org/r/321005
Change subject: POC: Translate extension experiment using Annotation service. ...................................................................... POC: Translate extension experiment using Annotation service. Change-Id: I86f1fbe5bceef93b2654ded636719550daf72010 --- M lib/api/ParsoidService.js M lib/api/apiUtils.js M lib/api/routes.js A lib/translate/HRequest.js A lib/translate/Translator.js 5 files changed, 203 insertions(+), 2 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid refs/changes/05/321005/1 diff --git a/lib/api/ParsoidService.js b/lib/api/ParsoidService.js index 7aa1bb5..7d6d765 100644 --- a/lib/api/ParsoidService.js +++ b/lib/api/ParsoidService.js @@ -183,6 +183,7 @@ // v3 API routes app.get('/:domain/v3/page/:format/:title/:revision?', v3, u, p, a, routes.v3Get); app.post('/:domain/v3/transform/:from/to/:format/:title?/:revision?', v3, u, p, a, routes.v3Post); + app.get('/:domain/v3/translate/:lang/:format/:title/:revision?', v3, u, p, a, routes.v3Translate); var server; return new Promise(function(resolve, reject) { diff --git a/lib/api/apiUtils.js b/lib/api/apiUtils.js index e3d77e9..ad3f947 100644 --- a/lib/api/apiUtils.js +++ b/lib/api/apiUtils.js @@ -402,7 +402,7 @@ * @param {Request} req * @param {Response} res */ -apiUtils.redirectToOldid = function(req, res) { +apiUtils.redirectToOldid = function(req, res, isTranslate) { var opts = res.locals.opts; var env = res.locals.env; var metrics = env.conf.parsoid.metrics; @@ -418,7 +418,12 @@ format, encodeURIComponent(target), revid, - ].join('/'); + ]; + // HACK + if (isTranslate) { + path.splice(3, 1, 'translate', opts.lang); + } + path = path.join('/'); if (Object.keys(req.query).length > 0) { path += '?' + qs.stringify(req.query); } diff --git a/lib/api/routes.js b/lib/api/routes.js index 9a56651..c1e2ff3 100644 --- a/lib/api/routes.js +++ b/lib/api/routes.js @@ -14,6 +14,7 @@ var Promise = require('../utils/promise.js'); var LogData = require('../logger/LogData.js').LogData; var ApiRequest = require('../mw/ApiRequest.js'); +var Translator = require('../translate/Translator.js'); var TemplateRequest = ApiRequest.TemplateRequest; @@ -744,6 +745,47 @@ } return p.catch(apiUtils.timeoutResp.bind(null, env)); }; + // GET requests + routes.v3Translate = function(req, res) { + var opts = res.locals.opts; + var env = res.locals.env; + var lang = opts.lang = req.params.lang; + var oldid = res.locals.oldid; + var target = env.normalizeAndResolvePageTitle(); + // XXX explicit oldid is still going through this path! + if (!oldid) { + return TemplateRequest.setPageSrcInfo(env, target).then(function(){ + return apiUtils.redirectToOldid(req, res, 'translate'); + }); + } + return Promise.resolve().then(function() { + if (req.headers.cookie) { + // Don't cache requests with a session. + apiUtils.setHeader(res, env, 'Cache-Control', 'private,no-cache,s-maxage=0'); + } + return TemplateRequest.setPageSrcInfo(env, target, oldid); + }).then(function() { + apiUtils.setHeader(res, env, 'content-revision-id', oldid); + return env.getContentHandler().toHTML(env); + }).then(function(doc) { + return new Translator(env, lang).translate(doc); + }).then(function(doc) { + var html, pb; + if (env.pageBundle) { + var out = DU.extractDpAndSerialize(res.locals.bodyOnly ? doc.body : doc, { + innerXML: res.locals.bodyOnly, + }); + html = out.str; + pb = out.pb; + } else { + html = DU.toXML(res.locals.bodyOnly ? doc.body : doc, { + innerXML: res.locals.bodyOnly, + }); + } + apiUtils.wt2htmlRes(env, res, html, pb); + }).catch(apiUtils.timeoutResp.bind(null, env)); + }; + // POST requests routes.v3Post = function(req, res) { diff --git a/lib/translate/HRequest.js b/lib/translate/HRequest.js new file mode 100644 index 0000000..eec57d2 --- /dev/null +++ b/lib/translate/HRequest.js @@ -0,0 +1,64 @@ +'use strict'; +require('../../core-upgrade.js'); + +/** + * Requests to Hyptothes.is service. + */ +var Promise = require('../utils/promise.js'); +var request = require('request'); + +var HRequest = module.exports = function HRequest(env) { + this.env = env; + // XXX update user auth? + this.hypothesisBase = 'https://hypothes.is/api'; +} +HRequest.prototype.request = function(options) { + var env = this.env; + // Forward the request id + if (!options.headers) { options.headers = {}; } + options.headers['X-Request-ID'] = env.reqId; + // Set default options, forward cookie if set. + options.headers['User-Agent'] = env.conf.parsoid.userAgent; + options.headers.Connection = 'close'; + options.strictSSL = env.conf.parsoid.strictSSL; + // Pass API token + if (env.conf.parsoid.hypothesis_token) { + options.headers['Authorization'] = 'Bearer ' + + env.conf.parsoid.hypothesis_token; + } else { + env.log('warning/hypothesis', 'No hypothesis API token'); + } + // XXX pass the authentication information + return new Promise(function(resolve, reject) { + request(options, function cb(error, response, body) { + if (error) { + this.trace("Received error:", error); + this.env.log( + 'warning/hypothesis', + 'Failed hyptothes.is request', + { error: error, status: response && response.statusCode } + ); + reject(new Error("Hypothes.is failure")); + } else { + resolve({ status: response.statusCode, body: body }); + } + }); + }); +}; + +HRequest.prototype.search = function(args) { + var requestOptions = { + method: 'GET', + uri: this.hypothesisBase + '/search', + qs: args + }; + // XXX if limit is missing, should add a sensible limit, then + // call back into the API multiple times until all of the results + // are retrieved. + return this.request(requestOptions).then(function(r) { + if (r.status !== 200) { + throw new Error('Bad request: '+r.status); + } + return JSON.parse(r.body); + }); +}; diff --git a/lib/translate/Translator.js b/lib/translate/Translator.js new file mode 100644 index 0000000..f0e7f90 --- /dev/null +++ b/lib/translate/Translator.js @@ -0,0 +1,89 @@ +/** + * An experiment to implement translation using an annotation service. + */ +var HRequest = require('./HRequest.js'); +var url = require('url'); + +var Translator = module.exports = function Translator(env, lang) { + this.env = env; + this.lang = lang; + this.hrequest = new HRequest(env); +}; +var TP = Translator.prototype; + +var appendHead = function(document, content) { + if (typeof content === 'string') { + var el = document.createElement('head'), next; + el.innerHTML = content; + for (el = el.firstElementChild; el; el = next) { + next = el.nextElementSibling; + document.head.appendChild(el); + } + } else { + document.head.appendChild(content); + } +}; + +TP.translate = function(document) { + // Fetch the annotations for the document. + var env = this.env; + var lang = this.lang; + var hrequest = this.hrequest; + var revid = env.page.meta.revision.revid; + var canonicalURL = url.resolve( + 'http://', + env.conf.wiki.baseURI + 'Special:Redirect/revision/' + revid + ); + var pageURL = url.resolve( + 'http://', + env.conf.wiki.baseURI + env.page.name.split('/').map(encodeURIComponent).join('/') + ); + var link = document.createElement('link'); + link.setAttribute('rel', 'canonical'); + link.setAttribute('href', canonicalURL/*pageURL*/); + appendHead(document, link); + // Add hypothes.is scripts + appendHead( + document, + '<script type="application/json" class="js-hypothesis-config">' + + '{"showHighlights": false}' + + '</script>' + + '<script src="https://hypothes.is/embed.js" async></script>' + ); + return hrequest.search({ + limit: 1, + uri: pageURL, + tag: 'mw:translate' + }).then(function(results) { + var rows = results.rows || []; + if (rows.length === 0) { + // XXX no translations for this page. + appendHead( + document, + '<script>document.alert("No translations.");</script>' + ); + return document; + } + var last_translated = +rows[0].text; + if (last_translated !== +revid) { + // XXX provide interface to update annotations + appendHead( + document, + '<script>document.alert("There is a newer version of this document which has not been translated.");</script>' + ); + return document; + } + // Okay, translate! + return hrequest.search({ + uri: canonicalURL, + tag: 'mw:translate/'+lang, + }).then(function(results) { + console.log('GOT', JSON.stringify(results, null, 2)); + // XXX OK, apply these annotations! + var pre = document.createElement('pre'); + pre.textContent = JSON.stringify(results, null, 2); + document.body.appendChild(pre); + return document; + }); + }); +}; -- To view, visit https://gerrit.wikimedia.org/r/321005 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I86f1fbe5bceef93b2654ded636719550daf72010 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: C. Scott Ananian <canan...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits