jenkins-bot has submitted this change and it was merged.

Change subject: ParserService middleware cleanup
......................................................................
ParserService middleware cleanup Moves getParserServiceEnv to an express middleware. Change-Id: I2bdc3aa5102d96e90515fe80b762f88bc8537175 --- M js/api/ParserService.js 1 file changed, 241 insertions(+), 248 deletions(-) Approvals: GWicke: Looks good to me, approved jenkins-bot: Verified diff --git a/js/api/ParserService.js b/js/api/ParserService.js index 6a1cfbe..d4a7851 100644 --- a/js/api/ParserService.js +++ b/js/api/ParserService.js @@ -369,26 +369,56 @@ res.end('</body></html>'); }); +function ParserError( msg, stack, code ) { + Error.call( this, msg ); + this.stack = stack; + this.code = code; +} -var getParserServiceEnv = function ( res, iwp, pageName, cb, req ) { - MWParserEnvironment.getParserEnv( parsoidConfig, null, iwp || '', pageName, - req.headers.cookie, function ( err, env ) { +function errorHandler( err, req, res, next ) { + if ( !(err instanceof ParserError) ) { + return next( err ); + } + + console.error( 'ERROR in ' + res.locals.iwp + ':' + res.locals.pageName + ':\n' + err.message ); + console.error( "Stack trace: " + err.stack ); + res.send( err.stack, err.code ); + + // Force a clean restart of this worker + process.exit( 1 ); +} + +app.use( errorHandler ); + +function defaultParams( req, res, next ) { + res.locals.iwp = parsoidConfig.defaultWiki || ''; + res.locals.pageName = req.params[0]; + next(); +} + +function interParams( req, res, next ) { + res.locals.iwp = req.params[0]; + res.locals.pageName = req.params[1]; + next(); +} + +function parserEnvMw( req, res, next ) { + MWParserEnvironment.getParserEnv( parsoidConfig, null, res.locals.iwp, res.locals.pageName, req.headers.cookie, function ( err, env ) { env.errCB = function ( e ) { - var errmsg = e.stack || e.toString(); - var code = e.code || 500; - console.error( 'ERROR in ' + iwp + ':' + pageName + ':\n' + e.message); - console.error("Stack trace: " + errmsg); - res.send( errmsg, code ); - // Force a clean restart of this worker - process.exit(1); + e = new ParserError( + e.message, 
+ e.stack || e.toString(), + e.code || 500 + ); + next( e ); }; - if ( err === null ) { - cb( env ); - } else { - env.errCB( err ); + if ( err ) { + return env.errCb( err ); } - } ); -}; + res.locals.env = env; + next(); + }); +} // robots.txt: no indexing. app.get(/^\/robots.txt$/, function ( req, res ) { @@ -427,201 +457,172 @@ res.end( ); }); - // Form-based HTML DOM -> wikitext interface for manual testing -app.get(/\/_html\/(.*)/, function ( req, res ) { - var cb = function ( env ) { - res.setHeader('Content-Type', 'text/html; charset=UTF-8'); - res.write( "Your HTML DOM:" ); - textarea( res ); - res.end(''); - }; +app.get(/\/_html\/(.*)/, defaultParams, parserEnvMw, function ( req, res ) { + res.setHeader( 'Content-Type', 'text/html; charset=UTF-8' ); + res.write( "Your HTML DOM:" ); + textarea( res ); + res.end(); +}); - getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, req ); -} ); - -app.post(/\/_html\/(.*)/, function ( req, res ) { - var cb = function ( env ) { - res.setHeader('Content-Type', 'text/html; charset=UTF-8'); - var doc = DU.parseHTML(req.body.content.replace(/\r/g, '')); - res.write('<pre style="background-color: #efefef">'); - // Always use the non-selective serializer for this mode - new WikitextSerializer({env: env}).serializeDOM( - doc.body, - function( c ) { - res.write( htmlSpecialChars( c ) ); - }, - function() { - res.write('</pre>'); - res.write( "<hr>Your HTML DOM:" ); - textarea( res, req.body.content.replace(/\r/g, '') ); - res.end(''); - } - ); - }; - - getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, req ); -} ); +app.post(/\/_html\/(.*)/, defaultParams, parserEnvMw, function ( req, res ) { + res.setHeader('Content-Type', 'text/html; charset=UTF-8'); + var doc = DU.parseHTML(req.body.content.replace(/\r/g, '')); + res.write('<pre style="background-color: #efefef">'); + // Always use the non-selective serializer for this mode + new WikitextSerializer({ env: res.locals.env 
}).serializeDOM( + doc.body, + function( c ) { + res.write( htmlSpecialChars( c ) ); + }, + function() { + res.write('</pre>'); + res.write( "<hr>Your HTML DOM:" ); + textarea( res, req.body.content.replace(/\r/g, '') ); + res.end(); + } + ); +}); // Form-based wikitext -> HTML DOM interface for manual testing -app.get(/\/_wikitext\/(.*)/, function ( req, res ) { - var cb = function ( env ) { - res.setHeader('Content-Type', 'text/html; charset=UTF-8'); - res.write( "Your wikitext:" ); - textarea( res ); - res.end(''); - }; +app.get(/\/_wikitext\/(.*)/, defaultParams, parserEnvMw, function ( req, res ) { + res.setHeader( 'Content-Type', 'text/html; charset=UTF-8' ); + res.write( "Your wikitext:" ); + textarea( res ); + res.end(); +}); - getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, req ); -} ); - -app.post(/\/_wikitext\/(.*)/, function ( req, res ) { - var cb = function ( env ) { - res.setHeader('Content-Type', 'text/html; charset=UTF-8'); - var parser = Util.getParserPipeline(env, 'text/x-mediawiki/full'), - src = req.body.content.replace(/\r/g, ''); - parser.on('document', function ( document ) { - if (req.body.format==='html') { - res.write(DU.serializeNode(document)); - } else { - res.write('<pre style="white-space: pre-wrap; white-space: -moz-pre-wrap; white-space: -pre-wrap; white-space: -o-pre-wrap; word-wrap: break-word;">'); - res.write(htmlSpecialChars(DU.serializeNode(document.body))); - res.write('</pre>'); - res.write('<hr/>'); - res.write(document.body.innerHTML); - res.write('<hr style="clear:both;"/>Your wikitext:'); - textarea( res, src ); - } - res.end(''); - }); - if (env.conf.parsoid.allowCORS) { - // allow cross-domain requests (CORS) so that parsoid service - // can be used by third-party sites - res.setHeader('Access-Control-Allow-Origin', - env.conf.parsoid.allowCORS); +app.post(/\/_wikitext\/(.*)/, defaultParams, parserEnvMw, function ( req, res ) { + var env = res.locals.env; + res.setHeader( 'Content-Type', 
'text/html; charset=UTF-8' ); + var parser = Util.getParserPipeline(env, 'text/x-mediawiki/full'), + src = req.body.content.replace(/\r/g, ''); + parser.on('document', function ( document ) { + if (req.body.format==='html') { + res.write(DU.serializeNode(document)); + } else { + res.write('<pre style="white-space: pre-wrap; white-space: -moz-pre-wrap; white-space: -pre-wrap; white-space: -o-pre-wrap; word-wrap: break-word;">'); + res.write(htmlSpecialChars(DU.serializeNode(document.body))); + res.write('</pre>'); + res.write('<hr/>'); + res.write(document.body.innerHTML); + res.write('<hr style="clear:both;"/>Your wikitext:'); + textarea( res, src ); } - try { - console.log('starting parsing of ' + req.params[0]); - // FIXME: This does not handle includes or templates correctly - env.setPageSrcInfo( src ); - parser.processToplevelDoc( src ); - } catch (e) { - res.setHeader('Content-Type', 'text/plain; charset=UTF-8'); - console.error( e.stack || e.toString() ); - res.send( e.stack || e.toString(), 500 ); - } - }; - - getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, req ); -} ); + res.end(); + }); + if (env.conf.parsoid.allowCORS) { + // allow cross-domain requests (CORS) so that parsoid service + // can be used by third-party sites + res.setHeader('Access-Control-Allow-Origin', + env.conf.parsoid.allowCORS); + } + try { + console.log('starting parsing of ' + req.params[0]); + // FIXME: This does not handle includes or templates correctly + env.setPageSrcInfo( src ); + parser.processToplevelDoc( src ); + } catch (e) { + res.setHeader('Content-Type', 'text/plain; charset=UTF-8'); + console.error( e.stack || e.toString() ); + res.send( e.stack || e.toString(), 500 ); + } +}); // Round-trip article testing -app.get( new RegExp('/_rt/(' + getInterwikiRE() + ')/(.*)'), function(req, res) { - var cb = function ( env ) { - req.connection.setTimeout(300 * 1000); +app.get( new RegExp('/_rt/(' + getInterwikiRE() + ')/(.*)'), interParams, parserEnvMw, 
function(req, res) { + var env = res.locals.env; + req.connection.setTimeout(300 * 1000); - if ( env.page.name === 'favicon.ico' ) { - res.send( 'no favicon yet..', 404 ); - return; - } + if ( env.page.name === 'favicon.ico' ) { + res.send( 'no favicon yet..', 404 ); + return; + } - var target = env.resolveTitle( env.normalizeTitle( env.page.name ), '' ); + var target = env.resolveTitle( env.normalizeTitle( env.page.name ), '' ); - console.log('starting parsing of ' + target); - var oldid = null; - if ( req.query.oldid ) { - oldid = req.query.oldid; - } - var tpr = new TemplateRequest( env, target, oldid ); - tpr.once('src', parse.bind( tpr, env, req, res, roundTripDiff.bind( null, false ) )); - }; - - getParserServiceEnv( res, req.params[0], req.params[1], cb, req ); -} ); + console.log('starting parsing of ' + target); + var oldid = null; + if ( req.query.oldid ) { + oldid = req.query.oldid; + } + var tpr = new TemplateRequest( env, target, oldid ); + tpr.once('src', parse.bind( tpr, env, req, res, roundTripDiff.bind( null, false ) )); +}); // Round-trip article testing with newline stripping for editor-created HTML // simulation -app.get( new RegExp('/_rtve/(' + getInterwikiRE() + ')/(.*)') , function(req, res) { - var cb = function ( env ) { - if ( env.page.name === 'favicon.ico' ) { - res.send( 'no favicon yet..', 404 ); - return; - } +app.get( new RegExp('/_rtve/(' + getInterwikiRE() + ')/(.*)'), interParams, parserEnvMw, function(req, res) { + var env = res.locals.env; + if ( env.page.name === 'favicon.ico' ) { + res.send( 'no favicon yet..', 404 ); + return; + } - var target = env.resolveTitle( env.normalizeTitle( env.page.name ), '' ); + var target = env.resolveTitle( env.normalizeTitle( env.page.name ), '' ); - console.log('starting parsing of ' + target); - var oldid = null; - if ( req.query.oldid ) { - oldid = req.query.oldid; - } - var tpr = new TemplateRequest( env, target, oldid ), - cb = function ( req, res, src, document ) { - // strip newlines 
from the html - var html = document.innerHTML.replace(/[\r\n]/g, ''), - newDocument = DU.parseHTML(html); - roundTripDiff( false, req, res, src, newDocument ); - }; + console.log('starting parsing of ' + target); + var oldid = null; + if ( req.query.oldid ) { + oldid = req.query.oldid; + } + var tpr = new TemplateRequest( env, target, oldid ), + cb = function ( req, res, src, document ) { + // strip newlines from the html + var html = document.innerHTML.replace(/[\r\n]/g, ''), + newDocument = DU.parseHTML(html); + roundTripDiff( false, req, res, src, newDocument ); + }; - tpr.once('src', parse.bind( tpr, env, req, res, cb )); - }; - - getParserServiceEnv( res, req.params[0], req.params[1], cb, req ); + tpr.once('src', parse.bind( tpr, env, req, res, cb )); }); // Round-trip article testing with selser over re-parsed HTML. -app.get( new RegExp('/_rtselser/(' + getInterwikiRE() + ')/(.*)') , function (req, res) { - var envCb = function ( env ) { - if ( env.page.name === 'favicon.ico' ) { - res.send( 'no favicon yet..', 404 ); - return; - } +app.get( new RegExp('/_rtselser/(' + getInterwikiRE() + ')/(.*)'), interParams, parserEnvMw, function (req, res) { + var env = res.locals.env; + if ( env.page.name === 'favicon.ico' ) { + res.send( 'no favicon yet..', 404 ); + return; + } - var target = env.resolveTitle( env.normalizeTitle( env.page.name ), '' ); + var target = env.resolveTitle( env.normalizeTitle( env.page.name ), '' ); - console.log( 'starting parsing of ' + target ); - var oldid = null; - if ( req.query.oldid ) { - oldid = req.query.oldid; - } - var tpr = new TemplateRequest( env, target, oldid ), - tprCb = function ( req, res, src, document ) { - var newDocument = DU.parseHTML( DU.serializeNode(document) ); - roundTripDiff( true, req, res, src, newDocument ); - }; + console.log( 'starting parsing of ' + target ); + var oldid = null; + if ( req.query.oldid ) { + oldid = req.query.oldid; + } + var tpr = new TemplateRequest( env, target, oldid ), + tprCb = 
function ( req, res, src, document ) { + var newDocument = DU.parseHTML( DU.serializeNode(document) ); + roundTripDiff( true, req, res, src, newDocument ); + }; - tpr.once( 'src', parse.bind( tpr, env, req, res, tprCb ) ); - }; - - getParserServiceEnv( res, req.params[0], req.params[1], envCb ); + tpr.once( 'src', parse.bind( tpr, env, req, res, tprCb ) ); }); // Form-based round-tripping for manual testing -app.get(/\/_rtform\/(.*)/, function ( req, res ) { - var cb = function ( env ) { - res.setHeader('Content-Type', 'text/html; charset=UTF-8'); - res.write( "Your wikitext:" ); - textarea( res ); - res.end(''); - }; - - getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, req ); +app.get(/\/_rtform\/(.*)/, defaultParams, parserEnvMw, function ( req, res ) { + res.setHeader('Content-Type', 'text/html; charset=UTF-8'); + res.write( "Your wikitext:" ); + textarea( res ); + res.end(); }); -app.post(/\/_rtform\/(.*)/, function ( req, res ) { - var cb = function ( env ) { - res.setHeader('Content-Type', 'text/html; charset=UTF-8'); - // we don't care about \r, and normalize everything to \n - parse( env, req, res, roundTripDiff.bind( null, false ), null, { - revision: { '*': req.body.content.replace(/\r/g, '') } - }); - }; - - getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb, req ); -} ); +app.post(/\/_rtform\/(.*)/, defaultParams, parserEnvMw, function ( req, res ) { + var env = res.locals.env; + res.setHeader('Content-Type', 'text/html; charset=UTF-8'); + // we don't care about \r, and normalize everything to \n + parse( env, req, res, roundTripDiff.bind( null, false ), null, { + revision: { '*': req.body.content.replace(/\r/g, '') } + }); +}); // Regular article parsing -app.get(new RegExp( '/(' + getInterwikiRE() + ')/(.*)' ), function(req, res) { +app.get(new RegExp( '/(' + getInterwikiRE() + ')/(.*)' ), interParams, parserEnvMw, function(req, res) { + var env = res.locals.env; + var prefix = req.params[0]; + // TODO 
gwicke: re-enable this when actually using Varnish //if (/only-if-cached/.test(req.headers['cache-control'])) { // res.send( 'Clearly not cached since this request reached Parsoid. Please fix Varnish.', @@ -629,83 +630,75 @@ // return; //} - var cb = function ( env ) { - if ( env.page.name === 'favicon.ico' ) { - res.send( 'no favicon yet..', 404 ); - return; - } + if ( env.page.name === 'favicon.ico' ) { + res.send( 'no favicon yet..', 404 ); + return; + } - //console.log(req.headers); + //console.log(req.headers); - var target = env.resolveTitle( env.normalizeTitle( env.page.name ), '' ); + var target = env.resolveTitle( env.normalizeTitle( env.page.name ), '' ); - // Set the timeout to 900 seconds.. - req.connection.setTimeout(900 * 1000); + // Set the timeout to 900 seconds.. + req.connection.setTimeout(900 * 1000); - console.log('starting parsing of ' + prefix + ':' + target); - var oldid = null; - if ( req.query.oldid && !req.headers.cookie ) { - oldid = req.query.oldid; - res.setHeader('Cache-Control', 's-maxage=2592000'); - } else { - // Don't cache requests with a session or no oldid - res.setHeader('Cache-Control', 'private,no-cache,s-maxage=0'); - } - if (env.conf.parsoid.allowCORS) { - // allow cross-domain requests (CORS) so that parsoid service - // can be used by third-party sites - res.setHeader('Access-Control-Allow-Origin', - env.conf.parsoid.allowCORS); - } + console.log('starting parsing of ' + prefix + ':' + target); + var oldid = null; + if ( req.query.oldid && !req.headers.cookie ) { + oldid = req.query.oldid; + res.setHeader('Cache-Control', 's-maxage=2592000'); + } else { + // Don't cache requests with a session or no oldid + res.setHeader('Cache-Control', 'private,no-cache,s-maxage=0'); + } + if (env.conf.parsoid.allowCORS) { + // allow cross-domain requests (CORS) so that parsoid service + // can be used by third-party sites + res.setHeader('Access-Control-Allow-Origin', + env.conf.parsoid.allowCORS); + } - var tpr = new TemplateRequest( 
env, target, oldid ); - tpr.once('src', parse.bind( null, env, req, res, function ( req, res, src, doc ) { - var out = DU.serializeNode(doc.documentElement); - res.setHeader('X-Parsoid-Performance', env.getPerformanceHeader()); - res.end(out); - console.warn("completed parsing of " + prefix + - ':' + target + " in " + env.performance.duration + " ms"); - })); - }; - - var prefix = req.params[0]; - getParserServiceEnv( res, prefix, req.params[1], cb, req ); -} ); + var tpr = new TemplateRequest( env, target, oldid ); + tpr.once('src', parse.bind( null, env, req, res, function ( req, res, src, doc ) { + var out = DU.serializeNode(doc.documentElement); + res.setHeader('X-Parsoid-Performance', env.getPerformanceHeader()); + res.end(out); + console.warn("completed parsing of " + prefix + + ':' + target + " in " + env.performance.duration + " ms"); + })); +}); // Regular article serialization using POST -app.post( new RegExp( '/(' + getInterwikiRE() + ')/(.*)' ), function ( req, res ) { - var cb = function ( env ) { - var doc, oldid = req.body.oldid || null; - env.page.id = oldid; +app.post( new RegExp( '/(' + getInterwikiRE() + ')/(.*)' ), interParams, parserEnvMw, function ( req, res ) { + var env = res.locals.env; + var doc, oldid = req.body.oldid || null; + env.page.id = oldid; - res.setHeader('Content-Type', 'text/x-mediawiki; charset=UTF-8'); + res.setHeader('Content-Type', 'text/x-mediawiki; charset=UTF-8'); - try { - doc = DU.parseHTML(req.body.content); - } catch ( e ) { - console.log( 'There was an error in the HTML5 parser! Sending it back to the editor.' ); - env.errCB(e); - return; - } + try { + doc = DU.parseHTML(req.body.content); + } catch ( e ) { + console.log( 'There was an error in the HTML5 parser! Sending it back to the editor.' 
); + env.errCB(e); + return; + } - try { - var out = []; - new Serializer( { env: env, oldid: env.page.id } ).serializeDOM( - doc.body, - function ( chunk ) { - out.push(chunk); - }, function () { - res.setHeader('X-Parsoid-Performance', env.getPerformanceHeader()); - res.write( out.join('') ); - res.end(''); - } ); - } catch ( e ) { - env.errCB( e ); - } - }; - - getParserServiceEnv( res, req.params[0], req.params[1], cb, req ); -} ); + try { + var out = []; + new Serializer( { env: env, oldid: env.page.id } ).serializeDOM( + doc.body, + function ( chunk ) { + out.push(chunk); + }, function () { + res.setHeader('X-Parsoid-Performance', env.getPerformanceHeader()); + res.write( out.join('') ); + res.end(''); + } ); + } catch ( e ) { + env.errCB( e ); + } +}); /** * Continuous integration end points -- To view, visit https://gerrit.wikimedia.org/r/91335 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I2bdc3aa5102d96e90515fe80b762f88bc8537175 Gerrit-PatchSet: 2 Gerrit-Project: mediawiki/extensions/Parsoid Gerrit-Branch: master Gerrit-Owner: Arlolra <abrea...@wikimedia.org> Gerrit-Reviewer: GWicke <gwi...@wikimedia.org> Gerrit-Reviewer: jenkins-bot _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits