jenkins-bot has submitted this change and it was merged.
Change subject: ParserService middleware cleanup
......................................................................
ParserService middleware cleanup
Moves getParserServiceEnv to an express middleware.
Change-Id: I2bdc3aa5102d96e90515fe80b762f88bc8537175
---
M js/api/ParserService.js
1 file changed, 241 insertions(+), 248 deletions(-)
Approvals:
GWicke: Looks good to me, approved
jenkins-bot: Verified
diff --git a/js/api/ParserService.js b/js/api/ParserService.js
index 6a1cfbe..d4a7851 100644
--- a/js/api/ParserService.js
+++ b/js/api/ParserService.js
@@ -369,26 +369,56 @@
res.end('</body></html>');
});
+function ParserError( msg, stack, code ) {
+ this.message = msg; // Error.call( this, msg ) would return a new object and leave this.message unset
+ this.stack = stack;
+ this.code = code;
+}
-var getParserServiceEnv = function ( res, iwp, pageName, cb, req ) {
- MWParserEnvironment.getParserEnv( parsoidConfig, null, iwp || '',
pageName,
- req.headers.cookie, function ( err, env ) {
+function errorHandler( err, req, res, next ) {
+ if ( !(err instanceof ParserError) ) {
+ return next( err );
+ }
+
+ console.error( 'ERROR in ' + res.locals.iwp + ':' + res.locals.pageName
+ ':\n' + err.message );
+ console.error( "Stack trace: " + err.stack );
+ res.send( err.stack, err.code );
+
+ // Force a clean restart of this worker
+ process.exit( 1 );
+}
+
+app.use( errorHandler );
+
+function defaultParams( req, res, next ) {
+ res.locals.iwp = parsoidConfig.defaultWiki || '';
+ res.locals.pageName = req.params[0];
+ next();
+}
+
+function interParams( req, res, next ) {
+ res.locals.iwp = req.params[0];
+ res.locals.pageName = req.params[1];
+ next();
+}
+
+function parserEnvMw( req, res, next ) {
+ MWParserEnvironment.getParserEnv( parsoidConfig, null, res.locals.iwp,
res.locals.pageName, req.headers.cookie, function ( err, env ) {
env.errCB = function ( e ) {
- var errmsg = e.stack || e.toString();
- var code = e.code || 500;
- console.error( 'ERROR in ' + iwp + ':' + pageName +
':\n' + e.message);
- console.error("Stack trace: " + errmsg);
- res.send( errmsg, code );
- // Force a clean restart of this worker
- process.exit(1);
+ e = new ParserError(
+ e.message,
+ e.stack || e.toString(),
+ e.code || 500
+ );
+ next( e );
};
- if ( err === null ) {
- cb( env );
- } else {
- env.errCB( err );
+ if ( err ) {
+ return env.errCB( err ); // the handler assigned above is errCB, not errCb
}
- } );
-};
+ res.locals.env = env;
+ next();
+ });
+}
// robots.txt: no indexing.
app.get(/^\/robots.txt$/, function ( req, res ) {
@@ -427,201 +457,172 @@
res.end( );
});
-
// Form-based HTML DOM -> wikitext interface for manual testing
-app.get(/\/_html\/(.*)/, function ( req, res ) {
- var cb = function ( env ) {
- res.setHeader('Content-Type', 'text/html; charset=UTF-8');
- res.write( "Your HTML DOM:" );
- textarea( res );
- res.end('');
- };
+app.get(/\/_html\/(.*)/, defaultParams, parserEnvMw, function ( req, res ) {
+ res.setHeader( 'Content-Type', 'text/html; charset=UTF-8' );
+ res.write( "Your HTML DOM:" );
+ textarea( res );
+ res.end();
+});
- getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb,
req );
-} );
-
-app.post(/\/_html\/(.*)/, function ( req, res ) {
- var cb = function ( env ) {
- res.setHeader('Content-Type', 'text/html; charset=UTF-8');
- var doc = DU.parseHTML(req.body.content.replace(/\r/g, ''));
- res.write('<pre style="background-color: #efefef">');
- // Always use the non-selective serializer for this mode
- new WikitextSerializer({env: env}).serializeDOM(
- doc.body,
- function( c ) {
- res.write( htmlSpecialChars( c ) );
- },
- function() {
- res.write('</pre>');
- res.write( "<hr>Your HTML DOM:" );
- textarea( res, req.body.content.replace(/\r/g,
'') );
- res.end('');
- }
- );
- };
-
- getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb,
req );
-} );
+app.post(/\/_html\/(.*)/, defaultParams, parserEnvMw, function ( req, res ) {
+ res.setHeader('Content-Type', 'text/html; charset=UTF-8');
+ var doc = DU.parseHTML(req.body.content.replace(/\r/g, ''));
+ res.write('<pre style="background-color: #efefef">');
+ // Always use the non-selective serializer for this mode
+ new WikitextSerializer({ env: res.locals.env }).serializeDOM(
+ doc.body,
+ function( c ) {
+ res.write( htmlSpecialChars( c ) );
+ },
+ function() {
+ res.write('</pre>');
+ res.write( "<hr>Your HTML DOM:" );
+ textarea( res, req.body.content.replace(/\r/g, '') );
+ res.end();
+ }
+ );
+});
// Form-based wikitext -> HTML DOM interface for manual testing
-app.get(/\/_wikitext\/(.*)/, function ( req, res ) {
- var cb = function ( env ) {
- res.setHeader('Content-Type', 'text/html; charset=UTF-8');
- res.write( "Your wikitext:" );
- textarea( res );
- res.end('');
- };
+app.get(/\/_wikitext\/(.*)/, defaultParams, parserEnvMw, function ( req, res )
{
+ res.setHeader( 'Content-Type', 'text/html; charset=UTF-8' );
+ res.write( "Your wikitext:" );
+ textarea( res );
+ res.end();
+});
- getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb,
req );
-} );
-
-app.post(/\/_wikitext\/(.*)/, function ( req, res ) {
- var cb = function ( env ) {
- res.setHeader('Content-Type', 'text/html; charset=UTF-8');
- var parser = Util.getParserPipeline(env,
'text/x-mediawiki/full'),
- src = req.body.content.replace(/\r/g, '');
- parser.on('document', function ( document ) {
- if (req.body.format==='html') {
- res.write(DU.serializeNode(document));
- } else {
- res.write('<pre style="white-space: pre-wrap;
white-space: -moz-pre-wrap; white-space: -pre-wrap; white-space: -o-pre-wrap;
word-wrap: break-word;">');
-
res.write(htmlSpecialChars(DU.serializeNode(document.body)));
- res.write('</pre>');
- res.write('<hr/>');
- res.write(document.body.innerHTML);
- res.write('<hr style="clear:both;"/>Your
wikitext:');
- textarea( res, src );
- }
- res.end('');
- });
- if (env.conf.parsoid.allowCORS) {
- // allow cross-domain requests (CORS) so that parsoid
service
- // can be used by third-party sites
- res.setHeader('Access-Control-Allow-Origin',
- env.conf.parsoid.allowCORS);
+app.post(/\/_wikitext\/(.*)/, defaultParams, parserEnvMw, function ( req, res
) {
+ var env = res.locals.env;
+ res.setHeader( 'Content-Type', 'text/html; charset=UTF-8' );
+ var parser = Util.getParserPipeline(env, 'text/x-mediawiki/full'),
+ src = req.body.content.replace(/\r/g, '');
+ parser.on('document', function ( document ) {
+ if (req.body.format==='html') {
+ res.write(DU.serializeNode(document));
+ } else {
+ res.write('<pre style="white-space: pre-wrap;
white-space: -moz-pre-wrap; white-space: -pre-wrap; white-space: -o-pre-wrap;
word-wrap: break-word;">');
+
res.write(htmlSpecialChars(DU.serializeNode(document.body)));
+ res.write('</pre>');
+ res.write('<hr/>');
+ res.write(document.body.innerHTML);
+ res.write('<hr style="clear:both;"/>Your wikitext:');
+ textarea( res, src );
}
- try {
- console.log('starting parsing of ' + req.params[0]);
- // FIXME: This does not handle includes or templates
correctly
- env.setPageSrcInfo( src );
- parser.processToplevelDoc( src );
- } catch (e) {
- res.setHeader('Content-Type', 'text/plain;
charset=UTF-8');
- console.error( e.stack || e.toString() );
- res.send( e.stack || e.toString(), 500 );
- }
- };
-
- getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb,
req );
-} );
+ res.end();
+ });
+ if (env.conf.parsoid.allowCORS) {
+ // allow cross-domain requests (CORS) so that parsoid service
+ // can be used by third-party sites
+ res.setHeader('Access-Control-Allow-Origin',
+ env.conf.parsoid.allowCORS);
+ }
+ try {
+ console.log('starting parsing of ' + req.params[0]);
+ // FIXME: This does not handle includes or templates correctly
+ env.setPageSrcInfo( src );
+ parser.processToplevelDoc( src );
+ } catch (e) {
+ res.setHeader('Content-Type', 'text/plain; charset=UTF-8');
+ console.error( e.stack || e.toString() );
+ res.send( e.stack || e.toString(), 500 );
+ }
+});
// Round-trip article testing
-app.get( new RegExp('/_rt/(' + getInterwikiRE() + ')/(.*)'), function(req,
res) {
- var cb = function ( env ) {
- req.connection.setTimeout(300 * 1000);
+app.get( new RegExp('/_rt/(' + getInterwikiRE() + ')/(.*)'), interParams,
parserEnvMw, function(req, res) {
+ var env = res.locals.env;
+ req.connection.setTimeout(300 * 1000);
- if ( env.page.name === 'favicon.ico' ) {
- res.send( 'no favicon yet..', 404 );
- return;
- }
+ if ( env.page.name === 'favicon.ico' ) {
+ res.send( 'no favicon yet..', 404 );
+ return;
+ }
- var target = env.resolveTitle( env.normalizeTitle(
env.page.name ), '' );
+ var target = env.resolveTitle( env.normalizeTitle( env.page.name ), ''
);
- console.log('starting parsing of ' + target);
- var oldid = null;
- if ( req.query.oldid ) {
- oldid = req.query.oldid;
- }
- var tpr = new TemplateRequest( env, target, oldid );
- tpr.once('src', parse.bind( tpr, env, req, res,
roundTripDiff.bind( null, false ) ));
- };
-
- getParserServiceEnv( res, req.params[0], req.params[1], cb, req );
-} );
+ console.log('starting parsing of ' + target);
+ var oldid = null;
+ if ( req.query.oldid ) {
+ oldid = req.query.oldid;
+ }
+ var tpr = new TemplateRequest( env, target, oldid );
+ tpr.once('src', parse.bind( tpr, env, req, res, roundTripDiff.bind(
null, false ) ));
+});
// Round-trip article testing with newline stripping for editor-created HTML
// simulation
-app.get( new RegExp('/_rtve/(' + getInterwikiRE() + ')/(.*)') , function(req,
res) {
- var cb = function ( env ) {
- if ( env.page.name === 'favicon.ico' ) {
- res.send( 'no favicon yet..', 404 );
- return;
- }
+app.get( new RegExp('/_rtve/(' + getInterwikiRE() + ')/(.*)'), interParams,
parserEnvMw, function(req, res) {
+ var env = res.locals.env;
+ if ( env.page.name === 'favicon.ico' ) {
+ res.send( 'no favicon yet..', 404 );
+ return;
+ }
- var target = env.resolveTitle( env.normalizeTitle(
env.page.name ), '' );
+ var target = env.resolveTitle( env.normalizeTitle( env.page.name ), ''
);
- console.log('starting parsing of ' + target);
- var oldid = null;
- if ( req.query.oldid ) {
- oldid = req.query.oldid;
- }
- var tpr = new TemplateRequest( env, target, oldid ),
- cb = function ( req, res, src, document ) {
- // strip newlines from the html
- var html =
document.innerHTML.replace(/[\r\n]/g, ''),
- newDocument = DU.parseHTML(html);
- roundTripDiff( false, req, res, src,
newDocument );
- };
+ console.log('starting parsing of ' + target);
+ var oldid = null;
+ if ( req.query.oldid ) {
+ oldid = req.query.oldid;
+ }
+ var tpr = new TemplateRequest( env, target, oldid ),
+ cb = function ( req, res, src, document ) {
+ // strip newlines from the html
+ var html = document.innerHTML.replace(/[\r\n]/g, ''),
+ newDocument = DU.parseHTML(html);
+ roundTripDiff( false, req, res, src, newDocument );
+ };
- tpr.once('src', parse.bind( tpr, env, req, res, cb ));
- };
-
- getParserServiceEnv( res, req.params[0], req.params[1], cb, req );
+ tpr.once('src', parse.bind( tpr, env, req, res, cb ));
});
// Round-trip article testing with selser over re-parsed HTML.
-app.get( new RegExp('/_rtselser/(' + getInterwikiRE() + ')/(.*)') , function
(req, res) {
- var envCb = function ( env ) {
- if ( env.page.name === 'favicon.ico' ) {
- res.send( 'no favicon yet..', 404 );
- return;
- }
+app.get( new RegExp('/_rtselser/(' + getInterwikiRE() + ')/(.*)'),
interParams, parserEnvMw, function (req, res) {
+ var env = res.locals.env;
+ if ( env.page.name === 'favicon.ico' ) {
+ res.send( 'no favicon yet..', 404 );
+ return;
+ }
- var target = env.resolveTitle( env.normalizeTitle(
env.page.name ), '' );
+ var target = env.resolveTitle( env.normalizeTitle( env.page.name ), ''
);
- console.log( 'starting parsing of ' + target );
- var oldid = null;
- if ( req.query.oldid ) {
- oldid = req.query.oldid;
- }
- var tpr = new TemplateRequest( env, target, oldid ),
- tprCb = function ( req, res, src, document ) {
- var newDocument = DU.parseHTML(
DU.serializeNode(document) );
- roundTripDiff( true, req, res, src, newDocument
);
- };
+ console.log( 'starting parsing of ' + target );
+ var oldid = null;
+ if ( req.query.oldid ) {
+ oldid = req.query.oldid;
+ }
+ var tpr = new TemplateRequest( env, target, oldid ),
+ tprCb = function ( req, res, src, document ) {
+ var newDocument = DU.parseHTML(
DU.serializeNode(document) );
+ roundTripDiff( true, req, res, src, newDocument );
+ };
- tpr.once( 'src', parse.bind( tpr, env, req, res, tprCb ) );
- };
-
- getParserServiceEnv( res, req.params[0], req.params[1], envCb );
+ tpr.once( 'src', parse.bind( tpr, env, req, res, tprCb ) );
});
// Form-based round-tripping for manual testing
-app.get(/\/_rtform\/(.*)/, function ( req, res ) {
- var cb = function ( env ) {
- res.setHeader('Content-Type', 'text/html; charset=UTF-8');
- res.write( "Your wikitext:" );
- textarea( res );
- res.end('');
- };
-
- getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb,
req );
+app.get(/\/_rtform\/(.*)/, defaultParams, parserEnvMw, function ( req, res ) {
+ res.setHeader('Content-Type', 'text/html; charset=UTF-8');
+ res.write( "Your wikitext:" );
+ textarea( res );
+ res.end();
});
-app.post(/\/_rtform\/(.*)/, function ( req, res ) {
- var cb = function ( env ) {
- res.setHeader('Content-Type', 'text/html; charset=UTF-8');
- // we don't care about \r, and normalize everything to \n
- parse( env, req, res, roundTripDiff.bind( null, false ), null, {
- revision: { '*': req.body.content.replace(/\r/g, '') }
- });
- };
-
- getParserServiceEnv( res, parsoidConfig.defaultWiki, req.params[0], cb,
req );
-} );
+app.post(/\/_rtform\/(.*)/, defaultParams, parserEnvMw, function ( req, res ) {
+ var env = res.locals.env;
+ res.setHeader('Content-Type', 'text/html; charset=UTF-8');
+ // we don't care about \r, and normalize everything to \n
+ parse( env, req, res, roundTripDiff.bind( null, false ), null, {
+ revision: { '*': req.body.content.replace(/\r/g, '') }
+ });
+});
// Regular article parsing
-app.get(new RegExp( '/(' + getInterwikiRE() + ')/(.*)' ), function(req, res) {
+app.get(new RegExp( '/(' + getInterwikiRE() + ')/(.*)' ), interParams,
parserEnvMw, function(req, res) {
+ var env = res.locals.env;
+ var prefix = req.params[0];
+
// TODO gwicke: re-enable this when actually using Varnish
//if (/only-if-cached/.test(req.headers['cache-control'])) {
// res.send( 'Clearly not cached since this request reached
Parsoid. Please fix Varnish.',
@@ -629,83 +630,75 @@
// return;
//}
- var cb = function ( env ) {
- if ( env.page.name === 'favicon.ico' ) {
- res.send( 'no favicon yet..', 404 );
- return;
- }
+ if ( env.page.name === 'favicon.ico' ) {
+ res.send( 'no favicon yet..', 404 );
+ return;
+ }
- //console.log(req.headers);
+ //console.log(req.headers);
- var target = env.resolveTitle( env.normalizeTitle(
env.page.name ), '' );
+ var target = env.resolveTitle( env.normalizeTitle( env.page.name ), ''
);
- // Set the timeout to 900 seconds..
- req.connection.setTimeout(900 * 1000);
+ // Set the timeout to 900 seconds..
+ req.connection.setTimeout(900 * 1000);
- console.log('starting parsing of ' + prefix + ':' + target);
- var oldid = null;
- if ( req.query.oldid && !req.headers.cookie ) {
- oldid = req.query.oldid;
- res.setHeader('Cache-Control', 's-maxage=2592000');
- } else {
- // Don't cache requests with a session or no oldid
- res.setHeader('Cache-Control',
'private,no-cache,s-maxage=0');
- }
- if (env.conf.parsoid.allowCORS) {
- // allow cross-domain requests (CORS) so that parsoid
service
- // can be used by third-party sites
- res.setHeader('Access-Control-Allow-Origin',
- env.conf.parsoid.allowCORS);
- }
+ console.log('starting parsing of ' + prefix + ':' + target);
+ var oldid = null;
+ if ( req.query.oldid && !req.headers.cookie ) {
+ oldid = req.query.oldid;
+ res.setHeader('Cache-Control', 's-maxage=2592000');
+ } else {
+ // Don't cache requests with a session or no oldid
+ res.setHeader('Cache-Control', 'private,no-cache,s-maxage=0');
+ }
+ if (env.conf.parsoid.allowCORS) {
+ // allow cross-domain requests (CORS) so that parsoid service
+ // can be used by third-party sites
+ res.setHeader('Access-Control-Allow-Origin',
+ env.conf.parsoid.allowCORS);
+ }
- var tpr = new TemplateRequest( env, target, oldid );
- tpr.once('src', parse.bind( null, env, req, res, function (
req, res, src, doc ) {
- var out = DU.serializeNode(doc.documentElement);
- res.setHeader('X-Parsoid-Performance',
env.getPerformanceHeader());
- res.end(out);
- console.warn("completed parsing of " + prefix +
- ':' + target + " in " +
env.performance.duration + " ms");
- }));
- };
-
- var prefix = req.params[0];
- getParserServiceEnv( res, prefix, req.params[1], cb, req );
-} );
+ var tpr = new TemplateRequest( env, target, oldid );
+ tpr.once('src', parse.bind( null, env, req, res, function ( req, res,
src, doc ) {
+ var out = DU.serializeNode(doc.documentElement);
+ res.setHeader('X-Parsoid-Performance',
env.getPerformanceHeader());
+ res.end(out);
+ console.warn("completed parsing of " + prefix +
+ ':' + target + " in " + env.performance.duration + "
ms");
+ }));
+});
// Regular article serialization using POST
-app.post( new RegExp( '/(' + getInterwikiRE() + ')/(.*)' ), function ( req,
res ) {
- var cb = function ( env ) {
- var doc, oldid = req.body.oldid || null;
- env.page.id = oldid;
+app.post( new RegExp( '/(' + getInterwikiRE() + ')/(.*)' ), interParams,
parserEnvMw, function ( req, res ) {
+ var env = res.locals.env;
+ var doc, oldid = req.body.oldid || null;
+ env.page.id = oldid;
- res.setHeader('Content-Type', 'text/x-mediawiki;
charset=UTF-8');
+ res.setHeader('Content-Type', 'text/x-mediawiki; charset=UTF-8');
- try {
- doc = DU.parseHTML(req.body.content);
- } catch ( e ) {
- console.log( 'There was an error in the HTML5 parser!
Sending it back to the editor.' );
- env.errCB(e);
- return;
- }
+ try {
+ doc = DU.parseHTML(req.body.content);
+ } catch ( e ) {
+ console.log( 'There was an error in the HTML5 parser! Sending
it back to the editor.' );
+ env.errCB(e);
+ return;
+ }
- try {
- var out = [];
- new Serializer( { env: env, oldid: env.page.id }
).serializeDOM(
- doc.body,
- function ( chunk ) {
- out.push(chunk);
- }, function () {
- res.setHeader('X-Parsoid-Performance',
env.getPerformanceHeader());
- res.write( out.join('') );
- res.end('');
- } );
- } catch ( e ) {
- env.errCB( e );
- }
- };
-
- getParserServiceEnv( res, req.params[0], req.params[1], cb, req );
-} );
+ try {
+ var out = [];
+ new Serializer( { env: env, oldid: env.page.id } ).serializeDOM(
+ doc.body,
+ function ( chunk ) {
+ out.push(chunk);
+ }, function () {
+ res.setHeader('X-Parsoid-Performance',
env.getPerformanceHeader());
+ res.write( out.join('') );
+ res.end('');
+ } );
+ } catch ( e ) {
+ env.errCB( e );
+ }
+});
/**
* Continuous integration end points
--
To view, visit https://gerrit.wikimedia.org/r/91335
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I2bdc3aa5102d96e90515fe80b762f88bc8537175
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Arlolra <[email protected]>
Gerrit-Reviewer: GWicke <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits