jenkins-bot has submitted this change and it was merged. Change subject: Try to make sure env and old documents are freed early ......................................................................
Try to make sure env and old documents are freed early Based on heap dumps and memcheck diffs, I tried to make sure that the environment and all data it hangs onto (such as page.dom) are freed as early as possible, even if the HTTP connection is still in a lingering state. Also: * changed some document callbacks from on('document',..) to .once to break the link as early as possible for faster GC * removed old ci entry points in web service * removed old caching code in tokenizer Change-Id: Ic929b054441c786fa4d3fc0f0f9c74e2182471e6 --- M api/ParserService.js M lib/mediawiki.Util.js M lib/mediawiki.tokenizer.peg.js 3 files changed, 23 insertions(+), 101 deletions(-) Approvals: Subramanya Sastry: Looks good to me, approved jenkins-bot: Verified diff --git a/api/ParserService.js b/api/ParserService.js index e6b2826..2103962 100644 --- a/api/ParserService.js +++ b/api/ParserService.js @@ -20,6 +20,7 @@ // global includes var express = require('express'), domino = require('domino'), + // memwatch = require('memwatch'), jsDiff = require('diff'), childProc = require('child_process'), spawn = childProc.spawn, @@ -594,6 +595,7 @@ function html2wt( req, res, html ) { var env = res.locals.env; + res.locals.env = {}; env.page.id = req.body.oldid || null; var html2wtCb = function () { @@ -601,8 +603,9 @@ try { doc = DU.parseHTML( html.replace( /\r/g, '' ) ); } catch ( e ) { - console.log( 'There was an error in the HTML5 parser! Sending it back to the editor.' ); + console.log( 'There was an error in the HTML5 parser!' ); env.errCB( e ); + res.end(); return; } @@ -616,9 +619,11 @@ res.setHeader( 'Content-Type', 'text/x-mediawiki; charset=UTF-8' ); res.setHeader( 'X-Parsoid-Performance', env.getPerformanceHeader() ); res.end( out.join( '' ) ); + res.locals = {}; } ); } catch ( e ) { env.errCB( e ); + res.end(); } }; @@ -640,11 +645,12 @@ function wt2html( req, res, wt ) { var env = res.locals.env; + res.locals.env = {}; var prefix = res.locals.iwp; var target = env.resolveTitle( env.normalizeTitle( env.page.name ), '' ); - // Set the timeout to 900 seconds.. - req.connection.setTimeout( 900 * 1000 ); + // Set the timeout to 600 seconds.. + req.connection.setTimeout( 600 * 1000 ); console.log( 'starting parsing of ' + prefix + ':' + target ); @@ -666,7 +672,7 @@ } var parser = Util.getParserPipeline( env, 'text/x-mediawiki/full' ); - parser.on( 'document', function ( document ) { + parser.once( 'document', function ( document ) { // Don't cache requests when wt is set in case somebody uses // GET for wikitext parsing res.setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' ); @@ -686,6 +692,7 @@ parser.processToplevelDoc( wt ); } catch ( e ) { env.errCB( e, true ); + res.end(); return; } }; @@ -713,6 +720,7 @@ tmpCb = function ( err, src_and_metadata ) { if ( err ) { env.errCB( err, true ); + res.end(); return; } @@ -764,45 +772,6 @@ } }); - -/** - * Continuous integration end points - * - * No longer used currently, as our testing now happens on the central Jenkins - * server. - */ -app.get( /\/_ci\/refs\/changes\/(\d+)\/(\d+)\/(\d+)/, function ( req, res ) { - var gerritChange = 'refs/changes/' + req.params[0] + '/' + req.params[1] + '/' + req.params[2]; - var testSh = spawn( './testGerritChange.sh', [ gerritChange ], { - cwd: '.' - } ); - - res.setHeader('Content-Type', 'text/xml; charset=UTF-8'); - - testSh.stdout.on( 'data', function ( data ) { - res.write( data ); - } ); - - testSh.on( 'exit', function () { - res.end( '' ); - } ); -} ); - -app.get( /\/_ci\/master/, function ( req, res ) { - var testSh = spawn( './testGerritMaster.sh', [], { - cwd: '.' - } ); - - res.setHeader('Content-Type', 'text/xml; charset=UTF-8'); - - testSh.stdout.on( 'data', function ( data ) { - res.write( data ); - } ); - - testSh.on( 'exit', function () { - res.end( '' ); - } ); -} ); app.use( express.static( __dirname + '/scripts' ) ); app.use( express.limit( '15mb' ) ); diff --git a/lib/mediawiki.Util.js b/lib/mediawiki.Util.js index aa0a625..143c499 100644 --- a/lib/mediawiki.Util.js +++ b/lib/mediawiki.Util.js @@ -1113,8 +1113,11 @@ Util.diff = diff; // XXX gwicke: move to a Parser object? +var ParserPipelineFactory; Util.getParserPipeline = function ( env, type ) { - var ParserPipelineFactory = require( './mediawiki.parser.js' ).ParserPipelineFactory; + if (!ParserPipelineFactory) { + ParserPipelineFactory = require( './mediawiki.parser.js' ).ParserPipelineFactory; + } return ( new ParserPipelineFactory( env ) ).makePipeline( type ); }; @@ -1132,7 +1135,7 @@ // Now go ahead with the actual parsing var parser = Util.getParserPipeline( env, 'text/x-mediawiki/full' ); - parser.on( 'document', cb.bind( null, env, null ) ); + parser.once( 'document', cb.bind( null, env, null ) ); try { parser.processToplevelDoc( src ); } catch ( e ) { diff --git a/lib/mediawiki.tokenizer.peg.js b/lib/mediawiki.tokenizer.peg.js index 2e037fc..68df136 100644 --- a/lib/mediawiki.tokenizer.peg.js +++ b/lib/mediawiki.tokenizer.peg.js @@ -12,7 +12,6 @@ var PEG = require('pegjs'), path = require('path'), - LRU = require("lru-cache"), fs = require('fs'), events = require('events'), util = require('util'); @@ -22,10 +21,6 @@ this.env = env; this.options = options || {}; this.offsets = {}; - this.canCache = this.options.canCache; - if ( this.canCache ) { - this.cacheAccum = { chunks: [] }; - } } // Inherit from EventEmitter @@ -39,8 +34,8 @@ * tokenized enabling the tokenized chunks to be processed at * the earliest possible opportunity. */ -PegTokenizer.prototype.process = function( text, cacheKey ) { - this._processText(text, false, cacheKey); +PegTokenizer.prototype.process = function( text ) { + this._processText(text, false); }; /** @@ -54,8 +49,8 @@ /* * Process text synchronously -- the text is tokenized in one shot */ -PegTokenizer.prototype.processSync = function( text, cacheKey ) { - this._processText(text, true, cacheKey); +PegTokenizer.prototype.processSync = function( text ) { + this._processText(text, true); }; /* @@ -63,7 +58,7 @@ * Consumers are supposed to register with PegTokenizer before calling * process(). */ -PegTokenizer.prototype._processText = function( text, fullParse, cacheKey ) { +PegTokenizer.prototype._processText = function( text, fullParse ) { if ( !this.tokenizer ) { // Construct a singleton static tokenizer. var pegSrcPath = path.join( __dirname, 'pegTokenizer.pegjs.txt' ); @@ -102,30 +97,6 @@ PegTokenizer.prototype.tokenizer = eval( tokenizerSource ); // alias the parse method this.tokenizer.tokenize = this.tokenizer.parse; - - // Also, while we are at it, create a tokenizer cache. - PegTokenizer.prototype.cache = LRU(25); - } - if ( this.canCache && cacheKey ) { - var maybeCached = this.cache.get(cacheKey); - if ( maybeCached ) { - try { - this.env.tp( 'tokenizer cache hit for ' + cacheKey ); - //console.warn( JSON.stringify( maybeCached, null, 2 ) ); - for ( var i = 0, l = maybeCached.length; i < l; i++ ) { - var cachedChunk = maybeCached[i]; - // emit a clone of this chunk - this.emit('chunk', cachedChunk ); - } - this.emit('end'); - return; - } catch ( e ) { - this.env.errCB(e); - } - - } else { - this.cacheAccum.key = cacheKey; - } } // Some input normalization: force a trailing newline @@ -133,12 +104,7 @@ // text += "\n"; //} - var chunkCB; - if ( this.canCache && cacheKey ) { - chunkCB = this.onCacheChunk.bind( this ); - } else { - chunkCB = this.emit.bind( this, 'chunk' ); - } + var chunkCB = this.emit.bind( this, 'chunk' ); // Kick it off! var srcOffset = this.offsets.startOffset || 0; @@ -187,24 +153,8 @@ } }; -PegTokenizer.prototype.onCacheChunk = function ( chunk ) { - // make a deep copy of the chunk for now - this.cacheAccum.chunks.push( chunk.slice() ); - //console.warn( 'onCacheChunk: ' + this.cacheAccum.key + JSON.stringify( chunk, null, 2 ) ); - try { - this.emit('chunk', chunk); - } catch ( e ) { - this.env.errCB(e); - } -}; PegTokenizer.prototype.onEnd = function ( ) { - if ( this.canCache && this.cacheAccum.key) { - this.cache.set(this.cacheAccum.key, this.cacheAccum.chunks); - // reset cacheAccum - this.cacheAccum = { chunks: [] }; - } - // Reset source offsets this.offsets.startOffset = undefined; this.offsets.endOffset = undefined; -- To view, visit https://gerrit.wikimedia.org/r/103756 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ic929b054441c786fa4d3fc0f0f9c74e2182471e6 Gerrit-PatchSet: 6 Gerrit-Project: mediawiki/services/parsoid Gerrit-Branch: master Gerrit-Owner: GWicke <gwi...@wikimedia.org> Gerrit-Reviewer: Cscott <canan...@wikimedia.org> Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org> Gerrit-Reviewer: jenkins-bot _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits