jenkins-bot has submitted this change and it was merged.

Change subject: Try to make sure env and old documents are freed early
......................................................................


Try to make sure env and old documents are freed early

Based on heap dumps and memcheck diffs, I tried to make sure that the
environment and all data it hangs onto (such as page.dom) are freed as early
as possible, even while the HTTP connection is still lingering. Also:

* changed some document callbacks from .on('document', ...) to .once so the
  listener (and whatever its closure retains) is released as early as
  possible, for faster GC (see the sketch after this list)

* removed old CI entry points in the web service

* removed old caching code in the tokenizer

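For reference, here is the core pattern of this change in simplified form.
This is an illustrative sketch, not the exact code in api/ParserService.js
(in particular, the serialization inside the handler is a stand-in):

    function wt2html( req, res, wt ) {
        // Take a local reference, then detach the env from the long-lived
        // response object so a lingering connection does not pin it.
        var env = res.locals.env;
        res.locals.env = {};

        var parser = Util.getParserPipeline( env, 'text/x-mediawiki/full' );
        // .once unregisters the listener after the first 'document' event,
        // so the parser stops referencing the handler closure (and the env
        // and document it retains) as soon as the document is emitted.
        parser.once( 'document', function ( doc ) {
            res.end( doc.body.innerHTML ); // simplified serialization
            res.locals = {}; // drop remaining references for faster GC
        } );
        parser.processToplevelDoc( wt );
    }
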
Change-Id: Ic929b054441c786fa4d3fc0f0f9c74e2182471e6
---
M api/ParserService.js
M lib/mediawiki.Util.js
M lib/mediawiki.tokenizer.peg.js
3 files changed, 23 insertions(+), 101 deletions(-)

Approvals:
  Subramanya Sastry: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/api/ParserService.js b/api/ParserService.js
index e6b2826..2103962 100644
--- a/api/ParserService.js
+++ b/api/ParserService.js
@@ -20,6 +20,7 @@
 // global includes
 var express = require('express'),
        domino = require('domino'),
+       // memwatch = require('memwatch'),
        jsDiff = require('diff'),
        childProc = require('child_process'),
        spawn = childProc.spawn,
@@ -594,6 +595,7 @@
 
 function html2wt( req, res, html ) {
        var env = res.locals.env;
+       res.locals.env = {};
        env.page.id = req.body.oldid || null;
 
        var html2wtCb = function () {
@@ -601,8 +603,9 @@
                try {
                        doc = DU.parseHTML( html.replace( /\r/g, '' ) );
                } catch ( e ) {
-                       console.log( 'There was an error in the HTML5 parser! Sending it back to the editor.' );
+                       console.log( 'There was an error in the HTML5 parser!' );
                        env.errCB( e );
+                       res.end();
                        return;
                }
 
@@ -616,9 +619,11 @@
                                        res.setHeader( 'Content-Type', 'text/x-mediawiki; charset=UTF-8' );
                                        res.setHeader( 'X-Parsoid-Performance', env.getPerformanceHeader() );
                                        res.end( out.join( '' ) );
+                                       res.locals = {};
                                } );
                } catch ( e ) {
                        env.errCB( e );
+                       res.end();
                }
        };
 
@@ -640,11 +645,12 @@
 
 function wt2html( req, res, wt ) {
        var env = res.locals.env;
+       res.locals.env = {};
        var prefix = res.locals.iwp;
        var target = env.resolveTitle( env.normalizeTitle( env.page.name ), '' );
 
-       // Set the timeout to 900 seconds..
-       req.connection.setTimeout( 900 * 1000 );
+       // Set the timeout to 600 seconds..
+       req.connection.setTimeout( 600 * 1000 );
 
        console.log( 'starting parsing of ' + prefix + ':' + target );
 
@@ -666,7 +672,7 @@
                }
 
                var parser = Util.getParserPipeline( env, 'text/x-mediawiki/full' );
-               parser.on( 'document', function ( document ) {
+               parser.once( 'document', function ( document ) {
                        // Don't cache requests when wt is set in case somebody uses
                        // GET for wikitext parsing
                        res.setHeader( 'Cache-Control', 'private,no-cache,s-maxage=0' );
@@ -686,6 +692,7 @@
                                parser.processToplevelDoc( wt );
                        } catch ( e ) {
                                env.errCB( e, true );
+                               res.end();
                                return;
                        }
                };
@@ -713,6 +720,7 @@
                        tmpCb = function ( err, src_and_metadata ) {
                                if ( err ) {
                                        env.errCB( err, true );
+                                       res.end();
                                        return;
                                }
 
@@ -764,45 +772,6 @@
        }
 });
 
-
-/**
- * Continuous integration end points
- *
- * No longer used currently, as our testing now happens on the central Jenkins
- * server.
- */
-app.get( /\/_ci\/refs\/changes\/(\d+)\/(\d+)\/(\d+)/, function ( req, res ) {
-       var gerritChange = 'refs/changes/' + req.params[0] + '/' + req.params[1] + '/' + req.params[2];
-       var testSh = spawn( './testGerritChange.sh', [ gerritChange ], {
-               cwd: '.'
-       } );
-
-       res.setHeader('Content-Type', 'text/xml; charset=UTF-8');
-
-       testSh.stdout.on( 'data', function ( data ) {
-               res.write( data );
-       } );
-
-       testSh.on( 'exit', function () {
-               res.end( '' );
-       } );
-} );
-
-app.get( /\/_ci\/master/, function ( req, res ) {
-       var testSh = spawn( './testGerritMaster.sh', [], {
-               cwd: '.'
-       } );
-
-       res.setHeader('Content-Type', 'text/xml; charset=UTF-8');
-
-       testSh.stdout.on( 'data', function ( data ) {
-               res.write( data );
-       } );
-
-       testSh.on( 'exit', function () {
-               res.end( '' );
-       } );
-} );
 
 app.use( express.static( __dirname + '/scripts' ) );
 app.use( express.limit( '15mb' ) );
diff --git a/lib/mediawiki.Util.js b/lib/mediawiki.Util.js
index aa0a625..143c499 100644
--- a/lib/mediawiki.Util.js
+++ b/lib/mediawiki.Util.js
@@ -1113,8 +1113,11 @@
 Util.diff = diff;
 
 // XXX gwicke: move to a Parser object?
+var ParserPipelineFactory;
 Util.getParserPipeline = function ( env, type ) {
-       var ParserPipelineFactory = require( './mediawiki.parser.js' ).ParserPipelineFactory;
+       if (!ParserPipelineFactory) {
+               ParserPipelineFactory = require( './mediawiki.parser.js' ).ParserPipelineFactory;
+       }
        return ( new ParserPipelineFactory( env ) ).makePipeline( type );
 };
 
@@ -1132,7 +1135,7 @@
 
                // Now go ahead with the actual parsing
                var parser = Util.getParserPipeline( env, 'text/x-mediawiki/full' );
-               parser.on( 'document', cb.bind( null, env, null ) );
+               parser.once( 'document', cb.bind( null, env, null ) );
                try {
                        parser.processToplevelDoc( src );
                } catch ( e ) {
diff --git a/lib/mediawiki.tokenizer.peg.js b/lib/mediawiki.tokenizer.peg.js
index 2e037fc..68df136 100644
--- a/lib/mediawiki.tokenizer.peg.js
+++ b/lib/mediawiki.tokenizer.peg.js
@@ -12,7 +12,6 @@
 
 var PEG = require('pegjs'),
        path = require('path'),
-       LRU = require("lru-cache"),
        fs = require('fs'),
        events = require('events'),
        util = require('util');
@@ -22,10 +21,6 @@
        this.env = env;
        this.options = options || {};
        this.offsets = {};
-       this.canCache = this.options.canCache;
-       if ( this.canCache ) {
-               this.cacheAccum = { chunks: [] };
-       }
 }
 
 // Inherit from EventEmitter
@@ -39,8 +34,8 @@
  * tokenized enabling the tokenized chunks to be processed at
  * the earliest possible opportunity.
  */
-PegTokenizer.prototype.process = function( text, cacheKey ) {
-       this._processText(text, false, cacheKey);
+PegTokenizer.prototype.process = function( text ) {
+       this._processText(text, false);
 };
 
 /**
@@ -54,8 +49,8 @@
 /*
  * Process text synchronously -- the text is tokenized in one shot
  */
-PegTokenizer.prototype.processSync = function( text, cacheKey ) {
-       this._processText(text, true, cacheKey);
+PegTokenizer.prototype.processSync = function( text ) {
+       this._processText(text, true);
 };
 
 /*
@@ -63,7 +58,7 @@
  * Consumers are supposed to register with PegTokenizer before calling
  * process().
  */
-PegTokenizer.prototype._processText = function( text, fullParse, cacheKey ) {
+PegTokenizer.prototype._processText = function( text, fullParse ) {
        if ( !this.tokenizer ) {
                // Construct a singleton static tokenizer.
                var pegSrcPath = path.join( __dirname, 'pegTokenizer.pegjs.txt' );
@@ -102,30 +97,6 @@
                PegTokenizer.prototype.tokenizer = eval( tokenizerSource );
                // alias the parse method
                this.tokenizer.tokenize = this.tokenizer.parse;
-
-               // Also, while we are at it, create a tokenizer cache.
-               PegTokenizer.prototype.cache = LRU(25);
-       }
-       if ( this.canCache && cacheKey ) {
-               var maybeCached = this.cache.get(cacheKey);
-               if ( maybeCached ) {
-                       try {
-                               this.env.tp( 'tokenizer cache hit for ' + cacheKey );
-                               //console.warn( JSON.stringify( maybeCached, null, 2 ) );
-                               for ( var i = 0, l = maybeCached.length; i < l; i++ ) {
-                                       var cachedChunk = maybeCached[i];
-                                       // emit a clone of this chunk
-                                       this.emit('chunk', cachedChunk );
-                               }
-                               this.emit('end');
-                               return;
-                       } catch ( e ) {
-                               this.env.errCB(e);
-                       }
-
-               } else {
-                       this.cacheAccum.key = cacheKey;
-               }
        }
 
        // Some input normalization: force a trailing newline
@@ -133,12 +104,7 @@
        //      text += "\n";
        //}
 
-       var chunkCB;
-       if ( this.canCache && cacheKey ) {
-               chunkCB = this.onCacheChunk.bind( this );
-       } else {
-               chunkCB = this.emit.bind( this, 'chunk' );
-       }
+       var chunkCB = this.emit.bind( this, 'chunk' );
 
        // Kick it off!
        var srcOffset = this.offsets.startOffset || 0;
@@ -187,24 +153,8 @@
        }
 };
 
-PegTokenizer.prototype.onCacheChunk = function ( chunk ) {
-       // make a deep copy of the chunk for now
-       this.cacheAccum.chunks.push( chunk.slice() );
-       //console.warn( 'onCacheChunk: ' + this.cacheAccum.key + JSON.stringify( chunk, null, 2 ) );
-       try {
-               this.emit('chunk', chunk);
-       } catch ( e ) {
-               this.env.errCB(e);
-       }
-};
 
 PegTokenizer.prototype.onEnd = function ( ) {
-       if ( this.canCache && this.cacheAccum.key) {
-               this.cache.set(this.cacheAccum.key, this.cacheAccum.chunks);
-               // reset cacheAccum
-               this.cacheAccum = { chunks: [] };
-       }
-
        // Reset source offsets
        this.offsets.startOffset = undefined;
        this.offsets.endOffset = undefined;

-- 
To view, visit https://gerrit.wikimedia.org/r/103756
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ic929b054441c786fa4d3fc0f0f9c74e2182471e6
Gerrit-PatchSet: 6
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: GWicke <gwi...@wikimedia.org>
Gerrit-Reviewer: Cscott <canan...@wikimedia.org>
Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot
