Marcoil has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/92758


Change subject: Bug 55802: Batch database title requests in rt-testing server
......................................................................

Bug 55802: Batch database title requests in rt-testing server

Crashers have been separated from reported errors; there is both a counter
for the latest commit's crashers on the main page and a new page, /crashers, with
a list of all the known crashers (which get removed when a new commit
finishes correctly).

Change-Id: I67166ff0d057c622ea18e14bb668b6fb9a55e937
---
M js/tests/server/server.js
M js/tests/server/server.settings.js.example
2 files changed, 179 insertions(+), 89 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Parsoid 
refs/changes/58/92758/1

diff --git a/js/tests/server/server.js b/js/tests/server/server.js
index 4544f13..cd0f7c0 100755
--- a/js/tests/server/server.js
+++ b/js/tests/server/server.js
@@ -15,7 +15,8 @@
        'debug': false,
        'fetches': 6,
        'tries': 6,
-       'cutofftime': 600
+       'cutofftime': 600,
+       'batch': 50
 };
 
 // Settings file
@@ -70,7 +71,11 @@
        .options( 'c', {
                alias: 'cutofftime',
                describe: "Time in seconds to wait for a test result."
-       })
+       } )
+       .options( 'b', {
+               alias: 'batch',
+               describe: "Number of titles to fetch from database in one 
batch."
+       } )
        .argv;
 
 if ( argv.help ) {
@@ -109,6 +114,8 @@
        maxFetchRetries = getOption( 'fetches' ),
        // The time to wait before considering a test has failed
        cutOffTime = getOption( 'cutofftime' ),
+       // The number of pages to fetch at once
+       batchSize = getOption( 'batch' ),
        debug = getOption( 'debug' );
 
 var mysql = require( 'mysql' );
@@ -143,25 +150,29 @@
        'FROM pages ' +
        'WHERE num_fetch_errors < ? AND ' +
        '( claim_hash != ? OR ' +
-               '( claim_num_tries <= ? AND claim_timestamp < ? ) ) ' +
+               '( claim_num_tries < ? AND claim_timestamp < ? ) ) ' +
        'ORDER BY claim_num_tries DESC, latest_score DESC, ' +
-       'claim_timestamp ASC LIMIT ?,1';
+       'claim_timestamp ASC LIMIT ?';
 
 var dbIncrementFetchErrorCount =
-       'UPDATE pages SET num_fetch_errors = num_fetch_errors + 1 WHERE title = 
? AND prefix = ?';
+       'UPDATE pages SET ' +
+               'claim_hash = ?, ' +
+               'num_fetch_errors = num_fetch_errors + 1, ' +
+               'claim_num_tries = 0 ' +
+               'WHERE title = ? AND prefix = ?';
 
 var dbInsertCommit =
        'INSERT IGNORE INTO commits ( hash, timestamp ) ' +
        'VALUES ( ?, ? )';
 
-var dbFindPageByClaimHash =
+var dbFindPage =
        'SELECT id ' +
        'FROM pages ' +
-       'WHERE title = ? AND prefix = ? AND claim_hash = ?';
+       'WHERE title = ? AND prefix = ?';
 
-var dbUpdatePageClaim =
+var dbUpdatePageClaims =
        'UPDATE pages SET claim_hash = ?, claim_timestamp = ?, claim_num_tries 
= claim_num_tries + 1 ' +
-       'WHERE id = ?';
+       'WHERE id IN ( ? )';
 
 var dbInsertResult =
        'INSERT INTO results ( page_id, commit_hash, result ) ' +
@@ -186,7 +197,7 @@
 var dbUpdatePageLatestResults =
        'UPDATE pages ' +
        'SET latest_stat = ?, latest_score = ?, latest_result = ?, ' +
-       'claim_timestamp = NULL, claim_num_tries = 0 ' +
+       'claim_hash = ?, claim_timestamp = NULL, claim_num_tries = 0 ' +
     'WHERE id = ?';
 
 var dbStatsQuery =
@@ -226,7 +237,13 @@
         'JOIN stats AS s2 ON s2.page_id = pages.id ' +
         'WHERE s1.commit_hash = (SELECT hash FROM commits ORDER BY timestamp 
DESC LIMIT 1 ) ' +
        'AND s2.commit_hash = (SELECT hash FROM commits ORDER BY timestamp DESC 
LIMIT 1 OFFSET 1 ) ' +
-       'AND s1.score < s2.score ) as numfixes '  +
+       'AND s1.score < s2.score ) as numfixes, '  +
+       // Get latest commit crashers
+       '(SELECT count(*) ' +
+               'FROM pages ' +
+               'WHERE claim_hash = (SELECT hash FROM commits ORDER BY 
timestamp DESC LIMIT 1) ' +
+                       'AND claim_num_tries >= ? ' +
+                       'AND claim_timestamp < ?) AS crashers ' +
 
        'FROM pages JOIN stats on pages.latest_stat = stats.id';
 
@@ -271,7 +288,13 @@
         'WHERE s1.commit_hash = (SELECT hash FROM commits ORDER BY timestamp 
DESC LIMIT 1 ) ' +
        'AND s2.commit_hash = (SELECT hash FROM commits ORDER BY timestamp DESC 
LIMIT 1 OFFSET 1 ) ' +
        'AND pages.prefix = ? ' +
-       'AND s1.score < s2.score ) as numfixes ' +
+       'AND s1.score < s2.score ) as numfixes, ' +
+       // Get latest commit crashers
+       '(SELECT count(*) ' +
+               'FROM pages WHERE prefix = ? ' +
+                       'AND claim_hash = (SELECT hash FROM commits ORDER BY 
timestamp DESC LIMIT 1) ' +
+                       'AND claim_num_tries >= ? ' +
+                       'AND claim_timestamp < ?) AS crashers ' +
 
        'FROM pages JOIN stats on pages.latest_stat = stats.id WHERE 
pages.prefix = ?';
 
@@ -300,6 +323,13 @@
 
 var dbFailedFetches =
        'SELECT title, prefix FROM pages WHERE num_fetch_errors >= ?';
+
+var dbCrashers =
+       'SELECT pages.title, pages.prefix, pages.claim_hash, commits.timestamp 
' +
+               'FROM pages JOIN commits ON (pages.claim_hash = commits.hash) ' 
+
+               'WHERE claim_num_tries >= ? ' +
+               'AND claim_timestamp < ? ' +
+               'ORDER BY commits.timestamp DESC';
 
 var dbFailsDistribution =
        'SELECT fails, count(*) AS num_pages ' +
@@ -400,85 +430,95 @@
        'ORDER BY commits.timestamp DESC ' +
        'LIMIT 0, ?';
 
-var transUpdateCB = function( title, prefix, hash, type, res, trans, 
success_cb, err, result ) {
+var transFetchCB = function( msg, trans, failCb, successCb, err, result ) {
        if ( err ) {
-               trans.rollback();
-               var msg = "Error inserting/updating " + type + " for page: " +  
prefix + ':' + title + " and hash: " + hash;
-               console.error( msg );
-               if ( res ) {
-                       res.send( msg, 500 );
-               }
-       } else if ( success_cb ) {
-               success_cb( result );
+               trans.rollback( function () {
+                       if ( failCb ) {
+                               failCb ( msg? msg + err.toString() : err, 
result );
+                       }
+               } );
+       } else if ( successCb ) {
+               successCb( result );
        }
 };
 
-var claimPage = function( commitHash, cutOffTimestamp, req, res ) {
-       var trans = db.startTransaction(),
-               // reduce contention with a random offset
-               randOffset = Math.floor(Math.random() * 20);
+var fetchPages = function( commitHash, cutOffTimestamp, cb ) {
+       var trans = db.startTransaction();
 
        trans.query( dbGetTitle,
-                       [ maxFetchRetries, commitHash, maxTries, 
cutOffTimestamp, randOffset ],
-                       function( err, rows ) {
-               if ( err ) {
-                       trans.rollback( function() {
-                               console.error( 'Error getting next title: ' + 
err.toString() );
-                               res.send( "Error: " + err.toString(), 500 );
-                       } );
-               } else if ( !rows || rows.length === 0 ) {
-                       // Couldn't find any page to test, just tell the client 
to wait.
-                       trans.rollback( function() {
-                               res.send( 'No available titles that fit the 
constraints.', 404 );
-                       } );
-               } else {
-                       // Found a title to process.
-                       var page = rows[0];
+                       [ maxFetchRetries, commitHash, maxTries, 
cutOffTimestamp, batchSize ],
+                       transFetchCB.bind( null, "Error getting next titles", 
trans, cb, function ( rows ) {
 
-                       // Check if the selected title has arrived at the 
maximum number of
-                       // tries, which means we need to mark it as an error.
-                       if ( page.claim_hash && page.claim_num_tries === 
maxTries ) {
-                               // Too many failures, insert an error in stats 
and retry fetch.
-                               console.log( ' CRASHER?', page.prefix + ':' + 
page.title );
-                               var score = statsScore( 0, 0, 1 );
-                               var stats = [ 0, 0, 1, score, page.id, 
commitHash ];
-                               trans.query( dbInsertStats, stats,
-                                       transUpdateCB.bind( null, page.title, 
page.prefix, commitHash, "stats", res, trans, function( insertedStat ) {
-                                               trans.query( 
dbUpdatePageLatestResults, [ insertedStat.insertId, score, null, page.id ],
-                                                       transUpdateCB.bind( 
null, page.title, page.prefix, commitHash, "latest_result", res, trans, 
function() {
-                                                               trans.commit( 
function() {
-                                                                       // 
After the error has been committed, go around
-                                                                       // 
again to get a different title.
-                                                                       
claimPage( commitHash, cutOffTimestamp, req, res );
-                                                               } );
-                                               } ) );
-                                       } ) );
-                       } else {
-                               // No outstanding claim with too many tries, so 
update with this hash.
-                               trans.query( dbUpdatePageClaim, [ commitHash, 
new Date(), page.id ],
-                                       transUpdateCB.bind( null, page.title, 
page.prefix, commitHash, "dbUpdatePageClaim", res, trans, function() {
-                                               trans.commit( function() {
-                                                       console.log( ' ->', 
page.prefix + ':' + page.title, 'num_tries: ' + page.claim_num_tries.toString() 
);
-                                                       res.send( { prefix: 
page.prefix, title: page.title },  200);
-                                               } );
-                                       } ) );
+               if ( !rows || rows.length === 0 ) {
+                       trans.commit( cb.bind( null, null, rows ) );
+               } else {
+                       // Process the rows: Weed out the crashers.
+                       var pages = [];
+                       var pageIds = [];
+                       for ( var i = 0; i < rows.length; i++ ) {
+                               var row = rows[i];
+                               pageIds.push( row.id );
+                               pages.push( { prefix: row.prefix, title: 
row.title } );
                        }
+
+                       trans.query( dbUpdatePageClaims, [ commitHash, new 
Date(), pageIds ],
+                               transFetchCB.bind( null, "Error updating 
claims", trans, cb, function () {
+                                       trans.commit( cb.bind( null, null, 
pages ));
+                               } ) );
                }
-       } ).execute();
+       } ) ).execute();
 };
 
+var fetchedPages = [];
+var lastFetchedCommit = null;
+var lastFetchedDate = new Date(0);
+
 var getTitle = function ( req, res ) {
+       var commitHash = req.query.commit;
+
        req.connection.setTimeout(300 * 1000);
        res.setHeader( 'Content-Type', 'text/plain; charset=UTF-8' );
 
-       // Select pages that were not claimed in the 10 minutes.
-       // If we didn't get a result from a client 10 minutes after
-       // it got a rt claim on a page, something is wrong with the client
-       // or with parsing the page.
-       //
-       // Hopefully, no page takes longer than 10 minutes to parse. :)
+       // Keep record of the commit, ignore if already there.
+       db.query( dbInsertCommit, [ commitHash, new Date() ], function ( err ) {
+               if ( err ) {
+                       console.error( "Error inserting commit " + commitHash );
+               }
+       });
 
-       claimPage( req.query.commit, new Date( Date.now() - ( cutOffTime * 1000 
) ), req, res );
+       var fetchCb = function ( err, pages ) {
+               if ( err ) {
+                       res.send( "Error: " + err.toString(), 500 );
+               } else if ( !pages || pages.length === 0 ) {
+                       res.send( 'No available titles that fit the 
constraints.', 404 );
+               } else {
+                       var page = pages.pop();
+                       // Store the pages for the next clients
+                       fetchedPages = pages;
+
+                       console.log( ' ->', page.prefix + ':' + page.title );
+                       res.send( page, 200 );
+               }
+       };
+
+       // Look if there's a title available in the already fetched ones.
+       // Ensure that we load a batch when the commit has changed.
+       if ( fetchedPages.length === 0 ||
+            commitHash !== lastFetchedCommit ||
+            ( lastFetchedDate.getTime() + ( cutOffTime * 1000 ) ) < Date.now() 
) {
+               // Select pages that were not claimed in the 10 minutes.
+               // If we didn't get a result from a client 10 minutes after
+               // it got a rt claim on a page, something is wrong with the 
client
+               // or with parsing the page.
+               //
+               // Hopefully, no page takes longer than 10 minutes to parse. :)
+
+               lastFetchedCommit = commitHash;
+               lastFetchedDate = new Date();
+               fetchPages( commitHash, new Date( Date.now() - ( cutOffTime * 
1000 ) ), fetchCb );
+       } else {
+               fetchCb( null, fetchedPages );
+       }
 };
 
 var statsScore = function(skipCount, failCount, errorCount) {
@@ -494,6 +534,19 @@
                perfstats.push( { type: match[ 1 ], value: match[ 2 ] } );
        }
        return perfstats;
+};
+
+var transUpdateCB = function( title, prefix, hash, type, res, trans, 
success_cb, err, result ) {
+       if ( err ) {
+               trans.rollback();
+               var msg = "Error inserting/updating " + type + " for page: " +  
prefix + ':' + title + " and hash: " + hash;
+               console.error( msg );
+               if ( res ) {
+                       res.send( msg, 500 );
+               }
+       } else if ( success_cb ) {
+               success_cb( result );
+       }
 };
 
 var insertPerfStats = function( db, pageId, commitHash, perfstats, cb ) {
@@ -536,20 +589,13 @@
 
        res.setHeader( 'Content-Type', 'text/plain; charset=UTF-8' );
 
-       // Keep record of the commit, ignore if already there.
-       db.query( dbInsertCommit, [ commitHash, new Date() ], function ( err ) {
-               if ( err ) {
-                       console.error( "Error inserting commit " + commitHash );
-               }
-       });
-
        var trans = db.startTransaction();
        //console.warn("got: " + JSON.stringify([title, commitHash, result, 
skipCount, failCount, errorCount]));
        if ( errorCount > 0 && result.match( 'DoesNotExist' ) ) {
                // Page fetch error, increment the fetch error count so, when 
it goes over
                /// maxFetchRetries, it won't be considered for tests again.
                console.log( 'XX', prefix + ':' + title );
-               trans.query( dbIncrementFetchErrorCount, [title, prefix],
+               trans.query( dbIncrementFetchErrorCount, [commitHash, title, 
prefix],
                        transUpdateCB.bind( null, title, prefix, commitHash, 
"page fetch error count", res, trans, null ) )
                        .commit( function( err ) {
                                if ( err ) {
@@ -559,7 +605,7 @@
                        } );
 
        } else {
-               trans.query( dbFindPageByClaimHash, [ title, prefix, commitHash 
], function ( err, pages ) {
+               trans.query( dbFindPage, [ title, prefix ], function ( err, 
pages ) {
                        if ( !err && pages && pages.length === 1 ) {
                                // Found the correct page, fill the details up
                                var page = pages[0];
@@ -577,7 +623,7 @@
                                                                latest_statId = 
insertedStat.insertId;
 
                                                                // And now 
update the page with the latest info
-                                                               trans.query( 
dbUpdatePageLatestResults, [ latest_statId, score, latest_resultId, page.id ],
+                                                               trans.query( 
dbUpdatePageLatestResults, [ latest_statId, score, latest_resultId, commitHash, 
page.id ],
                                                                        
transUpdateCB.bind( null, title, prefix, commitHash, "latest result", res, 
trans, function() {
                                                                                
trans.commit( function() {
                                                                                
        console.log( '<- ', prefix + ':' + title, ':', skipCount, failCount,
@@ -727,6 +773,7 @@
 };
 var statsWebInterface = function ( req, res ) {
        var query, queryParams, prefix;
+       var cutoffDate = new Date( Date.now() - ( cutOffTime * 1000 ) );
 
        var displayRow = function( res, label, val ) {
                        // round numeric data, but ignore others
@@ -746,10 +793,11 @@
        if ( prefix !== null ) {
                query = dbPerWikiStatsQuery;
                queryParams = [ prefix, prefix, prefix, prefix,
-                               prefix, prefix, prefix, prefix ];
+                               prefix, prefix, prefix, prefix,
+                               prefix, maxTries, cutoffDate ];
        } else {
                query = dbStatsQuery;
-               queryParams = null;
+               queryParams = [ maxTries, cutoffDate ];
        }
 
        // Fetch stats for commit
@@ -778,7 +826,7 @@
                                tests +
                                '</b> articles, of which <ul><li><b>' +
                                noErrors +
-                               '%</b> parsed without crashes </li><li><b>' +
+                               '%</b> parsed without errors </li><li><b>' +
                                syntacticDiffs +
                                '%</b> round-tripped without semantic 
differences, and </li><li><b>' +
                                perfects +
@@ -803,6 +851,8 @@
                        res.write( '<table><tbody>');
                        displayRow(res, "Git SHA1", row[0].maxhash);
                        displayRow(res, "Test Results", row[0].maxresults);
+                       displayRow( res, "Crashers",
+                                  '<a href="/crashers">' + row[0].crashers + 
'</a>' );
                        displayRow(res, "Regressions",
                                   '<a href="/regressions/between/' + 
row[0].secondhash + '/' +
                                   row[0].maxhash + '">' +
@@ -928,6 +978,40 @@
                                res.write( '</ul>');
                        }
                        res.end('</body></html>' );
+               }
+       } );
+};
+
+var GET_crashers = function( req, res ) {
+       var cutoffDate = new Date( Date.now() - ( cutOffTime * 1000 ) );
+       db.query( dbCrashers, [ maxTries, cutoffDate ], function ( err, rows ) {
+               if ( err ) {
+                       console.error( err );
+                       res.send( err.toString(), 500 );
+               } else {
+                       var n = rows.length;
+                       res.setHeader( 'Content-Type', 'text/html; 
charset=UTF-8' );
+                       res.status( 200 );
+                       res.write( '<html><body>' );
+                       if (n === 0) {
+                               res.write( 'No titles crash the testers!  
All\'s well with the world!' );
+                       } else {
+                               res.write( '<h1> The following ' + n + ' titles 
crash the testers' +
+                                         ' at least ' + maxTries + ' 
times</h1>' );
+                               res.write( '<ul>' );
+                               for ( var i = 0; i < n; i++ ) {
+                                       var prefix = rows[i].prefix,
+                                               title = rows[i].title;
+                                       var url = prefix + 
'.wikipedia.org/wiki/' + title;
+                                       var name = prefix + ':' + title;
+                                       var commitHash = rows[i].claim_hash;
+                                       res.write( '<li>' + commitHash + ': <a 
href="http://' +
+                                                         
encodeURI(url).replace('&', '&amp;') + '">' +
+                                                         name.replace('&', 
'&amp;') + '</a></li>\n' );
+                               }
+                               res.write( '</ul>' );
+                       }
+                       res.end( '</body></html>' );
                }
        } );
 };
@@ -1238,6 +1322,9 @@
 // Failed fetches
 app.get( /^\/failedFetches$/, GET_failedFetches );
 
+// Crashers
+app.get( /^\/crashers$/, GET_crashers );
+
 // Regressions between two revisions.
 app.get( /^\/regressions\/between\/([^\/]+)\/([^\/]+)(?:\/(\d+))?$/, 
GET_regressions );
 
diff --git a/js/tests/server/server.settings.js.example 
b/js/tests/server/server.settings.js.example
index 06294ee..c2d555d 100644
--- a/js/tests/server/server.settings.js.example
+++ b/js/tests/server/server.settings.js.example
@@ -37,5 +37,8 @@
        // Time in seconds to wait for a test result. If a test takes longer 
than
        // this, it will be considered a crash error. Be careful not to set 
this to
        // less than what any page takes to parse.
-       cutofftime: 600
+       cutofftime: 600,
+
+       // Number of titles to fetch from database in one batch.
+       batch: 50
 };

-- 
To view, visit https://gerrit.wikimedia.org/r/92758
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I67166ff0d057c622ea18e14bb668b6fb9a55e937
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Parsoid
Gerrit-Branch: master
Gerrit-Owner: Marcoil <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to