Dragons flight has uploaded a new change for review. https://gerrit.wikimedia.org/r/201104
Change subject: Overhaul AbuseFilter internal profiling system ...................................................................... Overhaul AbuseFilter internal profiling system The AbuseFilter internal stats collection has been plagued by several bugs for its entire history. The most prominent bug is caused by a race condition where competing Apache instances can corrupt the data in memcache. This is due to distributing stat data across multiple memcache entries. Other stat corruption is associated with editing filters or filter runs that abort due to reaching the condition limit. The modifications here overhaul the stats system to solve these problems. In particular, all per filter profile data is now hosted in an array that is stored in memcache as a block so that the different stats remain consistent. The restructuring also lays the groundwork for future enhancements to the internal AbuseFilter profiling. Bug: T53294 Change-Id: Ib12e072a245fcad93c6c6bd452041f3441f68bb7 --- M AbuseFilter.class.php M Views/AbuseFilterViewEdit.php M Views/AbuseFilterViewList.php 3 files changed, 154 insertions(+), 108 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/AbuseFilter refs/changes/04/201104/1 diff --git a/AbuseFilter.class.php b/AbuseFilter.class.php index e8df9d9..93cc29e 100644 --- a/AbuseFilter.class.php +++ b/AbuseFilter.class.php @@ -7,6 +7,7 @@ */ class AbuseFilter { public static $statsStoragePeriod = 86400; + public static $statsMaxObservations = 10000; public static $tokenCache = array(); public static $modifyCache = array(); public static $condLimitEnabled = true; @@ -435,7 +436,7 @@ */ public static function checkAllFilters( $vars, $group = 'default' ) { // Fetch from the database. - $filter_matched = array(); + $filterMatched = array(); $dbr = wfGetDB( DB_SLAVE ); $res = $dbr->select( @@ -449,8 +450,10 @@ __METHOD__ ); + $startTime = microtime( true ); + foreach( $res as $row ) { - $filter_matched[$row->af_id] = self::checkFilter( $row, $vars, true ); + $filterMatched[$row->af_id] = self::checkFilter( $row, $vars, true ); } global $wgAbuseFilterCentralDB, $wgAbuseFilterIsCentral, $wgMemc; @@ -483,30 +486,33 @@ } foreach( $res as $row ) { - $filter_matched['global-' . $row->af_id] = + $filterMatched['global-' . $row->af_id] = self::checkFilter( $row, $vars, true, 'global-' ); } } - // Update statistics, and disable filters which are over-blocking. - self::recordStats( $filter_matched, $group ); + $endTime = microtime( true ); + $totalTime = $endTime - $startTime; - return $filter_matched; + // Update statistics, and disable filters which are over-blocking. + self::recordStats( $filterMatched, $totalTime, $group ); + + return $filterMatched; } /** * @static * @param $row * @param $vars - * @param $profile bool + * @param $recordStats bool * @param $prefix string * @return bool */ - public static function checkFilter( $row, $vars, $profile = false, $prefix = '' ) { + public static function checkFilter( $row, $vars, $recordStats = false, $prefix = '' ) { $filterID = $prefix . $row->af_id; $startConds = $startTime = null; - if ( $profile ) { + if ( $recordStats ) { $startConds = self::$condCount; $startTime = microtime( true ); } @@ -528,84 +534,106 @@ $result = false; } - if ( $profile ) { + if ( $recordStats ) { $endTime = microtime( true ); $endConds = self::$condCount; $timeTaken = $endTime - $startTime; $condsUsed = $endConds - $startConds; - self::recordProfilingResult( $row->af_id, $timeTaken, $condsUsed ); + self::recordProfilingResult( $row->af_id, $timeTaken, $condsUsed, $result ); } return $result; } /** + * Clear the statistics profiler for specified filter. + * * @param $filter */ public static function resetFilterProfile( $filter ) { global $wgMemc; - $countKey = wfMemcKey( 'abusefilter', 'profile', $filter, 'count' ); - $totalKey = wfMemcKey( 'abusefilter', 'profile', $filter, 'total' ); + $profileKey = self::filterProfileKey( $filter ); - $wgMemc->delete( $countKey ); - $wgMemc->delete( $totalKey ); + $wgMemc->delete( $profileKey ); } /** + * Updates and records the per filter performance statistics. + * * @param $filter * @param $time * @param $conds + * @param $matched bool */ - public static function recordProfilingResult( $filter, $time, $conds ) { + public static function recordProfilingResult( $filter, $time, $conds, $matched ) { global $wgMemc; - $countKey = wfMemcKey( 'abusefilter', 'profile', $filter, 'count' ); - $totalKey = wfMemcKey( 'abusefilter', 'profile', $filter, 'total' ); - $totalCondKey = wfMemcKey( 'abusefilter', 'profile-conds', 'total' ); + $profileKey = self::filterProfileKey( $filter ); + $profile = $wgMemc->get( $profileKey ); - $curCount = $wgMemc->get( $countKey ); - $curTotal = $wgMemc->get( $totalKey ); - $curTotalConds = $wgMemc->get( $totalCondKey ); + if ( $profile !== false ) { + $curCount = $profile['count']; // Number of observed executions of this filter + } else { + $curCount = false; + } if ( $curCount ) { - $wgMemc->set( $totalCondKey, $curTotalConds + $conds, 3600 ); - $wgMemc->set( $totalKey, $curTotal + $time, 3600 ); - $wgMemc->incr( $countKey ); + $profile['count']++; + if ( $matched ) { + $profile['matches']++; // Number of observed matches of this filter + } + $profile['total-time'] += $time; // Total time spent on this filter from all observed executions (seconds) + $profile['total-cond'] += $conds; // Total number of conditions for this filter from all executions } else { - $wgMemc->set( $countKey, 1, 3600 ); - $wgMemc->set( $totalKey, $time, 3600 ); - $wgMemc->set( $totalCondKey, $conds, 3600 ); + $profile['count'] = 1; + if ( $matched ) { + $profile['matches'] = 1; + } else { + $profile['matches'] = 0; + } + $profile['total-time'] = $time; + $profile['total-cond'] = $conds; } + + // Note: It is important that all key information by stored together in a single memcache entry to avoid + // race conditions where competing Apache instances partially overwrite the stats. + $wgMemc->set( $profileKey, $profile, 3600 ); } /** + * Retrieve per filter statistics. + * * @param $filter * @return array */ public static function getFilterProfile( $filter ) { global $wgMemc; - $countKey = wfMemcKey( 'abusefilter', 'profile', $filter, 'count' ); - $totalKey = wfMemcKey( 'abusefilter', 'profile', $filter, 'total' ); - $totalCondKey = wfMemcKey( 'abusefilter', 'profile-conds', 'total' ); + $profileKey = self::filterProfileKey( $filter ); + $profile = $wgMemc->get( $profileKey ); - $curCount = $wgMemc->get( $countKey ); - $curTotal = $wgMemc->get( $totalKey ); - $curTotalConds = $wgMemc->get( $totalCondKey ); - - if ( !$curCount ) { - return array( 0, 0 ); + if ( $profile !== false ) { + $curCount = $profile['count']; + $matches = $profile['matches']; + $curTotalTime = $profile['total-time']; + $curTotalConds = $profile['total-cond']; + } else { + $curCount = 0; } - $timeProfile = ( $curTotal / $curCount ) * 1000; // 1000 ms in a sec - $timeProfile = round( $timeProfile, 2 ); // Return in ms, rounded to 2dp + if ( !$curCount ) { + return array( 0, 0, 0, 0 ); + } - $condProfile = ( $curTotalConds / $curCount ); - $condProfile = round( $condProfile, 0 ); + $avgTime = ( $curTotalTime / $curCount ) * 1000; // 1000 ms in a sec + $avgTime = round( $avgTime, 2 ); // Return in ms, rounded to 2dp - return array( $timeProfile, $condProfile ); + $avgCond = ( $curTotalConds / $curCount ); + $avgCond = round( $avgCond, 1 ); + + return array( $curCount, $matches, $avgTime, $avgCond ); } /** @@ -974,9 +1002,6 @@ global $wgMemc; - // Increment trigger counter - $wgMemc->incr( self::filterMatchesKey() ); - $local_log_ids = array(); global $wgAbuseFilterNotifications, $wgAbuseFilterNotificationsPrivate; foreach ( $log_rows as $data ) { @@ -1070,8 +1095,8 @@ $vars->setVar( 'local_log_ids', $local_log_ids ); // Check for emergency disabling. - $total = $wgMemc->get( AbuseFilter::filterUsedKey( $group ) ); - self::checkEmergencyDisable( $group, $logged_local_filters, $total ); + $profile = $wgMemc->get( AbuseFilter::filterProfileGroupKey( $group ) ); + self::checkEmergencyDisable( $group, $logged_local_filters, $profile['total'] ); wfProfileOut( __METHOD__ . '-hitstats' ); } @@ -1546,43 +1571,64 @@ } /** - * Update statistics, and disable filters which are over-blocking. + * Update global statistics, and disable filters which are over-blocking. + * * @param $filters + * @param $totalTime * @param string $group The filter's group (as defined in $wgAbuseFilterValidGroups) */ - public static function recordStats( $filters, $group = 'default' ) { + public static function recordStats( $filters, $totalTime, $group = 'default' ) { global $wgAbuseFilterConditionLimit, $wgMemc; + $profileKey = self::filterProfileGroupKey( $group ); // Figure out if we've triggered overflows and blocks. - $overflow_triggered = ( self::$condCount > $wgAbuseFilterConditionLimit ); + $overflowTriggered = ( self::$condCount > $wgAbuseFilterConditionLimit ); - // Store some keys... - $overflow_key = self::filterLimitReachedKey(); - $total_key = self::filterUsedKey( $group ); + $profile = $wgMemc->get( $profileKey ); + if ( $profile !== false ) { + $total = $profile['total']; + } else { + $total = 0; + } - $total = $wgMemc->get( $total_key ); + $storagePeriod = self::$statsStoragePeriod; - $storage_period = self::$statsStoragePeriod; + if ( !$total || $total > self::$statsMaxObservations ) { + // This is for if the total doesn't exist, or has gone past $statsMaxObservations. + // Recreate all the keys. + $profile['total'] = 0; // Total number of actions observed + $profile['overflow'] = 0; // Number of actions ending by exceeding condition limit + $profile['total-time'] = 0; // Total time of execution of all observed actions (seconds) + $profile['total-cond'] = 0; // Total number of conditions from all observed actions + $profile['matches'] = 0; // Total number of filters matched - if ( !$total || $total > 10000 ) { - // This is for if the total doesn't exist, or has gone past 10,000. - // Recreate all the keys at the same time, so they expire together. - $wgMemc->set( $total_key, 0, $storage_period ); - $wgMemc->set( $overflow_key, 0, $storage_period ); + $wgMemc->set( $profileKey, $profile, $storagePeriod ); foreach ( $filters as $filter => $matched ) { - $wgMemc->set( self::filterMatchesKey( $filter ), 0, $storage_period ); + self::resetFilterProfile( $filter ); } - $wgMemc->set( self::filterMatchesKey(), 0, $storage_period ); } // Increment total - $wgMemc->incr( $total_key ); + $profile['total'] += 1; + $profile['total-time'] += $totalTime; + $profile['total-cond'] += self::$condCount; // Increment overflow counter, if our condition limit overflowed - if ( $overflow_triggered ) { - $wgMemc->incr( $overflow_key ); + if ( $overflowTriggered ) { + $profile['overflow'] += 1; } + + // Record number of matches + foreach ( $filters as $filter => $matched ) { + if ( $matched ) { + $profile['matches']++; + } + } + + // Note: All related data is stored in a single memcache entry to avoid race conditions + // where partial updates on competing instances corrupt the data. + $wgMemc->set( $profileKey, $profile, $storagePeriod ); } /** @@ -1601,15 +1647,12 @@ $emergencyDisableAge = self::getEmergencyValue( $wgAbuseFilterEmergencyDisableAge, $group ); // Increment counter - $matchCount = $wgMemc->get( self::filterMatchesKey( $filter ) ); - - // Handle missing keys... - if ( !$matchCount ) { - $wgMemc->set( self::filterMatchesKey( $filter ), 1, self::$statsStoragePeriod ); + $profile = $wgMemc->get( self::filterProfileKey( $filter ) ); + if ( $profile !== false ) { + $matchCount = $profile['matches']; } else { - $wgMemc->incr( self::filterMatchesKey( $filter ) ); + $matchCount = 1; } - $matchCount++; // Figure out if the filter is subject to being deleted. $filter_age = wfTimestamp( TS_UNIX, self::$filters[$filter]->af_timestamp ); @@ -1645,26 +1688,23 @@ } /** - * @return String - */ - public static function filterLimitReachedKey() { - return wfMemcKey( 'abusefilter', 'stats', 'overflow' ); - } - - /** - * @param string $group The filter's group (as defined in $wgAbuseFilterValidGroups) - * @return String - */ - public static function filterUsedKey( $group = null ) { - return wfMemcKey( 'abusefilter', 'stats', 'total', $group ); - } - - /** + * Memcache access key used to store per-filter profiling data. + * * @param $filter * @return String */ - public static function filterMatchesKey( $filter = null ) { - return wfMemcKey( 'abusefilter', 'stats', 'matches', $filter ); + public static function filterProfileKey( $filter = null ) { + return wfMemcKey( 'abusefilter', 'profile', $filter ); + } + + /** + * Memcache access key used to store overall profiling data for rule groups + * + * @param $group + * @return String + */ + public static function filterProfileGroupKey( $group = null ) { + return wfMemcKey( 'abusefilter', 'profile', 'group', $group ); } /** diff --git a/Views/AbuseFilterViewEdit.php b/Views/AbuseFilterViewEdit.php index 7f7c634..a873f19 100644 --- a/Views/AbuseFilterViewEdit.php +++ b/Views/AbuseFilterViewEdit.php @@ -393,16 +393,13 @@ if ( $filter !== 'new' ) { // Statistics - global $wgMemc; - $matches_count = $wgMemc->get( AbuseFilter::filterMatchesKey( $filter ) ); - $total = $wgMemc->get( AbuseFilter::filterUsedKey( $row->af_group ) ); + list( $totalCount, $matchesCount, $avgTime, $avgCond ) = AbuseFilter::getFilterProfile( $filter ); - if ( $total > 0 ) { - $matches_percent = sprintf( '%.2f', 100 * $matches_count / $total ); - list( $timeProfile, $condProfile ) = AbuseFilter::getFilterProfile( $filter ); + if ( $totalCount > 0 ) { + $matchesPercent = sprintf( '%.2f', 100 * $matchesCount / $totalCount ); $fields['abusefilter-edit-status-label'] = $this->msg( 'abusefilter-edit-status' ) - ->numParams( $total, $matches_count, $matches_percent, $timeProfile, $condProfile ) + ->numParams( $totalCount, $matchesCount, $matchesPercent, $avgTime, $avgCond ) ->escaped(); } } diff --git a/Views/AbuseFilterViewList.php b/Views/AbuseFilterViewList.php index 0ccaa73..73d2a34 100644 --- a/Views/AbuseFilterViewList.php +++ b/Views/AbuseFilterViewList.php @@ -162,28 +162,37 @@ $this->getOutput()->addHTML( $output ); } + /** + * Generates a summary of filter activity using the internal statistics. + */ function showStatus() { global $wgMemc, $wgAbuseFilterConditionLimit, $wgAbuseFilterValidGroups; - $overflow_count = (int)$wgMemc->get( AbuseFilter::filterLimitReachedKey() ); - $match_count = (int) $wgMemc->get( AbuseFilter::filterMatchesKey() ); - $total_count = 0; + $totalCount = 0; + $matchCount = 0; + $overflowCount = 0; + foreach ( $wgAbuseFilterValidGroups as $group ) { - $total_count += (int)$wgMemc->get( AbuseFilter::filterUsedKey( $group ) ); + $profile = $wgMemc->get( AbuseFilter::filterProfileGroupKey( $group ) ); + if ( $profile !== false ) { + $totalCount += (int)$profile[ 'total' ]; + $overflowCount += (int)$profile[ 'overflow' ]; + $matchCount += (int)$profile[ 'matches' ]; + } } - if ( $total_count > 0 ) { - $overflow_percent = sprintf( "%.2f", 100 * $overflow_count / $total_count ); - $match_percent = sprintf( "%.2f", 100 * $match_count / $total_count ); + if ( $totalCount > 0 ) { + $overflowPercent = sprintf( "%.2f", 100 * $overflowCount / $totalCount ); + $matchPercent = sprintf( "%.2f", 100 * $matchCount / $totalCount ); $status = $this->msg( 'abusefilter-status' ) ->numParams( - $total_count, - $overflow_count, - $overflow_percent, + $totalCount, + $overflowCount, + $overflowPercent, $wgAbuseFilterConditionLimit, - $match_count, - $match_percent + $matchCount, + $matchPercent )->parse(); $status = Xml::tags( 'div', array( 'class' => 'mw-abusefilter-status' ), $status ); -- To view, visit https://gerrit.wikimedia.org/r/201104 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ib12e072a245fcad93c6c6bd452041f3441f68bb7 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/AbuseFilter Gerrit-Branch: master Gerrit-Owner: Dragons flight <ro...@robertrohde.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits