Dragons flight has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/201104

Change subject: Overhaul AbuseFilter internal profiling system
......................................................................

Overhaul AbuseFilter internal profiling system

The AbuseFilter internal stats collection has been plagued by several bugs
for its entire history.  The most prominent bug is caused by a race
condition where competing Apache instances can corrupt the data in
memcache.  This is due to distributing stat data across multiple memcache
entries.  Other stat corruption is associated with editing filters or
filter runs that abort due to reaching the condition limit.

The modifications here overhaul the stats system to solve these problems.
In particular, all per filter profile data is now hosted in an array
that is stored in memcache as a block so that the different stats
remain consistent.  The restructuring also lays the groundwork for future
enhancements to the internal AbuseFilter profiling.

Bug: T53294
Change-Id: Ib12e072a245fcad93c6c6bd452041f3441f68bb7
---
M AbuseFilter.class.php
M Views/AbuseFilterViewEdit.php
M Views/AbuseFilterViewList.php
3 files changed, 154 insertions(+), 108 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/AbuseFilter 
refs/changes/04/201104/1

diff --git a/AbuseFilter.class.php b/AbuseFilter.class.php
index e8df9d9..93cc29e 100644
--- a/AbuseFilter.class.php
+++ b/AbuseFilter.class.php
@@ -7,6 +7,7 @@
  */
 class AbuseFilter {
        public static $statsStoragePeriod = 86400;
+       public static $statsMaxObservations = 10000;
        public static $tokenCache = array();
        public static $modifyCache = array();
        public static $condLimitEnabled = true;
@@ -435,7 +436,7 @@
         */
        public static function checkAllFilters( $vars, $group = 'default' ) {
                // Fetch from the database.
-               $filter_matched = array();
+               $filterMatched = array();
 
                $dbr = wfGetDB( DB_SLAVE );
                $res = $dbr->select(
@@ -449,8 +450,10 @@
                        __METHOD__
                );
 
+               $startTime = microtime( true );
+
                foreach( $res as $row ) {
-                       $filter_matched[$row->af_id] = self::checkFilter( $row, 
$vars, true );
+                       $filterMatched[$row->af_id] = self::checkFilter( $row, 
$vars, true );
                }
 
                global $wgAbuseFilterCentralDB, $wgAbuseFilterIsCentral, 
$wgMemc;
@@ -483,30 +486,33 @@
                        }
 
                        foreach( $res as $row ) {
-                               $filter_matched['global-' . $row->af_id] =
+                               $filterMatched['global-' . $row->af_id] =
                                        self::checkFilter( $row, $vars, true, 
'global-' );
                        }
                }
 
-               // Update statistics, and disable filters which are 
over-blocking.
-               self::recordStats( $filter_matched, $group );
+               $endTime = microtime( true );
+               $totalTime = $endTime - $startTime;
 
-               return $filter_matched;
+               // Update statistics, and disable filters which are 
over-blocking.
+               self::recordStats( $filterMatched, $totalTime, $group );
+
+               return $filterMatched;
        }
 
        /**
         * @static
         * @param $row
         * @param $vars
-        * @param $profile bool
+        * @param $recordStats bool
         * @param $prefix string
         * @return bool
         */
-       public static function checkFilter( $row, $vars, $profile = false, 
$prefix = '' ) {
+       public static function checkFilter( $row, $vars, $recordStats = false, 
$prefix = '' ) {
                $filterID = $prefix . $row->af_id;
 
                $startConds = $startTime = null;
-               if ( $profile ) {
+               if ( $recordStats ) {
                        $startConds = self::$condCount;
                        $startTime = microtime( true );
                }
@@ -528,84 +534,106 @@
                        $result = false;
                }
 
-               if ( $profile ) {
+               if ( $recordStats ) {
                        $endTime = microtime( true );
                        $endConds = self::$condCount;
 
                        $timeTaken = $endTime - $startTime;
                        $condsUsed = $endConds - $startConds;
 
-                       self::recordProfilingResult( $row->af_id, $timeTaken, 
$condsUsed );
+                       self::recordProfilingResult( $row->af_id, $timeTaken, 
$condsUsed, $result );
                }
 
                return $result;
        }
 
        /**
+        * Clear the statistics profiler for specified filter.
+        *
         * @param $filter
         */
        public static function resetFilterProfile( $filter ) {
                global $wgMemc;
-               $countKey = wfMemcKey( 'abusefilter', 'profile', $filter, 
'count' );
-               $totalKey = wfMemcKey( 'abusefilter', 'profile', $filter, 
'total' );
+               $profileKey = self::filterProfileKey( $filter );
 
-               $wgMemc->delete( $countKey );
-               $wgMemc->delete( $totalKey );
+               $wgMemc->delete( $profileKey );
        }
 
        /**
+        * Updates and records the per filter performance statistics.
+        *
         * @param $filter
         * @param $time
         * @param $conds
+        * @param $matched bool
         */
-       public static function recordProfilingResult( $filter, $time, $conds ) {
+       public static function recordProfilingResult( $filter, $time, $conds, 
$matched ) {
                global $wgMemc;
 
-               $countKey = wfMemcKey( 'abusefilter', 'profile', $filter, 
'count' );
-               $totalKey = wfMemcKey( 'abusefilter', 'profile', $filter, 
'total' );
-               $totalCondKey = wfMemcKey( 'abusefilter', 'profile-conds', 
'total' );
+               $profileKey = self::filterProfileKey( $filter );
+               $profile = $wgMemc->get( $profileKey );
 
-               $curCount = $wgMemc->get( $countKey );
-               $curTotal = $wgMemc->get( $totalKey );
-               $curTotalConds = $wgMemc->get( $totalCondKey );
+               if ( $profile !== false ) {
+                       $curCount = $profile['count'];    // Number of observed 
executions of this filter
+               } else {
+                       $curCount = false;
+               }
 
                if ( $curCount ) {
-                       $wgMemc->set( $totalCondKey, $curTotalConds + $conds, 
3600 );
-                       $wgMemc->set( $totalKey, $curTotal + $time, 3600 );
-                       $wgMemc->incr( $countKey );
+                       $profile['count']++;
+                       if ( $matched ) {
+                               $profile['matches']++;   // Number of observed 
matches of this filter
+                       }
+                       $profile['total-time'] += $time;   // Total time spent 
on this filter from all observed executions (seconds)
+                       $profile['total-cond'] += $conds;  // Total number of 
conditions for this filter from all executions
                } else {
-                       $wgMemc->set( $countKey, 1, 3600 );
-                       $wgMemc->set( $totalKey, $time, 3600 );
-                       $wgMemc->set( $totalCondKey, $conds, 3600 );
+                       $profile['count'] = 1;
+                       if ( $matched ) {
+                               $profile['matches'] = 1;
+                       } else {
+                               $profile['matches'] = 0;
+                       }
+                       $profile['total-time'] = $time;
+                       $profile['total-cond'] = $conds;
                }
+
+               // Note: It is important that all key information by stored 
together in a single memcache entry to avoid
+               // race conditions where competing Apache instances partially 
overwrite the stats.
+               $wgMemc->set( $profileKey, $profile, 3600 );
        }
 
        /**
+        * Retrieve per filter statistics.
+        *
         * @param $filter
         * @return array
         */
        public static function getFilterProfile( $filter ) {
                global $wgMemc;
 
-               $countKey = wfMemcKey( 'abusefilter', 'profile', $filter, 
'count' );
-               $totalKey = wfMemcKey( 'abusefilter', 'profile', $filter, 
'total' );
-               $totalCondKey = wfMemcKey( 'abusefilter', 'profile-conds', 
'total' );
+               $profileKey = self::filterProfileKey( $filter );
+               $profile = $wgMemc->get( $profileKey );
 
-               $curCount = $wgMemc->get( $countKey );
-               $curTotal = $wgMemc->get( $totalKey );
-               $curTotalConds = $wgMemc->get( $totalCondKey );
-
-               if ( !$curCount ) {
-                       return array( 0, 0 );
+               if ( $profile !== false ) {
+                       $curCount = $profile['count'];
+                       $matches = $profile['matches'];
+                       $curTotalTime = $profile['total-time'];
+                       $curTotalConds = $profile['total-cond'];
+               } else {
+                       $curCount = 0;
                }
 
-               $timeProfile = ( $curTotal / $curCount ) * 1000; // 1000 ms in 
a sec
-               $timeProfile = round( $timeProfile, 2 ); // Return in ms, 
rounded to 2dp
+               if ( !$curCount ) {
+                       return array( 0, 0, 0, 0 );
+               }
 
-               $condProfile = ( $curTotalConds / $curCount );
-               $condProfile = round( $condProfile, 0 );
+               $avgTime = ( $curTotalTime / $curCount ) * 1000; // 1000 ms in 
a sec
+               $avgTime = round( $avgTime, 2 ); // Return in ms, rounded to 2dp
 
-               return array( $timeProfile, $condProfile );
+               $avgCond = ( $curTotalConds / $curCount );
+               $avgCond = round( $avgCond, 1 );
+
+               return array( $curCount, $matches, $avgTime, $avgCond );
        }
 
        /**
@@ -974,9 +1002,6 @@
 
                global $wgMemc;
 
-               // Increment trigger counter
-               $wgMemc->incr( self::filterMatchesKey() );
-
                $local_log_ids = array();
                global $wgAbuseFilterNotifications, 
$wgAbuseFilterNotificationsPrivate;
                foreach ( $log_rows as $data ) {
@@ -1070,8 +1095,8 @@
                $vars->setVar( 'local_log_ids', $local_log_ids );
 
                // Check for emergency disabling.
-               $total = $wgMemc->get( AbuseFilter::filterUsedKey( $group ) );
-               self::checkEmergencyDisable( $group, $logged_local_filters, 
$total );
+               $profile = $wgMemc->get( AbuseFilter::filterProfileGroupKey( 
$group ) );
+               self::checkEmergencyDisable( $group, $logged_local_filters, 
$profile['total'] );
 
                wfProfileOut( __METHOD__ . '-hitstats' );
        }
@@ -1546,43 +1571,64 @@
        }
 
        /**
-        * Update statistics, and disable filters which are over-blocking.
+        * Update global statistics, and disable filters which are 
over-blocking.
+        *
         * @param $filters
+        * @param $totalTime
         * @param string $group The filter's group (as defined in 
$wgAbuseFilterValidGroups)
         */
-       public static function recordStats( $filters, $group = 'default' ) {
+       public static function recordStats( $filters, $totalTime, $group = 
'default' ) {
                global $wgAbuseFilterConditionLimit, $wgMemc;
+               $profileKey = self::filterProfileGroupKey( $group );
 
                // Figure out if we've triggered overflows and blocks.
-               $overflow_triggered = ( self::$condCount > 
$wgAbuseFilterConditionLimit );
+               $overflowTriggered = ( self::$condCount > 
$wgAbuseFilterConditionLimit );
 
-               // Store some keys...
-               $overflow_key = self::filterLimitReachedKey();
-               $total_key = self::filterUsedKey( $group );
+               $profile = $wgMemc->get( $profileKey );
+               if ( $profile !== false ) {
+                       $total = $profile['total'];
+               } else {
+                       $total = 0;
+               }
 
-               $total = $wgMemc->get( $total_key );
+               $storagePeriod = self::$statsStoragePeriod;
 
-               $storage_period = self::$statsStoragePeriod;
+               if ( !$total || $total > self::$statsMaxObservations ) {
+                       // This is for if the total doesn't exist, or has gone 
past $statsMaxObservations.
+                       // Recreate all the keys.
+                       $profile['total'] = 0;       // Total number of actions 
observed
+                       $profile['overflow'] = 0;    // Number of actions 
ending by exceeding condition limit
+                       $profile['total-time'] = 0;  // Total time of execution 
of all observed actions (seconds)
+                       $profile['total-cond'] = 0;  // Total number of 
conditions from all observed actions
+                       $profile['matches'] = 0;     // Total number of filters 
matched
 
-               if ( !$total || $total > 10000 ) {
-                       // This is for if the total doesn't exist, or has gone 
past 10,000.
-                       // Recreate all the keys at the same time, so they 
expire together.
-                       $wgMemc->set( $total_key, 0, $storage_period );
-                       $wgMemc->set( $overflow_key, 0, $storage_period );
+                       $wgMemc->set( $profileKey, $profile, $storagePeriod );
 
                        foreach ( $filters as $filter => $matched ) {
-                               $wgMemc->set( self::filterMatchesKey( $filter 
), 0, $storage_period );
+                               self::resetFilterProfile( $filter );
                        }
-                       $wgMemc->set( self::filterMatchesKey(), 0, 
$storage_period );
                }
 
                // Increment total
-               $wgMemc->incr( $total_key );
+               $profile['total'] += 1;
+               $profile['total-time'] += $totalTime;
+               $profile['total-cond'] += self::$condCount;
 
                // Increment overflow counter, if our condition limit overflowed
-               if ( $overflow_triggered ) {
-                       $wgMemc->incr( $overflow_key );
+               if ( $overflowTriggered ) {
+                       $profile['overflow'] += 1;
                }
+
+               // Record number of matches
+               foreach ( $filters as $filter => $matched ) {
+                       if ( $matched ) {
+                               $profile['matches']++;
+                       }
+               }
+
+               // Note: All related data is stored in a single memcache entry 
to avoid race conditions
+               // where partial updates on competing instances corrupt the 
data.
+               $wgMemc->set( $profileKey, $profile, $storagePeriod );
        }
 
        /**
@@ -1601,15 +1647,12 @@
                        $emergencyDisableAge = self::getEmergencyValue( 
$wgAbuseFilterEmergencyDisableAge, $group );
 
                        // Increment counter
-                       $matchCount = $wgMemc->get( self::filterMatchesKey( 
$filter ) );
-
-                       // Handle missing keys...
-                       if ( !$matchCount ) {
-                               $wgMemc->set( self::filterMatchesKey( $filter 
), 1, self::$statsStoragePeriod );
+                       $profile = $wgMemc->get( self::filterProfileKey( 
$filter ) );
+                       if ( $profile !== false ) {
+                               $matchCount = $profile['matches'];
                        } else {
-                               $wgMemc->incr( self::filterMatchesKey( $filter 
) );
+                               $matchCount = 1;
                        }
-                       $matchCount++;
 
                        // Figure out if the filter is subject to being deleted.
                        $filter_age = wfTimestamp( TS_UNIX, 
self::$filters[$filter]->af_timestamp );
@@ -1645,26 +1688,23 @@
        }
 
        /**
-        * @return String
-        */
-       public static function filterLimitReachedKey() {
-               return wfMemcKey( 'abusefilter', 'stats', 'overflow' );
-       }
-
-       /**
-        * @param string $group The filter's group (as defined in 
$wgAbuseFilterValidGroups)
-        * @return String
-        */
-       public static function filterUsedKey( $group = null ) {
-               return wfMemcKey( 'abusefilter', 'stats', 'total', $group );
-       }
-
-       /**
+        * Memcache access key used to store per-filter profiling data.
+        *
         * @param $filter
         * @return String
         */
-       public static function filterMatchesKey( $filter = null ) {
-               return wfMemcKey( 'abusefilter', 'stats', 'matches', $filter );
+       public static function filterProfileKey( $filter = null ) {
+               return wfMemcKey( 'abusefilter', 'profile', $filter );
+       }
+
+       /**
+        * Memcache access key used to store overall profiling data for rule 
groups
+        *
+        * @param $group
+        * @return String
+        */
+       public static function filterProfileGroupKey( $group = null ) {
+               return wfMemcKey( 'abusefilter', 'profile', 'group', $group );
        }
 
        /**
diff --git a/Views/AbuseFilterViewEdit.php b/Views/AbuseFilterViewEdit.php
index 7f7c634..a873f19 100644
--- a/Views/AbuseFilterViewEdit.php
+++ b/Views/AbuseFilterViewEdit.php
@@ -393,16 +393,13 @@
 
                if ( $filter !== 'new' ) {
                        // Statistics
-                       global $wgMemc;
-                       $matches_count = $wgMemc->get( 
AbuseFilter::filterMatchesKey( $filter ) );
-                       $total = $wgMemc->get( AbuseFilter::filterUsedKey( 
$row->af_group ) );
+                       list( $totalCount, $matchesCount, $avgTime, $avgCond ) 
= AbuseFilter::getFilterProfile( $filter );
 
-                       if ( $total > 0 ) {
-                               $matches_percent = sprintf( '%.2f', 100 * 
$matches_count / $total );
-                               list( $timeProfile, $condProfile ) = 
AbuseFilter::getFilterProfile( $filter );
+                       if ( $totalCount > 0 ) {
+                               $matchesPercent = sprintf( '%.2f', 100 * 
$matchesCount / $totalCount );
 
                                $fields['abusefilter-edit-status-label'] = 
$this->msg( 'abusefilter-edit-status' )
-                                       ->numParams( $total, $matches_count, 
$matches_percent, $timeProfile, $condProfile )
+                                       ->numParams( $totalCount, 
$matchesCount, $matchesPercent, $avgTime, $avgCond )
                                        ->escaped();
                        }
                }
diff --git a/Views/AbuseFilterViewList.php b/Views/AbuseFilterViewList.php
index 0ccaa73..73d2a34 100644
--- a/Views/AbuseFilterViewList.php
+++ b/Views/AbuseFilterViewList.php
@@ -162,28 +162,37 @@
                $this->getOutput()->addHTML( $output );
        }
 
+       /**
+        *      Generates a summary of filter activity using the internal 
statistics.
+        */
        function showStatus() {
                global $wgMemc, $wgAbuseFilterConditionLimit, 
$wgAbuseFilterValidGroups;
 
-               $overflow_count = (int)$wgMemc->get( 
AbuseFilter::filterLimitReachedKey() );
-               $match_count = (int) $wgMemc->get( 
AbuseFilter::filterMatchesKey() );
-               $total_count = 0;
+               $totalCount = 0;
+               $matchCount = 0;
+               $overflowCount = 0;
+
                foreach ( $wgAbuseFilterValidGroups as $group ) {
-                       $total_count += (int)$wgMemc->get( 
AbuseFilter::filterUsedKey( $group ) );
+                       $profile = $wgMemc->get( 
AbuseFilter::filterProfileGroupKey( $group ) );
+                       if ( $profile !== false ) {
+                               $totalCount += (int)$profile[ 'total' ];
+                               $overflowCount += (int)$profile[ 'overflow' ];
+                               $matchCount += (int)$profile[ 'matches' ];
+                       }
                }
 
-               if ( $total_count > 0 ) {
-                       $overflow_percent = sprintf( "%.2f", 100 * 
$overflow_count / $total_count );
-                       $match_percent = sprintf( "%.2f", 100 * $match_count / 
$total_count );
+               if ( $totalCount > 0 ) {
+                       $overflowPercent = sprintf( "%.2f", 100 * 
$overflowCount / $totalCount );
+                       $matchPercent = sprintf( "%.2f", 100 * $matchCount / 
$totalCount );
 
                        $status = $this->msg( 'abusefilter-status' )
                                ->numParams(
-                                       $total_count,
-                                       $overflow_count,
-                                       $overflow_percent,
+                                       $totalCount,
+                                       $overflowCount,
+                                       $overflowPercent,
                                        $wgAbuseFilterConditionLimit,
-                                       $match_count,
-                                       $match_percent
+                                       $matchCount,
+                                       $matchPercent
                                )->parse();
 
                        $status = Xml::tags( 'div', array( 'class' => 
'mw-abusefilter-status' ), $status );

-- 
To view, visit https://gerrit.wikimedia.org/r/201104
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib12e072a245fcad93c6c6bd452041f3441f68bb7
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/AbuseFilter
Gerrit-Branch: master
Gerrit-Owner: Dragons flight <ro...@robertrohde.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to