Revision: 46040
Author:   werdna
Date:     2009-01-22 23:11:11 +0000 (Thu, 22 Jan 2009)

Log Message:
-----------
MAJOR refactoring of AbuseFilter class, including fixing a few bugs which I 
don't remember right now.

Mostly, a general cleanup of some code that was written in a really ugly way, 
and splitting of huge methods into five or six functions to make a much more 
logical flow.

Modified Paths:
--------------
    branches/change-tagging/extensions/AbuseFilter/AbuseFilter.class.php

Modified: branches/change-tagging/extensions/AbuseFilter/AbuseFilter.class.php
===================================================================
--- branches/change-tagging/extensions/AbuseFilter/AbuseFilter.class.php        
2009-01-22 23:05:31 UTC (rev 46039)
+++ branches/change-tagging/extensions/AbuseFilter/AbuseFilter.class.php        
2009-01-22 23:11:11 UTC (rev 46040)
@@ -149,134 +149,160 @@
                
                return $result;
        }
-       
-       public static function filterAction( $vars, $title ) {
-               global $wgUser,$wgMemc;
-               
+
+       /** Returns an associative array of filters which were tripped */
+       public static function checkAllFilters( $vars ) {
                // Fetch from the database.
                $dbr = wfGetDB( DB_SLAVE );
                $res = $dbr->select( 'abuse_filter', '*', array( 'af_enabled' 
=> 1, 'af_deleted' => 0 ) );
-               
-               $blocking_filters = array();
-               $log_entries = array();
-               $log_template = array( 'afl_user' => $wgUser->getId(), 
'afl_user_text' => $wgUser->getName(),
-                                       'afl_var_dump' => serialize( $vars ), 
'afl_timestamp' => $dbr->timestamp(wfTimestampNow()),
-                                       'afl_namespace' => 
$title->getNamespace(), 'afl_title' => $title->getDBKey(), 'afl_ip' => 
wfGetIp() );
-               $doneActionsByFilter = array();
-               $filter_matched = array();
-               
+
                while ( $row = $dbr->fetchObject( $res ) ) {
+                       // Store the row somewhere convenient
                        self::$filters[$row->af_id] = $row;
+
+                       // Check conditions...
                        $pattern = trim($row->af_pattern);
                        if ( self::checkConditions( $pattern, $vars ) ) {
-                               $blocking_filters[$row->af_id] = $row;
-                               
-                               $newLog = $log_template;
-                               $newLog['afl_filter'] = $row->af_id;
-                               $newLog['afl_action'] = $vars['ACTION'];
-
-                               $log_entries[] = $newLog;
-                               
-                               $doneActionsByFilter[$row->af_id] = array();
+                               // Record match.
                                $filter_matched[$row->af_id] = true;
                        } else {
+                               // Record non-match.
                                $filter_matched[$row->af_id] = false;
                        }
                }
-               
-               //// Clean up from checking all the filters
-       
+
+               // Update statistics, and disable filters which are 
over-blocking.
                self::recordStats( $filter_matched );
-               
-               if (count($blocking_filters) == 0 ) {
-                       // No problems.
-                       return true;
-               }
-               
+
+               return $filter_matched;
+       }
+
+       /** Returns an array [ list of actions taken by filter, error message 
to display, if any ] */
+       public static function executeFilterActions( $filters, $title, $vars ) {
+               $dbr = wfGetDB( DB_SLAVE );
                // Retrieve the consequences.
-               $res = $dbr->select( 'abuse_filter_action', '*', array( 
'afa_filter' => array_keys( $blocking_filters ) ), __METHOD__, array( "ORDER 
BY" => " (afa_consequence in ('throttle','warn'))-(afa_consequence in 
('disallow')) desc" ) );
-               // We want throttles, warnings first, as they have a bit of a 
special treatment. We want disallow last, so that it can be "eaten" by other 
actions.
-               
-               $actions_done = array();
-               $throttled_filters = array();
-               
-               $display = '';
-               
+               $res = $dbr->select( 'abuse_filter_action', '*', array( 
'afa_filter' => $filters ), __METHOD__ );
+
+               $actionsByFilter = array_fill_keys( $filters, array() );
+               $actionsTaken = array_fill_keys( $filters, array() );
+
+               // Categorise consequences by filter.
                while ( $row = $dbr->fetchObject( $res ) ) {
-                       // Don't do the same action-parameters twice
-                       $action_key = md5( $row->afa_consequence . 
$row->afa_parameters );
-                       
-                       // Skip if we've already done this action-parameter, or 
a passive action has sufficed.
-                       $skipAction = ( in_array( $action_key, $actions_done ) 
|| in_array( $row->afa_filter, $throttled_filters ) );
-                       
-                       // Don't disallow if we've already done something 
active. It produces two messages, where one would suffice.
-                       if ($row->afa_consequence == 'disallow' && 
!$skipAction) {
-                               $doneActiveActions = array_diff( 
$doneActionsByFilter[$row->afa_filter], array( 'throttle', 'warn' /* passive 
actions */ ) );
-                               $skipAction = (bool)count($doneActiveActions);
+                       
$actionsByFilter[$row->afa_filter][$row->afa_consequence] = array( 'action' => 
$row->afa_consequence, 'parameters' => explode( "\n", $row->afa_parameters ) );
+               }
+
+               wfLoadExtensionMessages( 'AbuseFilter' );
+
+               $messages = array();
+
+               foreach( $actionsByFilter as $filter => $actions ) {
+                       // Special-case handling for warnings.
+                       if ( !empty( $actions['warn'] ) ) {
+                               $parameters = $actions['warn']['parameters'];
+                               $warnKey = 
'abusefilter-warned-'.$title->getPrefixedText();
+                               if (!isset($_SESSION[$warnKey]) || 
!$_SESSION[$warnKey]) {
+                                       $_SESSION[$warnKey] = true;
+
+                                       // Threaten them a little bit
+                                       $msg = ( !empty($parameters[0]) && 
strlen($parameters[0]) ) ? $parameters[0] : 'abusefilter-warning';
+                                       $messages[] = wfMsgNoTrans( $msg, 
self::$filters[$filter]->af_public_comments ) . "<br />\n";
+
+                                       $actionsTaken[] = 'warn';
+
+                                       continue; // Don't do anything else.
+                               } else {
+                                       // We already warned them
+                                       $_SESSION[$warnKey] = false;
+                               }
+                               
+                               unset( $actions['warn'] );
                        }
-                       
-                       if ( !$skipAction ) {
-                               // Unpack parameters
-                               $parameters = explode( "\n", 
$row->afa_parameters );
-                               
-                               // Take the action.
-                               $result = self::takeConsequenceAction( 
$row->afa_consequence, $parameters, $title, $vars, $display, $continue, 
$blocking_filters[$row->afa_filter]->af_public_comments );
-                               
-                               // Don't do it twice.
-                               $doneActionsByFilter[$row->afa_filter][] = 
$row->afa_consequence;
-                               $actions_done[] = $action_key;
-                               
-                               // Only execute other actions for a filter if 
that filter's rate limiter has been tripped.
-                               if (!$result) {
-                                       $throttled_filters[] = $row->afa_filter;
+
+                       if ( !empty( $actions['throttle'] ) ) {
+                               $parameters = 
$actions['throttle']['parameters'];
+                               $throttleId = array_shift( $parameters );
+                               list( $rateCount, $ratePeriod ) = explode( ',', 
array_shift( $parameters ) );
+
+                               $hitThrottle = false;
+
+                               // The rest are throttle-types.
+                               foreach( $parameters as $throttleType ) {
+                                       $hitThrottle = $hitThrottle || 
self::isThrottled( $throttleId, $throttleType, $title, $rateCount, $ratePeriod 
);
                                }
-                       } else {
-                               // Ignore it, until we hit the rate limit.
+
+                               unset( $actions['throttle'] );
+                               if (!$hitThrottle) {
+                                       $actionsTaken[] = 'throttle';
+                                       continue;
+                               }
                        }
+
+                       // Do the rest of the actions
+                       foreach( $actions as $action => $info ) {
+                               $newMsg = self::takeConsequenceAction( $action, 
$info['parameters'], $title, $vars, self::$filters[$filter]->af_public_comments 
);
+
+                               if ($newMsg)
+                                       $messages[] = $newMsg;
+                               $actionsTaken[$filter][] = $action;
+                       }
                }
+
+               return array( $actionsTaken, implode( "\n", $messages ) );
+       }
+       
+       public static function filterAction( $vars, $title ) {
+               global $wgUser,$wgMemc;
+
+               $dbr = wfGetDB( DB_SLAVE );
+
+               $filter_matched = self::checkAllFilters( $vars );
+
+               // Short-cut any remaining code if no filters were hit.
+               if ( count( array_filter( $filter_matched) ) == 0 ) {
+                       die( var_dump( $filter_matched ) );
+                       return true;
+               }
+
+               list( $actions_taken, $error_msg ) = 
self::executeFilterActions( array_keys( array_filter( $filter_matched ) ), 
$title, $vars );
+
+               // Create a template
+               $log_template = array( 'afl_user' => $wgUser->getId(), 
'afl_user_text' => $wgUser->getName(),
+                                       'afl_var_dump' => serialize( $vars ), 
'afl_timestamp' => $dbr->timestamp(wfTimestampNow()),
+                                       'afl_namespace' => 
$title->getNamespace(), 'afl_title' => $title->getDBKey(), 'afl_ip' => 
wfGetIp() );
+
+               self::addLogEntries( $actions_taken, $log_template, 
$vars['ACTION'] );
                
+               return $error_msg;
+       }
+
+       public static function addLogEntries( $actions_taken, $log_template, 
$action ) {
                $dbw = wfGetDB( DB_MASTER );
-               
-               // Log it
-               foreach( $log_entries as $index => $entry ) {
-                       $actions = $log_entries[$index]['afl_actions'] = 
implode( ',', $doneActionsByFilter[$entry['afl_filter']] );
-                       
-                       if ($actions == 'throttle') {
-                               // Just a throttle, don't record it.
-                               unset($log_entries[$index]);
+
+               $log_rows = array();
+
+               foreach( $actions_taken as $filter => $actions ) {
+                       $thisLog = $log_template;
+                       $thisLog['afl_filter'] = $filter;
+                       $thisLog['afl_action'] = $action;
+                       $thisLog['afl_actions'] = implode( ',', 
$actions_taken[$filter] );
+
+                       // Don't log if we were only throttling.
+                       if ($thisLog['afl_actions'] != 'throttle') {
+                               $log_rows[] = $thisLog;
                        }
-                       
-                       // Increment the hit counter
-                       $dbw->update( 'abuse_filter', array( 
'af_hit_count=af_hit_count+1' ), array( 'af_id' => $entry['afl_filter'] ), 
__METHOD__ );
                }
-               
-               if (!count($log_entries)) {
+
+               if (!count($log_rows)) {
                        return;
                }
-               
-               $dbw->insert( 'abuse_filter_log', $log_entries, __METHOD__ );
-               
-               return $display;
+
+               $dbw->insert( 'abuse_filter_log', $log_rows, __METHOD__ );
        }
        
-       public static function takeConsequenceAction( $action, $parameters, 
$title, $vars, &$display, &$continue, $rule_desc ) {
+       public static function takeConsequenceAction( $action, $parameters, 
$title, $vars, $rule_desc ) {
                wfLoadExtensionMessages( 'AbuseFilter' );
+               $display = '';
                switch ($action) {
-                       case 'warn':
-                               if (!isset($_SESSION['abusefilter-warned']) || 
!$_SESSION['abusefilter-warned']) {
-                                       $_SESSION['abusefilter-warned'] = true;
-                                       
-                                       // Threaten them a little bit
-                                       $msg = ( !empty($parameters[0]) && 
strlen($parameters[0]) ) ? $parameters[0] : 'abusefilter-warning';
-                                       $display .= wfMsgNoTrans( $msg, 
$rule_desc ) . "<br />\n";
-                                       
-                                       return false; // Don't apply the other 
stuff yet.
-                               } else {
-                                       // We already warned them
-                                       $_SESSION['abusefilter-warned'] = false;
-                               }
-                               break;
-                               
                        case 'disallow':
                                if (strlen($parameters[0])) {
                                        $display .= wfMsgNoTrans( 
$parameters[0], $rule_desc ) . "\n";
@@ -352,20 +378,6 @@
                                
                                $display .= wfMsgNoTrans( 
'abusefilter-blocked-display', $rule_desc ) ."<br />\n";
                                break;
-                       
-                       case 'throttle':
-                               $throttleId = array_shift( $parameters );
-                               list( $rateCount, $ratePeriod ) = explode( ',', 
array_shift( $parameters ) );
-                               
-                               $hitThrottle = false;
-                               
-                               // The rest are throttle-types.
-                               foreach( $parameters as $throttleType ) {
-                                       $hitThrottle = $hitThrottle || 
self::isThrottled( $throttleId, $throttleType, $title, $rateCount, $ratePeriod 
);
-                               }
-
-                               return $hitThrottle;
-                               break;
                        case 'degroup':
                                global $wgUser;
                                if (!$wgUser->isAnon()) {
@@ -418,7 +430,7 @@
                                break;
                }
                
-               return true;
+               return $display;
        }
        
        public static function isThrottled( $throttleId, $types, $title, 
$rateCount, $ratePeriod ) {
@@ -496,82 +508,81 @@
                global $wgAbuseFilterConditionLimit,$wgMemc;
                
                $blocking_filters = array_keys( array_filter( $filters ) );
-               
+
+               // Figure out if we've triggered overflows and blocks.
                $overflow_triggered = (self::$condCount > 
$wgAbuseFilterConditionLimit);
-               $filter_triggered = count($blocking_filters);
-               
+               $filter_triggered = count( $blocking_filters ) > 0;
+
+               // Store some keys...
                $overflow_key = self::filterLimitReachedKey();
+               $total_key = self::filterUsedKey();
                
-               $total_key = self::filterUsedKey();
                $total = $wgMemc->get( $total_key );
 
-               $storage_period = self::$statsStoragePeriod; // One day.
+               $storage_period = self::$statsStoragePeriod;
                
                if (!$total || $total > 1000) {
-                       $wgMemc->set( $total_key, 1, $storage_period );
-                       
-                       if ($overflow_triggered) {
-                               $wgMemc->set( $overflow_key, 1, $storage_period 
);
-                       } else {
-                               $wgMemc->set( $overflow_key, 0, $storage_period 
);
-                       }
-                       
-                       $anyMatch = false;
-                       
+                       // This is for if the total doesn't exist, or has gone 
past 1000.
+                       //  Recreate all the keys at the same time, so they 
expire together.
+                       $wgMemc->set( $total_key, 0, $storage_period );
+                       $wgMemc->set( $overflow_key, 0, $storage_period );
+
                        foreach( $filters as $filter => $matched ) {
-                               $filter_key = self::filterMatchesKey( $filter );
-                               if ($matched) {
-                                       $anyMatch = true;
-                                       $wgMemc->set( $filter_key, 1, 
$storage_period );
-                               } else {
-                                       $wgMemc->set( $filter_key, 0, 
$storage_period );
-                               }
+                               $wgMemc->set( self::filterMatchesKey( $filter 
), 0, $storage_period );
                        }
+                       $wgMemc->set( self::filterMatchesKey(), 0, 
$storage_period );
                        
-                       if ($anyMatch) {
-                               $wgMemc->set( self::filterMatchesKey(), 1, 
$storage_period );
-                       } else {
-                               $wgMemc->set( self::filterMatchesKey(), 0, 
$storage_period );
-                       }
-                       
                        return;
                }
-               
+
+               // Increment total
                $wgMemc->incr( $total_key );
-               
+
+               // Increment overflow counter, if our condition limit overflowed
                if ($overflow_triggered) {
                        $wgMemc->incr( $overflow_key );
                }
-               
-               $anyMatch = false;
-               
-               global $wgAbuseFilterEmergencyDisableThreshold, 
$wgAbuseFilterEmergencyDisableCount, $wgAbuseFilterEmergencyDisableAge;
-               
+
+               self::checkEmergencyDisable( $filters );
+
+               // Increment trigger counter
+               if ($filter_triggered) {
+                       $wgMemc->incr( self::filterMatchesKey() );
+               }
+
+               $dbw = wfGetDB( DB_MASTER );
+
+               // Update hit-counter.
+               $dbw->update( 'abuse_filter', array( 
'af_hit_count=af_hit_count+1' ), array( 'af_id' => array_keys( array_filter( 
$filters ) ) ), __METHOD__ );
+       }
+
+       public static function checkEmergencyDisable( $filters ) {
+               global $wgAbuseFilterEmergencyDisableThreshold, 
$wgAbuseFilterEmergencyDisableCount, $wgAbuseFilterEmergencyDisableAge, $wgMemc;
+
                foreach( $filters as $filter => $matched ) {
                        if ($matched) {
-                               $anyMatch = true;
-                               $match_count = $wgMemc->get( 
self::filterMatchesKey( $filter ) );
-                               
-                               if ($match_count > 0) {
+                               // Increment counter
+                               $matchCount = $wgMemc->get( 
self::filterMatchesKey( $filter ) );
+
+                               // Handle missing keys...
+                               if (!$matchCount) {
+                                       $wgMemc->set( self::filterMatchesKey( 
$filter ), 1, self::$statsStoragePeriod );
+                               } else {
                                        $wgMemc->incr( self::filterMatchesKey( 
$filter ) );
-                               } else {
-                                       $wgMemc->set( self::filterMatchesKey( 
$filter ), 1, self::$statsStoragePeriod );
                                }
-                               
+                               $matchCount++;
+                       
+                               // Figure out if the filter is subject to being 
deleted.
                                $filter_age = wfTimestamp( TS_UNIX, 
self::$filters[$filter]->af_timestamp );
                                $throttle_exempt_time = $filter_age + 
$wgAbuseFilterEmergencyDisableAge;
-                               
-                               if ($throttle_exempt_time > time() && 
$match_count > $wgAbuseFilterEmergencyDisableCount && ($match_count / $total) > 
$wgAbuseFilterEmergencyDisableThreshold) {
-                                       // More than X matches, constituting 
more than Y% of last Z edits. Disable it.
+
+                               if ($throttle_exempt_time > time() && 
$matchCount > $wgAbuseFilterEmergencyDisableCount && ($matchCount / $total) > 
$wgAbuseFilterEmergencyDisableThreshold) {
+                                       // More than 
$wgAbuseFilterEmergencyDisableCount matches, constituting more than 
$wgAbuseFilterEmergencyDisableThreshold (a fraction) of last few edits. Disable 
it.
                                        $dbw = wfGetDB( DB_MASTER );
                                        $dbw->update( 'abuse_filter', array( 
'af_enabled' => 0, 'af_throttled' => 1 ), array( 'af_id' => $filter ), 
__METHOD__ );
                                }
                        }
                }
-               
-               if ($anyMatch) {
-                       $wgMemc->incr( self::filterMatchesKey() );
-               }
        }
        
        public static function filterLimitReachedKey() {



_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to