Gergő Tisza has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/349946 )

Change subject: Add ability to purge old rows to PurgeScoreCache
......................................................................

Add ability to purge old rows to PurgeScoreCache

This allows PurgeScoreCache to be used as a cron job to limit
scores to those revisions which are still in recentchanges.

Each deletion batch does a left join on recentchanges, which is
probably not the most efficient approach but seems good enough for a
maintenance script (P5319; 30ms/batch means 90M rows would
take roughly an hour).

Also add some logging per the TODO comment.

Bug: T159753
Change-Id: Id35bca820822dc46caa5adf6450c81871465abc9
---
M includes/Cache.php
M maintenance/PurgeScoreCache.php
2 files changed, 69 insertions(+), 11 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ORES 
refs/changes/46/349946/1

diff --git a/includes/Cache.php b/includes/Cache.php
index 4513902..de299e5 100644
--- a/includes/Cache.php
+++ b/includes/Cache.php
@@ -49,25 +49,63 @@
         *
         * @param string $model Model name.
         * @param bool $isEverything When true, delete scores with the 
up-to-date
-        * model version as well.  This can be used in cases where the old data 
is
-        * bad, but no new model has been released yet.
+        *   model version as well.  This can be used in cases where the old 
data is
+        *   bad, but no new model has been released yet.
         * @param integer $batchSize Maximum number of records to delete per 
loop.
-        * Note that this function runs multiple batches, until all records are 
deleted.
+        *   Note that this function runs multiple batches, until all records 
are deleted.
+        * @return int The number of deleted rows
         */
        public function purge( $model, $isEverything, $batchSize = 1000 ) {
-               $dbr = \wfGetDB( DB_REPLICA );
-               $dbw = \wfGetDB( DB_MASTER );
-
                $tables = [ 'ores_classification', 'ores_model' ];
-
-               $join_conds = [ 'ores_model' =>
-                       [ 'LEFT JOIN', 'oresm_id = oresc_model' ] ];
+               $join_conds = [
+                       'ores_model' => [ 'LEFT JOIN', 'oresm_id = oresc_model' 
],
+               ];
                $conditions = [
                        'oresm_name' => [ $model, null ],
                ];
                if ( !$isEverything ) {
                        $conditions[] = '(oresm_is_current != 1 OR 
oresm_is_current IS NULL)';
                }
+               return $this->deleteRows( $tables, $conditions, $join_conds );
+       }
+
+       /**
+        * Delete old cached scores.
+        * A score is old if the corresponding revision is not in the 
recentchanges table.
+        * @param string $model Model name.
+        * @param integer $batchSize Maximum number of records to delete per 
loop.
+        *   Note that this function runs multiple batches, until all records 
are deleted.
+        * @return int The number of deleted rows
+        */
+       public function purgeOld( $model, $batchSize = 1000 ) {
+               $tables = [ 'ores_classification', 'ores_model', 
'recentchanges' ];
+               $join_conds = [
+                       'ores_model' => [ 'LEFT JOIN', 'oresm_id = oresc_model' 
],
+                       'recentchanges' => [ 'LEFT JOIN', 'oresc_rev = 
rc_this_oldid' ],
+               ];
+               $conditions = [
+                       'oresm_name' => [ $model, null ],
+                       'rc_this_oldid' => null,
+               ];
+               return $this->deleteRows( $tables, $conditions, $join_conds );
+       }
+
+       /**
+        * Delete cached scores. Which rows to delete is given by 
Database::select parameters.
+        *
+        * @param array $tables
+        * @param array $conditions
+        * @param array $join_conds
+        * @param integer $batchSize Maximum number of records to delete per 
loop.
+        *   Note that this function runs multiple batches, until all records 
are deleted.
+        * @return int The number of deleted rows
+        * @see Database::select
+        */
+       protected function deleteRows( $tables, $conditions, $join_conds, 
$batchSize = 1000 ) {
+               $dbr = \wfGetDB( DB_REPLICA );
+               $dbw = \wfGetDB( DB_MASTER );
+
+               $deletedRows = 0;
 
                do {
                        $ids = $dbr->selectFieldValues( $tables,
@@ -82,9 +120,12 @@
                                        [ 'oresc_id' => $ids ],
                                        __METHOD__
                                );
+                               $deletedRows += $dbw->affectedRows();
                                
MediaWikiServices::getInstance()->getDBLoadBalancerFactory()->waitForReplication();
                        }
                } while ( $ids );
+
+               return $deletedRows;
        }
 
        /**
diff --git a/maintenance/PurgeScoreCache.php b/maintenance/PurgeScoreCache.php
index ce318ee..a3dca27 100644
--- a/maintenance/PurgeScoreCache.php
+++ b/maintenance/PurgeScoreCache.php
@@ -21,6 +21,8 @@
                $this->addOption( 'model', 'Model name (optional)', false, true 
);
                $this->addOption( 'all', 'Flag to indicate that we want to 
clear all data, ' .
                        'even those from the most recent model', false, false );
+               $this->addOption( 'old', 'Flag to indicate that we only want to 
clear old data ' .
+                       'that is not in recent changes anymore. Implicitly 
assumes --all.', false, false );
        }
 
        public function execute() {
@@ -30,10 +32,25 @@
                        $models = Cache::instance()->getModels();
                }
 
+               $this->output( "Purging ORES scores:\n" );
                foreach ( $models as $model ) {
-                       Cache::instance()->purge( $model, $this->hasOption( 
'all' ) );
+                       if ( $this->hasOption( 'old' ) ) {
+                               $deletedRows = Cache::instance()->purgeOld( 
$model );
+                               $description = 'old rows';
+                       } elseif ( $this->hasOption( 'all' ) ) {
+                               $deletedRows = Cache::instance()->purge( 
$model, true );
+                               $description = 'old model versions';
+                       } else {
+                               $deletedRows = Cache::instance()->purge( 
$model, false );
+                               $description = 'all rows';
+                       }
+                       if ( $deletedRows ) {
+                               $this->output( "   ...purging $description from 
'$model' model': deleted $deletedRows rows\n" );
+                       } else {
+                               $this->output( "   ...skipping '$model' model, 
no action needed\n" );
+                       }
                }
-               // @todo this script needs some output
+               $this->output( "   done.\n" );
        }
 
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/349946
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id35bca820822dc46caa5adf6450c81871465abc9
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ORES
Gerrit-Branch: master
Gerrit-Owner: Gergő Tisza <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to