jenkins-bot has submitted this change and it was merged.

Change subject: Write purge script; decrease brokenness
......................................................................


Write purge script; decrease brokenness

Also,
* Refer to article revision IDs, not recent changes IDs.
* Fix ORES base URL--seems https redirects to http.

TODO:
* CheckModelVersions should trigger the purge job when an update is
detected.

Bug: T112856
Change-Id: Icaef8ae2934f766f30ff80b342a2c341887d594a
---
M includes/Cache.php
M includes/FetchScoreJob.php
M includes/Hooks.php
M includes/Scoring.php
M maintenance/CheckModelVersions.php
A maintenance/PurgeScoreCache.php
6 files changed, 124 insertions(+), 19 deletions(-)

Approvals:
  Legoktm: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/includes/Cache.php b/includes/Cache.php
index fa3e00d..2b9a401 100644
--- a/includes/Cache.php
+++ b/includes/Cache.php
@@ -1,19 +1,20 @@
 <?php
-
 namespace ORES;
 
 use RuntimeException;
 
 class Cache {
+       static protected $modelVersions;
+
        /**
         * Save scores to the database
         *
         * @param array $scores in the same structure as is returned by ORES.
-        * @param integer $rcid Recent changes ID
+        * @param integer $revid Revision ID
         *
         * @throws RuntimeException
         */
-       public function storeScores( $scores, $rcid ) {
+       public function storeScores( $scores, $revid ) {
                // Map to database fields.
                $dbData = array();
                foreach ( $scores as $revision => $revisionData ) {
@@ -30,11 +31,13 @@
                                        $prediction = 'true';
                                }
 
+                               $modelVersion = $this->getModelVersion( $model );
+
                                foreach ( $modelOutputs['probability'] as $class => $probability ) {
                                        $dbData[] = array(
-                                               'ores_rc' => $rcid,
+                                               'ores_rev' => $revid,
                                                'ores_model' => $model,
-                                               'ores_model_version' => 0, // FIXME: waiting for API support
+                                               'ores_model_version' => $modelVersion,
                                                'ores_class' => $class,
                                                'ores_probability' => $probability,
                                                'ores_is_predicted' => ( $prediction === $class ),
@@ -46,14 +49,77 @@
                wfGetDB( DB_MASTER )->insert( 'ores_classification', $dbData, __METHOD__ );
        }
 
-       public function purge( $model, $version ) {
-               wfGetDb( DB_MASTER )->delete( 'ores_classification',
-                       array(
-                               'ores_model' => $model,
-                               'ores_model_version' => $version,
-                       ),
+       /**
+        * Delete cached scores
+        *
+        * Normally, we'll only delete scores from out-of-date model versions.
+        *
+        * @param string $model Model name.
+        * @param bool $isEverything When true, delete scores with the up-to-date
+        * model version as well.  This can be used in cases where the old data is
+        * bad, but no new model has been released yet.
+        * @param integer $batchSize Maximum number of records to delete per loop.
+        * Note that this function runs multiple batches, until all records are deleted.
+        */
+       public function purge( $model, $isEverything, $batchSize = 1000 ) {
+               $dbr = wfGetDb( DB_SLAVE );
+               $dbw = wfGetDb( DB_MASTER );
+
+               $conditions = array(
+                       'ores_model' => $model,
+               );
+               if ( !$isEverything ) {
+                       $currentModelVersion = $this->getModelVersion( $model );
+                       $conditions[] = 'ores_model_version != ' . $dbr->addQuotes( $currentModelVersion );
+               }
+
+               do {
+                       $ids = $dbr->selectFieldValues( 'ores_classification',
+                               'ores_rev',
+                               $conditions,
+                               __METHOD__,
+                               array( 'LIMIT' => $batchSize )
+                       );
+                       if ( $ids ) {
+                               $dbw->delete( 'ores_classification',
+                                       array( 'ores_rev' => $ids ),
+                                       __METHOD__
+                               );
+                               wfWaitForSlaves();
+                       }
+               } while ( $ids );
+       }
+
+       /**
+        * @param string $model
+        *
+        * @return string cached last seen version
+        */
+       protected function getModelVersion( $model ) {
+               if ( isset( self::$modelVersions[$model] ) ) {
+                       return self::$modelVersions[$model];
+               }
+
+               $modelVersion = wfGetDb( DB_SLAVE )->selectField( 'ores_model',
+                       'ores_model_version',
+                       array( 'ores_model' => $model ),
                        __METHOD__
                );
+               if ( $modelVersion === false ) {
+                       throw new RuntimeException( "No model version available for [{$model}]" );
+               }
+
+               self::$modelVersions[$model] = $modelVersion;
+               return $modelVersion;
+       }
+
+       public function getModels() {
+               $models = wfGetDb( DB_SLAVE )->selectFieldValues( 'ores_model',
+                       'ores_model',
+                       array(),
+                       __METHOD__
+               );
+               return $models;
        }
 
        public static function instance() {
diff --git a/includes/FetchScoreJob.php b/includes/FetchScoreJob.php
index 7e9e50c..7c10cd9 100644
--- a/includes/FetchScoreJob.php
+++ b/includes/FetchScoreJob.php
@@ -1,5 +1,4 @@
 <?php
-
 namespace ORES;
 
 use Job;
@@ -8,7 +7,7 @@
 class FetchScoreJob extends Job {
        /**
         * @param Title $title
-        * @param array $params 'rcid' and 'revid' keys
+        * @param array $params 'revid' key
         */
        public function __construct( Title $title, array $params ) {
                parent::__construct( 'ORESFetchScoreJob', $title, $params );
@@ -16,8 +15,9 @@
 
        public function run() {
                $scores = Scoring::instance()->getScores( $this->params['revid'] );
-               Cache::instance()->storeScores( $scores, $this->params['rcid'] );
+               Cache::instance()->storeScores( $scores, $this->params['revid'] );
 
+               // TODO: Or do we have to try/catch and return false on error, set the error string, etc?
                return true;
        }
 }
diff --git a/includes/Hooks.php b/includes/Hooks.php
index f9865e6..5b7c740 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -24,7 +24,6 @@
        public static function onRecentChange_save( RecentChange $rc ) {
                if ( $rc->getAttribute( 'rc_type' ) === RC_EDIT ) {
                        $job = new FetchScoreJob( $rc->getTitle(), array(
-                               'rcid' => $rc->getAttribute( 'rc_id' ),
                                'revid' => $rc->getAttribute( 'rc_this_oldid' ),
                        ) );
                        JobQueueGroup::singleton()->push( $job );
diff --git a/includes/Scoring.php b/includes/Scoring.php
index 5b801d5..1c9d517 100644
--- a/includes/Scoring.php
+++ b/includes/Scoring.php
@@ -1,5 +1,4 @@
 <?php
-
 namespace ORES;
 
 use FormatJson;
@@ -10,7 +9,7 @@
        protected function getScoresUrl( $revisions, $models ) {
                global $wgOresBaseUrl;
 
-               $url = $wgOresBaseUrl . 'scores/' . wfWikiID();
+               $url = $wgOresBaseUrl . 'scores/' . wfWikiID() . '/';
                $params = array(
                        'models' => implode( '|', (array) $models ),
                        'revids' => implode( '|', (array) $revisions ),
@@ -34,13 +33,13 @@
                $req = MWHttpRequest::factory( $url, null, __METHOD__ );
                $status = $req->execute();
                if ( !$status->isOK() ) {
-                       throw new RuntimeException( "No response from ORES server at $url, "
+                       throw new RuntimeException( "No response from ORES server [{$url}], "
                                .  $status->getMessage()->text() );
                }
                $json = $req->getContent();
                $wireData = FormatJson::decode( $json, true );
                if ( !$wireData || !empty( $wireData['error'] ) ) {
-                       throw new RuntimeException( 'Bad response from ORES server: ' . $json );
+                       throw new RuntimeException( "Bad response from ORES endpoint [{$url}]: {$json}" );
                }
 
                return $wireData;
diff --git a/maintenance/CheckModelVersions.php b/maintenance/CheckModelVersions.php
index 3ecd7b3..59ff694 100644
--- a/maintenance/CheckModelVersions.php
+++ b/maintenance/CheckModelVersions.php
@@ -29,6 +29,9 @@
                }
        }
 
+       /**
+        * Return a list of models available for this wiki.
+        */
        protected function getModels() {
                global $wgOresBaseUrl;
 
diff --git a/maintenance/PurgeScoreCache.php b/maintenance/PurgeScoreCache.php
new file mode 100644
index 0000000..bf28d39
--- /dev/null
+++ b/maintenance/PurgeScoreCache.php
@@ -0,0 +1,38 @@
+<?php
+
+use ORES\Cache;
+
+require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
+       ? getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php'
+       : __DIR__ . '/../../../maintenance/Maintenance.php' );
+
+/**
+ * @ingroup Maintenance
+ */
+class PurgeScoreCache extends Maintenance {
+       public function __construct() {
+               parent::__construct();
+
+               $this->addDescription( 'Purge out of date (or all) ORES model results' );
+
+               $this->addOption( 'model', 'Model name (optional)', false, true );
+               $this->addOption( 'all', 'Flag to indicate that we want to clear all data, ' .
+                       'even those from the most recent model', false, false );
+       }
+
+       public function execute() {
+               if ( $this->hasOption( 'model' ) ) {
+                       $models = array( $this->getOption( 'model' ) );
+               } else {
+                       $models = Cache::instance()->getModels();
+               }
+
+               foreach ( $models as $model ) {
+                       Cache::instance()->purge( $model, $this->hasOption( 'all' ) );
+               }
+               // @todo this script needs some output
+       }
+}
+
+$maintClass = 'PurgeScoreCache';
+require_once RUN_MAINTENANCE_IF_MAIN;

-- 
To view, visit https://gerrit.wikimedia.org/r/247034
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Icaef8ae2934f766f30ff80b342a2c341887d594a
Gerrit-PatchSet: 9
Gerrit-Project: mediawiki/extensions/ORES
Gerrit-Branch: master
Gerrit-Owner: Awight <[email protected]>
Gerrit-Reviewer: Halfak <[email protected]>
Gerrit-Reviewer: He7d3r <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Legoktm <[email protected]>
Gerrit-Reviewer: Paladox <[email protected]>
Gerrit-Reviewer: Yuvipanda <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to