jenkins-bot has submitted this change and it was merged.
Change subject: Write purge script; decrease brokenness
......................................................................
Write purge script; decrease brokenness
Also,
* Refer to article revision IDs, not recent changes IDs.
* Fix ORES base URL -- it seems https redirects to http.
TODO:
* CheckModelVersions should trigger the purge job when an update is
detected.
Bug: T112856
Change-Id: Icaef8ae2934f766f30ff80b342a2c341887d594a
---
M includes/Cache.php
M includes/FetchScoreJob.php
M includes/Hooks.php
M includes/Scoring.php
M maintenance/CheckModelVersions.php
A maintenance/PurgeScoreCache.php
6 files changed, 124 insertions(+), 19 deletions(-)
Approvals:
Legoktm: Looks good to me, approved
jenkins-bot: Verified
diff --git a/includes/Cache.php b/includes/Cache.php
index fa3e00d..2b9a401 100644
--- a/includes/Cache.php
+++ b/includes/Cache.php
@@ -1,19 +1,20 @@
<?php
-
namespace ORES;
use RuntimeException;
class Cache {
+ static protected $modelVersions;
+
/**
* Save scores to the database
*
* @param array $scores in the same structure as is returned by ORES.
- * @param integer $rcid Recent changes ID
+ * @param integer $revid Revision ID
*
* @throws RuntimeException
*/
- public function storeScores( $scores, $rcid ) {
+ public function storeScores( $scores, $revid ) {
// Map to database fields.
$dbData = array();
foreach ( $scores as $revision => $revisionData ) {
@@ -30,11 +31,13 @@
$prediction = 'true';
}
+ $modelVersion = $this->getModelVersion( $model
);
+
foreach ( $modelOutputs['probability'] as
$class => $probability ) {
$dbData[] = array(
- 'ores_rc' => $rcid,
+ 'ores_rev' => $revid,
'ores_model' => $model,
- 'ores_model_version' => 0, //
FIXME: waiting for API support
+ 'ores_model_version' =>
$modelVersion,
'ores_class' => $class,
'ores_probability' =>
$probability,
'ores_is_predicted' => (
$prediction === $class ),
@@ -46,14 +49,77 @@
wfGetDB( DB_MASTER )->insert( 'ores_classification', $dbData,
__METHOD__ );
}
- public function purge( $model, $version ) {
- wfGetDb( DB_MASTER )->delete( 'ores_classification',
- array(
- 'ores_model' => $model,
- 'ores_model_version' => $version,
- ),
+ /**
+ * Delete cached scores
+ *
+ * Normally, we'll only delete scores from out-of-date model versions.
+ *
+ * @param string $model Model name.
+ * @param bool $isEverything When true, delete scores with the
up-to-date
+ * model version as well. This can be used in cases where the old data
is
+ * bad, but no new model has been released yet.
+ * @param integer $batchSize Maximum number of records to delete per
loop.
+ * Note that this function runs multiple batches, until all records are
deleted.
+ */
+ public function purge( $model, $isEverything, $batchSize = 1000 ) {
+ $dbr = wfGetDb( DB_SLAVE );
+ $dbw = wfGetDb( DB_MASTER );
+
+ $conditions = array(
+ 'ores_model' => $model,
+ );
+ if ( !$isEverything ) {
+ $currentModelVersion = $this->getModelVersion( $model );
+ $conditions[] = 'ores_model_version != ' .
$dbr->addQuotes( $currentModelVersion );
+ }
+
+ do {
+ $ids = $dbr->selectFieldValues( 'ores_classification',
+ 'ores_rev',
+ $conditions,
+ __METHOD__,
+ array( 'LIMIT' => $batchSize )
+ );
+ if ( $ids ) {
+ $dbw->delete( 'ores_classification',
+ array( 'ores_rev' => $ids ),
+ __METHOD__
+ );
+ wfWaitForSlaves();
+ }
+ } while ( $ids );
+ }
+
+ /**
+ * @param string $model
+ *
+ * @return string cached last seen version
+ */
+ protected function getModelVersion( $model ) {
+ if ( isset( self::$modelVersions[$model] ) ) {
+ return self::$modelVersions[$model];
+ }
+
+ $modelVersion = wfGetDb( DB_SLAVE )->selectField( 'ores_model',
+ 'ores_model_version',
+ array( 'ores_model' => $model ),
__METHOD__
);
+ if ( $modelVersion === false ) {
+ throw new RuntimeException( "No model version available
for [{$model}]" );
+ }
+
+ self::$modelVersions[$model] = $modelVersion;
+ return $modelVersion;
+ }
+
+ public function getModels() {
+ $models = wfGetDb( DB_SLAVE )->selectFieldValues( 'ores_model',
+ 'ores_model',
+ array(),
+ __METHOD__
+ );
+ return $models;
}
public static function instance() {
diff --git a/includes/FetchScoreJob.php b/includes/FetchScoreJob.php
index 7e9e50c..7c10cd9 100644
--- a/includes/FetchScoreJob.php
+++ b/includes/FetchScoreJob.php
@@ -1,5 +1,4 @@
<?php
-
namespace ORES;
use Job;
@@ -8,7 +7,7 @@
class FetchScoreJob extends Job {
/**
* @param Title $title
- * @param array $params 'rcid' and 'revid' keys
+ * @param array $params 'revid' key
*/
public function __construct( Title $title, array $params ) {
parent::__construct( 'ORESFetchScoreJob', $title, $params );
@@ -16,8 +15,9 @@
public function run() {
$scores = Scoring::instance()->getScores(
$this->params['revid'] );
- Cache::instance()->storeScores( $scores, $this->params['rcid']
);
+ Cache::instance()->storeScores( $scores, $this->params['revid']
);
+ // TODO: Or do we have to try/catch and return false on error,
set the error string, etc?
return true;
}
}
diff --git a/includes/Hooks.php b/includes/Hooks.php
index f9865e6..5b7c740 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -24,7 +24,6 @@
public static function onRecentChange_save( RecentChange $rc ) {
if ( $rc->getAttribute( 'rc_type' ) === RC_EDIT ) {
$job = new FetchScoreJob( $rc->getTitle(), array(
- 'rcid' => $rc->getAttribute( 'rc_id' ),
'revid' => $rc->getAttribute( 'rc_this_oldid' ),
) );
JobQueueGroup::singleton()->push( $job );
diff --git a/includes/Scoring.php b/includes/Scoring.php
index 5b801d5..1c9d517 100644
--- a/includes/Scoring.php
+++ b/includes/Scoring.php
@@ -1,5 +1,4 @@
<?php
-
namespace ORES;
use FormatJson;
@@ -10,7 +9,7 @@
protected function getScoresUrl( $revisions, $models ) {
global $wgOresBaseUrl;
- $url = $wgOresBaseUrl . 'scores/' . wfWikiID();
+ $url = $wgOresBaseUrl . 'scores/' . wfWikiID() . '/';
$params = array(
'models' => implode( '|', (array) $models ),
'revids' => implode( '|', (array) $revisions ),
@@ -34,13 +33,13 @@
$req = MWHttpRequest::factory( $url, null, __METHOD__ );
$status = $req->execute();
if ( !$status->isOK() ) {
- throw new RuntimeException( "No response from ORES
server at $url, "
+ throw new RuntimeException( "No response from ORES
server [{$url}], "
. $status->getMessage()->text() );
}
$json = $req->getContent();
$wireData = FormatJson::decode( $json, true );
if ( !$wireData || !empty( $wireData['error'] ) ) {
- throw new RuntimeException( 'Bad response from ORES
server: ' . $json );
+ throw new RuntimeException( "Bad response from ORES
endpoint [{$url}]: {$json}" );
}
return $wireData;
diff --git a/maintenance/CheckModelVersions.php
b/maintenance/CheckModelVersions.php
index 3ecd7b3..59ff694 100644
--- a/maintenance/CheckModelVersions.php
+++ b/maintenance/CheckModelVersions.php
@@ -29,6 +29,9 @@
}
}
+ /**
+ * Return a list of models available for this wiki.
+ */
protected function getModels() {
global $wgOresBaseUrl;
diff --git a/maintenance/PurgeScoreCache.php b/maintenance/PurgeScoreCache.php
new file mode 100644
index 0000000..bf28d39
--- /dev/null
+++ b/maintenance/PurgeScoreCache.php
@@ -0,0 +1,38 @@
+<?php
+
+use ORES\Cache;
+
+require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
+ ? getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php'
+ : __DIR__ . '/../../../maintenance/Maintenance.php' );
+
+/**
+ * @ingroup Maintenance
+ */
+class PurgeScoreCache extends Maintenance {
+ public function __construct() {
+ parent::__construct();
+
+ $this->addDescription( 'Purge out of date (or all) ORES model
results' );
+
+ $this->addOption( 'model', 'Model name (optional)', false, true
);
+ $this->addOption( 'all', 'Flag to indicate that we want to
clear all data, ' .
+ 'even those from the most recent model', false, false );
+ }
+
+ public function execute() {
+ if ( $this->hasOption( 'model' ) ) {
+ $models = array( $this->getOption( 'model' ) );
+ } else {
+ $models = Cache::instance()->getModels();
+ }
+
+ foreach ( $models as $model ) {
+ Cache::instance()->purge( $model, $this->hasOption(
'all' ) );
+ }
+ // @todo this script needs some output
+ }
+}
+
+$maintClass = 'PurgeScoreCache';
+require_once RUN_MAINTENANCE_IF_MAIN;
--
To view, visit https://gerrit.wikimedia.org/r/247034
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Icaef8ae2934f766f30ff80b342a2c341887d594a
Gerrit-PatchSet: 9
Gerrit-Project: mediawiki/extensions/ORES
Gerrit-Branch: master
Gerrit-Owner: Awight <[email protected]>
Gerrit-Reviewer: Halfak <[email protected]>
Gerrit-Reviewer: He7d3r <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Legoktm <[email protected]>
Gerrit-Reviewer: Paladox <[email protected]>
Gerrit-Reviewer: Yuvipanda <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits