Awight has uploaded a new change for review.
https://gerrit.wikimedia.org/r/247034
Change subject: Decrease brokenness
......................................................................
Decrease brokenness
* Refer to article revision IDs, not recent changes IDs.
* Fix cache purge script.
* Satisfy composer lint.
* Fix ORES base URL: the https endpoint seems to redirect to http, so use http.
TODO:
* Decide which code license we're using and set it in composer.json
* CheckModelVersions should trigger the purge job when an update is detected.
Change-Id: Icaef8ae2934f766f30ff80b342a2c341887d594a
---
M composer.json
M extension.json
M includes/FetchScoreJob.php
M includes/Hooks.php
M includes/OresCache.php
M includes/OresScoring.php
M maintenance/CheckModelVersions.php
M maintenance/PurgeScoreCache.php
8 files changed, 137 insertions(+), 93 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ORES
refs/changes/34/247034/1
diff --git a/composer.json b/composer.json
index 534f8fb..4a1ae53 100644
--- a/composer.json
+++ b/composer.json
@@ -1,4 +1,7 @@
{
+ "name": "mediawiki/ores",
+ "description": "Display ORES revision scoring for articles",
+ "license": "unknown",
"require-dev": {
"jakub-onderka/php-parallel-lint": "0.9.*",
"mediawiki/mediawiki-codesniffer": "0.3.0"
diff --git a/extension.json b/extension.json
index d9aeb91..17b1b53 100644
--- a/extension.json
+++ b/extension.json
@@ -9,7 +9,9 @@
"url": "https://www.mediawiki.org/wiki/Extension:ORES",
"AutoloadClasses": {
"ORES\\Hooks": "includes/Hooks.php",
- "ORES\\FetchScoreJob": "includes/FetchScoreJob.php"
+ "ORES\\FetchScoreJob": "includes/FetchScoreJob.php",
+ "ORES\\OresCache": "includes/OresCache.php",
+ "ORES\\OresScoring": "includes/OresScoring.php"
},
"Hooks": {
"LoadExtensionSchemaUpdates": [
@@ -50,7 +52,7 @@
"ORESFetchScoreJob": "ORES\\FetchScoreJob"
},
"config": {
- "OresBaseUrl": "https://ores.wmflabs.org/",
+ "OresBaseUrl": "http://ores.wmflabs.org/",
"OresModels": [
"reverted"
],
diff --git a/includes/FetchScoreJob.php b/includes/FetchScoreJob.php
index 001d9f8..a92733b 100644
--- a/includes/FetchScoreJob.php
+++ b/includes/FetchScoreJob.php
@@ -1,16 +1,13 @@
<?php
-
namespace ORES;
-use Job;
-use FormatJson;
-use Title;
-use MWHttpRequest;
+use \Job;
+use \Title;
class FetchScoreJob extends Job {
/**
* @param Title $title
- * @param array $params 'rcid' and 'revid' keys
+ * @param array $params 'revid' key
*/
public function __construct( Title $title, array $params ) {
parent::__construct( 'ORESFetchScoreJob', $title, $params );
@@ -18,8 +15,9 @@
public function run() {
$scores = OresScoring::instance()->getScores(
$this->params['revid'] );
- OresCache::instance()->storeScores( $scores,
$this->params['rcid'] );
+ OresCache::instance()->storeScores( $scores,
$this->params['revid'] );
+ // TODO: Or do we have to try/catch and return false on error,
set the error string, etc?
return true;
}
}
diff --git a/includes/Hooks.php b/includes/Hooks.php
index 6e07e3f..c6737af 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -23,7 +23,6 @@
public static function onRecentChange_save( RecentChange $rc ) {
if ( $rc->getAttribute( 'rc_type' ) === RC_EDIT ) {
$job = new FetchScoreJob( $rc->getTitle(), array(
- 'rcid' => $rc->getAttribute( 'rc_id' ),
'revid' => $rc->getAttribute( 'rc_this_oldid' ),
) );
JobQueueGroup::singleton()->push( $job );
diff --git a/includes/OresCache.php b/includes/OresCache.php
index 302b463..37171ee 100644
--- a/includes/OresCache.php
+++ b/includes/OresCache.php
@@ -1,15 +1,20 @@
<?php
+namespace ORES;
+
+use \RuntimeException;
class OresCache {
+ static protected $modelVersions;
+
/**
* Save scores to the database
*
* @param array $scores in the same structure as is returned by ORES.
- * @param integer $rcid Recent changes ID
+ * @param integer $revid Revision ID
*
* @throws RuntimeException
*/
- public function storeScores( $scores, $rcid ) {
+ public function storeScores( $scores, $revid ) {
// Map to database fields.
$dbData = array();
foreach ( $scores as $revision => $revisionData ) {
@@ -26,11 +31,13 @@
$prediction = 'true';
}
+ $modelVersion = $this->getModelVersion( $model
);
+
foreach ( $modelOutputs['probability'] as
$class => $probability ) {
$dbData[] = array(
- 'ores_rc' => $rcid,
+ 'ores_rev' => $revid,
'ores_model' => $model,
- 'ores_model_version' => 0, //
FIXME: waiting for API support
+ 'ores_model_version' =>
$modelVersion,
'ores_class' => $class,
'ores_probability' =>
$probability,
'ores_is_predicted' => (
$prediction === $class ),
@@ -42,59 +49,76 @@
wfGetDB( DB_MASTER )->insert( 'ores_classification', $dbData,
__METHOD__ );
}
- /**
- * Delete cached scores
- *
- * Normally, we'll only delete scores from out-of-date model versions.
- *
- * @param string $model Model name.
- * @param bool $isEverything When true, delete scores with the up-to-date
- * model version as well. This can be used in cases where the old data is
- * bad, but no new model has been released yet.
- * @param integer $batchSize Maximum number of records to delete at once.
- */
+ /**
+ * Delete cached scores
+ *
+ * Normally, we'll only delete scores from out-of-date model versions.
+ *
+ * @param string $model Model name.
+ * @param bool $isEverything When true, delete scores with the
up-to-date
+ * model version as well. This can be used in cases where the old data
is
+ * bad, but no new model has been released yet.
+ * @param integer $batchSize Maximum number of records to delete per
loop.
+ * Note that this function runs multiple batches, until all records are
deleted.
+ */
public function purge( $model, $isEverything, $batchSize = 10000 ) {
- $dbr = wfGetDb( DB_SLAVE );
- $dbw = wfGetDb( DB_MASTER );
+ $dbr = wfGetDb( DB_SLAVE );
+ $dbw = wfGetDb( DB_MASTER );
- $conditions = array(
- 'ores_model' => $model,
- );
- if ( !$isEverything ) {
- $currentModelVersion = $this->getModelVersion( $model );
- $conditions[] = 'ores_model_version != ' . $dbr->addQuotes(
$currentModelVersion );
- }
+ $conditions = array(
+ 'ores_model' => $model,
+ );
+ if ( !$isEverything ) {
+ $currentModelVersion = $this->getModelVersion( $model );
+ $conditions[] = 'ores_model_version != ' .
$dbr->addQuotes( $currentModelVersion );
+ }
- do {
- $ids = $dbr->selectFieldValues( 'ores_classification',
- 'ores_rev',
- $conditions,
- __METHOD__,
- array( 'LIMIT' => $batchSize )
- );
- if ( $ids ) {
- $dbw->delete( 'ores_classification',
- array( 'ores_rev' => $ids ),
- __METHOD__
- );
- wfWaitForSlaves();
- }
- } while ( $ids );
- }
+ do {
+ $ids = $dbr->selectFieldValues( 'ores_classification',
+ 'ores_rev',
+ $conditions,
+ __METHOD__,
+ array( 'LIMIT' => $batchSize )
+ );
+ if ( $ids ) {
+ $dbw->delete( 'ores_classification',
+ array( 'ores_rev' => $ids ),
+ __METHOD__
+ );
+ wfWaitForSlaves();
+ }
+ } while ( $ids );
+ }
- protected function getModelVersion( $model ) {
- $modelVersion = wfGetDb( DB_SLAVE )->selectField( 'ores_model',
- array( 'ores_model_version' ),
- array( 'ores_model' => $model ),
- __METHOD__
- );
+ /**
+ * @return cached last seen version
+ */
+ protected function getModelVersion( $model ) {
+ if ( isset( self::$modelVersions[$model] ) ) {
+ return self::$modelVersions[$model];
+ }
- if ( $modelVersion === false ) {
- throw RuntimeException( "No model version available for
[{$model}]" );
- }
+ $modelVersion = wfGetDb( DB_SLAVE )->selectField( 'ores_model',
+ 'ores_model_version',
+ array( 'ores_model' => $model ),
+ __METHOD__
+ );
+ if ( $modelVersion === false ) {
+ throw RuntimeException( "No model version available for
[{$model}]" );
+ }
- return $modelVersion;
- }
+ self::$modelVersions[$model] = $modelVersion;
+ return $modelVersion;
+ }
+
+ public function getModels() {
+ $models = wfGetDb( DB_SLAVE )->selectFieldValues( 'ores_model',
+ 'ores_model',
+ array(),
+ __METHOD__
+ );
+ return $models;
+ }
public function instance() {
return new OresCache();
diff --git a/includes/OresScoring.php b/includes/OresScoring.php
index 24687f8..fc07974 100644
--- a/includes/OresScoring.php
+++ b/includes/OresScoring.php
@@ -1,10 +1,15 @@
<?php
+namespace ORES;
+
+use \FormatJson;
+use \MWHttpRequest;
+use \RuntimeException;
class OresScoring {
protected function getScoresUrl( $revisions, $models ) {
global $wgOresBaseUrl;
- $url = $wgOresBaseUrl . 'scores/' . wfWikiID();
+ $url = $wgOresBaseUrl . 'scores/' . wfWikiID() . '/';
$params = array(
'models' => implode( '|', (array) $models ),
'revids' => implode( '|', (array) $revisions ),
@@ -28,13 +33,13 @@
$req = MWHttpRequest::factory( $url, null, __METHOD__ );
$status = $req->execute();
if ( !$status->isOK() ) {
- throw new RuntimeException( "No response from ORES
server at $url, "
+ throw new RuntimeException( "No response from ORES
server [{$url}], "
. $status->getMessage()->text() );
}
$json = $req->getContent();
$wireData = FormatJson::decode( $json, true );
if ( !$wireData || !empty( $wireData['error'] ) ) {
- throw new RuntimeException( 'Bad response from ORES
server: ' . $json );
+ throw new RuntimeException( "Bad response from ORES
endpoint [{$url}]: {$json}" );
}
return $wireData;
diff --git a/maintenance/CheckModelVersions.php
b/maintenance/CheckModelVersions.php
index 5eca197..59ff694 100644
--- a/maintenance/CheckModelVersions.php
+++ b/maintenance/CheckModelVersions.php
@@ -15,37 +15,40 @@
}
public function execute() {
- $models = $this->getModels();
+ $models = $this->getModels();
- foreach ( $models as $name => $info ) {
- wfGetDB( DB_MASTER )->replace( 'ores_model',
- 'ores_model',
- array(
- 'ores_model' => $name,
- 'ores_model_version' => $info['version'],
- ),
- __METHOD__
- );
- }
- }
+ foreach ( $models as $name => $info ) {
+ wfGetDB( DB_MASTER )->replace( 'ores_model',
+ 'ores_model',
+ array(
+ 'ores_model' => $name,
+ 'ores_model_version' =>
$info['version'],
+ ),
+ __METHOD__
+ );
+ }
+ }
- protected function getModels() {
- global $wgOresBaseUrl;
+ /**
+ * Return a list of models available for this wiki.
+ */
+ protected function getModels() {
+ global $wgOresBaseUrl;
- $url = $wgOresBaseUrl . 'scores/' . wfWikiID() . '/';
- $req = MWHttpRequest::factory( $url, null, __METHOD__ );
+ $url = $wgOresBaseUrl . 'scores/' . wfWikiID() . '/';
+ $req = MWHttpRequest::factory( $url, null, __METHOD__ );
$status = $req->execute();
if ( !$status->isOK() ) {
- throw new RuntimeException( "Failed to get revscoring models
[{$url}], "
+ throw new RuntimeException( "Failed to get revscoring
models [{$url}], "
. $status->getMessage()->text() );
}
- $json = $req->getContent();
- $modelData = FormatJson::decode( $json, true );
- if ( !$modelData || !empty( $modelData['error'] ) || empty(
$modelData['models'] ) ) {
- throw new RuntimeException( "Bad response from revscoring models
request [{$url}]: {$json}" );
- }
- return $modelData['models'];
- }
+ $json = $req->getContent();
+ $modelData = FormatJson::decode( $json, true );
+ if ( !$modelData || !empty( $modelData['error'] ) || empty(
$modelData['models'] ) ) {
+ throw new RuntimeException( "Bad response from
revscoring models request [{$url}]: {$json}" );
+ }
+ return $modelData['models'];
+ }
}
$maintClass = 'CheckModelVersions';
diff --git a/maintenance/PurgeScoreCache.php b/maintenance/PurgeScoreCache.php
index 1201858..baf8def 100644
--- a/maintenance/PurgeScoreCache.php
+++ b/maintenance/PurgeScoreCache.php
@@ -1,5 +1,7 @@
<?php
+use ORES\OresCache;
+
require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
? getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php'
: dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
@@ -7,21 +9,29 @@
/**
* @ingroup Maintenance
*/
-class PurgeModelVersion extends Maintenance {
+class PurgeScoreCache extends Maintenance {
public function __construct() {
parent::__construct();
$this->addDescription( 'Purge out of date (or all) ORES model
results' );
- $this->addOption( 'model', 'Model name', true, true );
- $this->addOption( 'all', 'Flag to indicate that we want to
clear all data for this model, even the most recent version', false, false );
+ $this->addOption( 'model', 'Model name (optional)', false, true
);
+ $this->addOption( 'all', 'Flag to indicate that we want to
clear all data, from even the most recent versions', false, false );
}
public function execute() {
- $model = $this->getOption( 'model' );
+ $modelParam = $this->getOption( 'model' );
$all = $this->getOption( 'all' );
- OresCache::instance()->purge( $model, $all );
+ if ( $modelParam ) {
+ $models = array( $modelParam );
+ } else {
+ $models = OresCache::getModels();
+ }
+
+ foreach ( $models as $model ) {
+ OresCache::instance()->purge( $model, $all );
+ }
}
}
--
To view, visit https://gerrit.wikimedia.org/r/247034
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Icaef8ae2934f766f30ff80b342a2c341887d594a
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ORES
Gerrit-Branch: master
Gerrit-Owner: Awight <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits