Awight has uploaded a new change for review.
https://gerrit.wikimedia.org/r/238825
Change subject: WIP Build schema to store full classifier outputs
......................................................................
WIP Build schema to store full classifier outputs
This makes all the data coming back from the classifier available to the
extension. The new schema can hold results from multiple models, and
includes the prediction probabilities for each class.
TODO:
* Rewrite the ores_scores code to use ores_classification.
* List of models should be stored as configuration.
Change-Id: I6cc63c08e2df256de0bdaf271c36802ba908c9aa
---
M includes/FetchScoreJob.php
M includes/Hooks.php
M ores.sql
3 files changed, 60 insertions(+), 22 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ORES refs/changes/25/238825/1
diff --git a/includes/FetchScoreJob.php b/includes/FetchScoreJob.php
index 9825232..7ced76a 100644
--- a/includes/FetchScoreJob.php
+++ b/includes/FetchScoreJob.php
@@ -7,6 +7,7 @@
use MWHttpRequest;
class FetchScoreJob extends Job {
+ protected $models;
/**
* @param Title $title
@@ -14,6 +15,11 @@
*/
public function __construct( Title $title, array $params ) {
parent::__construct( 'ORESFetchScoreJob', $title, $params );
+
+ // TODO: from configuration
+ $this->models = array(
+ 'reverted',
+ );
}
private function getUrl() {
@@ -21,8 +27,7 @@
$url = 'https://ores.wmflabs.org/scores/$wiki/';
$url = str_replace( '$wiki', wfWikiID(), $url );
$params = array(
- // @todo safe to hardcode?
- 'models' => 'reverted',
+ 'models' => $this->models,
'revids' => $this->params['revid'],
);
return wfAppendQuery( $url, $params );
@@ -32,23 +37,39 @@
$url = $this->getUrl();
$req = MWHttpRequest::factory( $url, null, __METHOD__ );
$status = $req->execute();
- if ( $status->isOK() ) {
- $content = $req->getContent();
- $json = json_decode( $content );
- // @todo check for errors
- $info = array_values( $json );
- wfGetDB( DB_MASTER )->insert(
- 'ores_scores',
- array(
- 'ores_rc' => $this->params['rcid'],
- 'ores_score' =>
$info['reverted']['probability']['true'],
- ),
- __METHOD__
- );
- return true;
- } else {
- // ????
+ if ( !$status->isOK() ) {
+ wfDebug( 'fatal', 'No response from ORES server: '
+ . $status->getMessage()->text() );
return false;
}
+ $json = $req->getContent();
+ $wire_data = json_decode( $json );
+ if ( !$wire_data ) {
+ wfDebug( 'fatal', 'Bad response from ORES server: ' .
$json );
+ return false;
+ }
+
+ // Map from wire format to database fields.
+ $db_data = array();
+ foreach ( $wire_data as $model => $model_outputs ) {
+ $prediction = $model_outputs['prediction'];
+ // Kludge out false -> "false" so we can match prediction against class name.
+ if ( $prediction === false ) {
+ $prediction = 'false';
+ }
+
+ foreach ( $model_outputs['probability'] as $class =>
$probability ) {
+ $db_data[] = array(
+ 'ores_rc' => $this->params['rcid'],
+ 'ores_model' => $model,
+ 'ores_class' => $class,
+ 'ores_probability' => $probability,
+ 'ores_is_predicted' => ( $prediction
=== $class ),
+ );
+ }
+ }
+
+ wfGetDB( DB_MASTER )->insert( 'ores_classification', $db_data,
__METHOD__ );
+ return true;
}
}
diff --git a/includes/Hooks.php b/includes/Hooks.php
index 5d06d28..dd12fe8 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -18,7 +18,7 @@
*/
public static function onLoadExtensionSchemaUpdates( DatabaseUpdater
$updater ) {
$path = dirname( __DIR__ );
- $updater->addExtensionTable( 'ores_scores', $path . '/ores.sql'
);
+ $updater->addExtensionTable( 'ores_classification', $path .
'/ores.sql' );
}
public static function onRecentChange_save( RecentChange $rc ) {
diff --git a/ores.sql b/ores.sql
index 6fd5fa7..4fbc82c 100644
--- a/ores.sql
+++ b/ores.sql
@@ -1,4 +1,21 @@
-CREATE TABLE /*_*/ores_scores (
- ores_rc int NOT NULL PRIMARY KEY,
- ores_score VARCHAR(20) NOT NULL
+-- ORES automated classifier outputs for a given revision
+--
+-- Each revision will usually be assigned a probability for all classes in the
+-- model's output range.
+CREATE TABLE /*_*/ores_classification (
+ -- Recent changes ID
+ ores_rc int NOT NULL,
+ -- Model name
+ ores_model VARCHAR(32) NOT NULL,
+ -- Classification title
+ ores_class VARCHAR(32) NOT NULL,
+ -- Estimated classification probability
+ ores_probability DECIMAL(10,10) NOT NULL,
+ -- Whether this classification has been recommended as the most likely
+ -- candidate.
+ ores_is_predicted TINYINT(1) NOT NULL
) /*$wgDBTableOptions*/;
+
+CREATE INDEX /*i*/ores_rc ON /*_*/ores_classification (ores_rc);
+CREATE INDEX /*i*/ores_is_predicted ON /*_*/ores_classification
(ores_is_predicted);
+CREATE INDEX /*i*/ores_winner ON /*_*/ores_classification (ores_rc,
ores_is_predicted);
--
To view, visit https://gerrit.wikimedia.org/r/238825
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I6cc63c08e2df256de0bdaf271c36802ba908c9aa
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ORES
Gerrit-Branch: master
Gerrit-Owner: Awight <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits