Awight has uploaded a new change for review.
https://gerrit.wikimedia.org/r/259212
Change subject: [WIP] Generalize thresholds
......................................................................
[WIP] Generalize thresholds
Also, rely on is_predicted as a minimum qualification before threshold is
considered.
Change-Id: Idb6eccba8ad63d417e44bd22b1e0a0357c7f3b17
---
M includes/Api.php
M includes/FetchScoreJob.php
M includes/Hooks.php
M includes/Scoring.php
4 files changed, 55 insertions(+), 11 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ORES
refs/changes/12/259212/1
diff --git a/includes/Api.php b/includes/Api.php
index 6cf0a12..6018150 100644
--- a/includes/Api.php
+++ b/includes/Api.php
@@ -4,6 +4,7 @@
use FormatJson;
use MWHttpRequest;
+use Psr\Log\LoggerInterface;
use RuntimeException;
/**
@@ -34,8 +35,11 @@
* @throws RuntimeException
*/
public static function request( $params = array() ) {
+ $logger = LoggerFactory::getInstance( 'ORES' );
+
$url = Api::getUrl();
$url = wfAppendQuery( $url, $params );
+ $logger->debug( 'Requesting: ' . json_encode( $url ) );
$req = MWHttpRequest::factory( $url, null, __METHOD__ );
$status = $req->execute();
if ( !$status->isOK() ) {
@@ -43,6 +47,7 @@
. $status->getMessage()->text() );
}
$json = $req->getContent();
+ $logger->debug( 'Raw response: ' . json_encode( $json ) );
$data = FormatJson::decode( $json, true );
if ( !$data || !empty( $data['error'] ) ) {
throw new RuntimeException( "Bad response from ORES
endpoint [{$url}]: {$json}" );
diff --git a/includes/FetchScoreJob.php b/includes/FetchScoreJob.php
index 2b86f1a..accdc3d 100644
--- a/includes/FetchScoreJob.php
+++ b/includes/FetchScoreJob.php
@@ -3,6 +3,7 @@
namespace ORES;
use Job;
+use Psr\Log\LoggerInterface;
use Title;
class FetchScoreJob extends Job {
@@ -15,10 +16,13 @@
}
public function run() {
+ $logger = LoggerFactory::getInstance( 'ORES' );
+ $logger->info( 'Fetching scores for revision ' . json_encode(
$this->params ) );
$scores = Scoring::instance()->getScores(
$this->params['revid'] );
Cache::instance()->storeScores( $scores, $this->params['revid']
);
+ $logger->debug( 'Stored scores: ' . json_encode( $scores ) );
- // TODO: Or do we have to try/catch and return false on error,
set the error string, etc?
+ // FIXME: Or should we return false on error, set the error
string, etc?
return true;
}
}
diff --git a/includes/Hooks.php b/includes/Hooks.php
index e41dcfb..a15f691 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -34,6 +34,8 @@
*/
public static function onRecentChange_save( RecentChange $rc ) {
if ( $rc->getAttribute( 'rc_type' ) === RC_EDIT ) {
+ $logger = LoggerFactory::getInstance( 'ORES' );
+ $logger->debug( 'Processing edit' );
$job = new FetchScoreJob( $rc->getTitle(), array(
'revid' => $rc->getAttribute( 'rc_this_oldid' ),
) );
@@ -73,9 +75,8 @@
$name, array &$tables, array &$fields, array &$conds,
array &$query_options, array &$join_conds, FormOptions $opts
) {
- global $wgOresDamagingThreshold;
-
$tables[] = 'ores_classification';
+ $fields[] = 'ores_is_predicted';
$fields[] = 'ores_probability';
$join_conds['ores_classification'] = array( 'LEFT JOIN',
'rc_this_oldid = ores_rev AND ores_model = \'damaging\'
' .
@@ -83,9 +84,19 @@
if ( $opts->getValue( 'hidenondamaging' ) ) {
// Filter out non-damaging edits.
+
+ // Only show edits predicted to be damaging.
+ //
+ // Here's to assuming that we'll never want to set the
threshold
+ // lower than the precomputed cutoff.
$conds[] = 'ores_is_predicted = 1';
- $conds[] = 'ores_probability > '
- . wfGetDb( DB_SLAVE )->addQuotes(
$wgOresDamagingThreshold );
+
+ // If a threshold is set, use that to make the filter
tighter.
+ $threshold = Scoring::getThreshold( 'damaging' );
+ if ( $threshold ) {
+ $conds[] = 'ores_probability > '
+ . wfGetDb( DB_SLAVE )->addQuotes(
$threshold );
+ }
}
return true;
@@ -126,11 +137,14 @@
* Internal helper to label matching rows
*/
protected static function processRecentChangesList( RCCacheEntry
$rcObj, array &$data ) {
- global $wgOresDamagingThreshold;
-
- $score = $rcObj->getAttribute( 'ores_probability' );
- if ( $score && $score >= $wgOresDamagingThreshold ) {
- $data['recentChangesFlags']['damaging'] = true;
+ $threshold = Scoring::getThreshold( 'damaging' );
+ if ( $threshold ) {
+ $score = $rcObj->getAttribute( 'ores_probability' );
+ $flagged = ( $score && $score >= $threshold );
+ } else {
+ // Use is_predicted instead.
+ $flagged = $rcObj->getAttribute( 'ores_is_predicted' );
}
+ $data['recentChangesFlags']['damaging'] = $flagged;
}
}
diff --git a/includes/Scoring.php b/includes/Scoring.php
index 3074f43..2ece8cd 100644
--- a/includes/Scoring.php
+++ b/includes/Scoring.php
@@ -10,7 +10,7 @@
* @return array Results in the form returned by ORES
* @throws RuntimeException
*/
- public function getScores( $revisions, $models = null ) {
+ public static function getScores( $revisions, $models = null ) {
if ( !$models ) {
global $wgOresModels;
$models = $wgOresModels;
@@ -26,4 +26,25 @@
public static function instance() {
return new self();
}
+
+ /**
+ * Get the configured per-model threshold for detection as a positive
prediction
+ *
+ * @param string $model Name of the model you want the threshold for
+ * @return float Threshold, between [0, 1]
+ *
+ * TODO:
+ * - Should a null value mean, use the `is_predicted` value?
+ * - Some interesting things should happen here, e.g. per-user settings.
+ */
+ public static function getThreshold( $model ) {
+ switch ( $model ) {
+ case 'damaging':
+ $variable = 'Ores' . ucfirst( $model ) . 'Threshold'; // e.g. "OresDamagingThreshold"
+ break; // Required: without it control falls through to default and always throws.
+ default:
+ throw new \UnexpectedValueException( 'No threshold set for that model.' ); // Fully qualified so it resolves to the SPL class even inside a namespace.
+ }
+ return self::instance()->getConfig( $variable ); // Static method, so no $this. NOTE(review): getConfig() is not shown in this change; confirm it exists on Scoring.
+ }
}
--
To view, visit https://gerrit.wikimedia.org/r/259212
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Idb6eccba8ad63d417e44bd22b1e0a0357c7f3b17
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ORES
Gerrit-Branch: master
Gerrit-Owner: Awight <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits