Awight has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/259212

Change subject: [WIP] Generalize thresholds
......................................................................

[WIP] Generalize thresholds

Also, rely on is_predicted as a minimum qualification before threshold is
considered.

Change-Id: Idb6eccba8ad63d417e44bd22b1e0a0357c7f3b17
---
M includes/Api.php
M includes/FetchScoreJob.php
M includes/Hooks.php
M includes/Scoring.php
4 files changed, 55 insertions(+), 11 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ORES refs/changes/12/259212/1

diff --git a/includes/Api.php b/includes/Api.php
index 6cf0a12..6018150 100644
--- a/includes/Api.php
+++ b/includes/Api.php
@@ -4,6 +4,7 @@
 
 use FormatJson;
 use MWHttpRequest;
+use Psr\Log\LoggerInterface;
 use RuntimeException;
 
 /**
@@ -34,8 +35,11 @@
         * @throws RuntimeException
         */
        public static function request( $params = array() ) {
+               $logger = LoggerFactory::getInstance( 'ORES' );
+
                $url = Api::getUrl();
                $url = wfAppendQuery( $url, $params );
+               $logger->debug( 'Requesting: ' . json_encode( $url ) );
                $req = MWHttpRequest::factory( $url, null, __METHOD__ );
                $status = $req->execute();
                if ( !$status->isOK() ) {
@@ -43,6 +47,7 @@
                                . $status->getMessage()->text() );
                }
                $json = $req->getContent();
+               $logger->debug( 'Raw response: ' . json_encode( $json ) );
                $data = FormatJson::decode( $json, true );
                if ( !$data || !empty( $data['error'] ) ) {
                        throw new RuntimeException( "Bad response from ORES endpoint [{$url}]: {$json}" );
diff --git a/includes/FetchScoreJob.php b/includes/FetchScoreJob.php
index 2b86f1a..accdc3d 100644
--- a/includes/FetchScoreJob.php
+++ b/includes/FetchScoreJob.php
@@ -3,6 +3,7 @@
 namespace ORES;
 
 use Job;
+use Psr\Log\LoggerInterface;
 use Title;
 
 class FetchScoreJob extends Job {
@@ -15,10 +16,13 @@
        }
 
        public function run() {
+               $logger = LoggerFactory::getInstance( 'ORES' );
+               $logger->info( 'Fetching scores for revision ' . json_encode( $this->params ) );
                $scores = Scoring::instance()->getScores( $this->params['revid'] );
                Cache::instance()->storeScores( $scores, $this->params['revid'] );
+               $logger->debug( 'Stored scores: ' . json_encode( $scores ) );
 
-               // TODO: Or do we have to try/catch and return false on error, set the error string, etc?
+               // FIXME: Or should we return false on error, set the error string, etc?
                return true;
        }
 }
diff --git a/includes/Hooks.php b/includes/Hooks.php
index e41dcfb..a15f691 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -34,6 +34,8 @@
         */
        public static function onRecentChange_save( RecentChange $rc ) {
                if ( $rc->getAttribute( 'rc_type' ) === RC_EDIT ) {
+                       $logger = LoggerFactory::getInstance( 'ORES' );
+                       $logger->debug( 'Processing edit' );
                        $job = new FetchScoreJob( $rc->getTitle(), array(
                                'revid' => $rc->getAttribute( 'rc_this_oldid' ),
                        ) );
@@ -73,9 +75,8 @@
                $name, array &$tables, array &$fields, array &$conds,
                array &$query_options, array &$join_conds, FormOptions $opts
        ) {
-               global $wgOresDamagingThreshold;
-
                $tables[] = 'ores_classification';
+               $fields[] = 'ores_is_predicted';
                $fields[] = 'ores_probability';
                $join_conds['ores_classification'] = array( 'LEFT JOIN',
                        'rc_this_oldid = ores_rev AND ores_model = \'damaging\' ' .
@@ -83,9 +84,19 @@
 
                if ( $opts->getValue( 'hidenondamaging' ) ) {
                        // Filter out non-damaging edits.
+
+                       // Only show edits predicted to be damaging.
+                       //
+                       // Here's to assuming that we'll never want to set the threshold
+                       // lower than the precomputed cutoff.
                        $conds[] = 'ores_is_predicted = 1';
-                       $conds[] = 'ores_probability > '
-                               . wfGetDb( DB_SLAVE )->addQuotes( $wgOresDamagingThreshold );
+
+                       // If a threshold is set, use that to make the filter tighter.
+                       $threshold = Scoring::getThreshold( 'damaging' );
+                       if ( $threshold ) {
+                               $conds[] = 'ores_probability > '
+                                       . wfGetDb( DB_SLAVE )->addQuotes( $threshold );
+                       }
                }
 
                return true;
@@ -126,11 +137,14 @@
         * Internal helper to label matching rows
         */
        protected static function processRecentChangesList( RCCacheEntry $rcObj, array &$data ) {
-               global $wgOresDamagingThreshold;
-
-               $score = $rcObj->getAttribute( 'ores_probability' );
-               if ( $score && $score >= $wgOresDamagingThreshold ) {
-                       $data['recentChangesFlags']['damaging'] = true;
+               $threshold = Scoring::getThreshold( 'damaging' );
+               if ( $threshold ) {
+                       $score = $rcObj->getAttribute( 'ores_probability' );
+                       $flagged = ( $score && $score >= $threshold );
+               } else {
+                       // Use is_predicted instead.
+                       $flagged = $rcObj->getAttribute( 'ores_is_predicted' );
                }
+               $data['recentChangesFlags']['damaging'] = $flagged;
        }
 }
diff --git a/includes/Scoring.php b/includes/Scoring.php
index 3074f43..2ece8cd 100644
--- a/includes/Scoring.php
+++ b/includes/Scoring.php
@@ -10,7 +10,7 @@
         * @return array Results in the form returned by ORES
         * @throws RuntimeException
         */
-       public function getScores( $revisions, $models = null ) {
+       public static function getScores( $revisions, $models = null ) {
                if ( !$models ) {
                        global $wgOresModels;
                        $models = $wgOresModels;
@@ -26,4 +26,25 @@
        public static function instance() {
                return new self();
        }
+
+       /**
+        * Get the configured per-model threshold for detection as a positive prediction
+        *
+        * @param string $model Name of the model you want the threshold for
+        * @return float Threshold, between [0, 1]
+        *
+        * TODO:
+        * - Should a null value mean, use the `is_predicted` value?
+        * - Some interesting things should happen here, e.g. per-user settings.
+        */
+       public static function getThreshold( $model ) {
+               switch ( $model ) {
+                       case 'damaging':
+                               $model = ucfirst( $model );
+                               $variable = "Ores{$model}Threshold";
+                       default:
+                               throw new UnexpectedValueException( 'No threshold set for that model.' );
+               }
+               return $this->getConfig( $variable );
+       }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/259212
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Idb6eccba8ad63d417e44bd22b1e0a0357c7f3b17
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ORES
Gerrit-Branch: master
Gerrit-Owner: Awight <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to