Anomie has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/313831

Change subject: Action API integration for ORES
......................................................................

Action API integration for ORES

* Add a meta module to expose ores_model data
* Add rvprop, arvprop, rcprop, and ucprop values to fetch ORES scores
* Add rcshow and ucshow values to filter by ORES needs-review
  status

TODO:
* Figure out wlprop and wlshow.

Bug: T143614
Bug: T143616
Bug: T143617
Depends-On: Id6b42c7f2eb53a6f659d0d61383287f41d96ca00
Change-Id: I701e8e19d0b59e2b4db6bae5478720fa98f0fb85
---
M extension.json
M i18n/en.json
M i18n/qqq.json
A includes/ApiQueryORES.php
M includes/Cache.php
M includes/FetchScoreJob.php
M includes/Hooks.php
7 files changed, 436 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ORES 
refs/changes/31/313831/1

diff --git a/extension.json b/extension.json
index eb096bc..c5e6a5b 100644
--- a/extension.json
+++ b/extension.json
@@ -13,9 +13,25 @@
                "ORES\\Cache": "includes/Cache.php",
                "ORES\\Hooks": "includes/Hooks.php",
                "ORES\\FetchScoreJob": "includes/FetchScoreJob.php",
-               "ORES\\Scoring": "includes/Scoring.php"
+               "ORES\\Scoring": "includes/Scoring.php",
+               "ORES\\ApiQueryORES": "includes/ApiQueryORES.php"
+       },
+       "APIMetaModules": {
+               "ores": "ORES\\ApiQueryORES"
        },
        "Hooks": {
+               "APIGetAllowedParams": [
+                       "ORES\\Hooks::onAPIGetAllowedParams"
+               ],
+               "ApiQueryBaseBeforeQuery": [
+                       "ORES\\Hooks::onApiQueryBaseBeforeQuery"
+               ],
+               "ApiQueryBaseAfterQuery": [
+                       "ORES\\Hooks::onApiQueryBaseAfterQuery"
+               ],
+               "ApiQueryBaseProcessRow": [
+                       "ORES\\Hooks::onApiQueryBaseProcessRow"
+               ],
                "BeforePageDisplay": [
                        "ORES\\Hooks::onBeforePageDisplay"
                ],
@@ -118,6 +134,7 @@
                },
                "OresEnabledNamespaces": {},
                "OresWikiId": null,
+               "OresAPIMaxBatchJobs": 100,
                "RecentChangesFlags": {
                        "damaging": {
                                "letter": "ores-damaging-letter",
diff --git a/i18n/en.json b/i18n/en.json
index 891d5b4..32f1e6e 100644
--- a/i18n/en.json
+++ b/i18n/en.json
@@ -16,5 +16,12 @@
        "ores-pref-damaging": "ORES sensitivity",
        "ores-pref-rc-hidenondamaging": "Hide probably good edits from recent 
changes",
        "ores-pref-watchlist-hidenondamaging": "Hide probably good edits from 
the watchlist",
-       "prefs-ores" : "Revision scoring"
+       "prefs-ores" : "Revision scoring",
+       "apihelp-query+ores-description": "Return ORES configuration and model 
data for this wiki.",
+       "apihelp-query+ores-example-simple": "Fetch ORES data:",
+       "apihelp-query+revisions-paramvalue-prop-oresscores": "ORES scores for 
the revision.",
+       "apihelp-query+allrevisions-paramvalue-prop-oresscores": "ORES scores 
for the revision.",
+       "apihelp-query+recentchanges-paramvalue-prop-oresscores": "Adds ORES 
scores for the entry.",
+       "apihelp-query+watchlist-paramvalue-prop-oresscores": "Adds ORES scores 
for the edit.",
+       "apihelp-query+usercontribs-paramvalue-prop-oresscores": "Adds ORES 
scores for the edit."
 }
diff --git a/i18n/qqq.json b/i18n/qqq.json
index 88328fb..a46a9c4 100644
--- a/i18n/qqq.json
+++ b/i18n/qqq.json
@@ -19,5 +19,12 @@
        "ores-pref-damaging": "Part asking for damaging threshold",
        "ores-pref-rc-hidenondamaging": "Display message for user preferences 
to make hidenondamaging default in recent changes",
        "ores-pref-watchlist-hidenondamaging": "Display message for user 
preferences to make hidenondamaging default in the watchlist",
-       "prefs-ores": "Name of ORES section in preferences"
+       "prefs-ores": "Name of ORES section in preferences",
+       "apihelp-query+ores-description": 
"{{doc-apihelp-description|query+ores}}",
+       "apihelp-query+ores-example-simple": 
"{{doc-apihelp-example|query+ores}}",
+       "apihelp-query+revisions-paramvalue-prop-oresscores": 
"{{doc-apihelp-paramvalue|query+revisions|prop|oresscores}}",
+       "apihelp-query+allrevisions-paramvalue-prop-oresscores": 
"{{doc-apihelp-paramvalue|query+allrevisions|prop|oresscores}}",
+       "apihelp-query+recentchanges-paramvalue-prop-oresscores": 
"{{doc-apihelp-paramvalue|query+recentchanges|prop|oresscores}}",
+       "apihelp-query+watchlist-paramvalue-prop-oresscores": 
"{{doc-apihelp-paramvalue|query+watchlist|prop|oresscores}}",
+       "apihelp-query+usercontribs-paramvalue-prop-oresscores": 
"{{doc-apihelp-paramvalue|query+usercontribs|prop|oresscores}}"
 }
diff --git a/includes/ApiQueryORES.php b/includes/ApiQueryORES.php
new file mode 100644
index 0000000..93f57f6
--- /dev/null
+++ b/includes/ApiQueryORES.php
@@ -0,0 +1,90 @@
+<?php
+/**
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+namespace ORES;
+
+use ApiResult;
+use ApiQuery;
+use ApiQueryBase;
+
+/**
+ * A query action to return meta information about ORES models and
+ * configuration on the wiki.
+ *
+ * @ingroup API
+ */
+class ApiQueryORES extends ApiQueryBase {
+
+       public function __construct( ApiQuery $query, $moduleName ) {
+               parent::__construct( $query, $moduleName, 'ores' );
+       }
+
+       public function execute() {
+               global $wgOresBaseUrl, $wgOresExcludeBots, 
$wgOresDamagingThresholds,
+                       $wgOresEnabledNamespaces, $wgOresWikiId;
+
+               $result = $this->getResult();
+               $data = [
+                       'baseurl' => $wgOresBaseUrl,
+                       'wikiid' => $wgOresWikiId ?: wfWikiID(),
+                       'models' => [],
+                       'excludebots' => (bool)$wgOresExcludeBots,
+                       'damagingthresholds' => $wgOresDamagingThresholds,
+                       'namespaces' => $wgOresEnabledNamespaces
+                               ? array_keys( array_filter( 
$wgOresEnabledNamespaces ) )
+                               : \MWNamespace::getValidNamespaces(),
+               ];
+               ApiResult::setArrayType( $data['models'], 'assoc' );
+               ApiResult::setIndexedTagName( $data['namespaces'], 'ns' );
+
+               $this->addTables( 'ores_model' );
+               $this->addFields( [ 'oresm_name', 'oresm_version', 
'oresm_is_current' ] );
+               $this->addWhere( [ 'oresm_is_current' => 1 ] );
+               $res = $this->select();
+
+               foreach ( $res as $row ) {
+                       $data['models'][$row->oresm_name] = [
+                               'version' => $row->oresm_version,
+                       ];
+               }
+
+               $result->addValue( [ 'query' ], 'ores', $data );
+       }
+
+       public function getCacheMode( $params ) {
+               return 'public';
+       }
+
+       public function getAllowedParams() {
+               return [];
+       }
+
+       protected function getExamplesMessages() {
+               return [
+                       'action=query&meta=ores'
+                               => 'apihelp-query+ores-example-simple',
+               ];
+       }
+
+       public function getHelpUrls() {
+               return 'https://www.mediawiki.org/wiki/Extension:ORES';
+       }
+}
diff --git a/includes/Cache.php b/includes/Cache.php
index ea44b52..374c24f 100644
--- a/includes/Cache.php
+++ b/includes/Cache.php
@@ -37,7 +37,7 @@
                        $this->processRevision( $dbData, $revision, 
$revisionData );
                }
 
-               \wfGetDB( DB_MASTER )->insert( 'ores_classification', $dbData, 
__METHOD__ );
+               \wfGetDB( DB_MASTER )->insert( 'ores_classification', $dbData, 
__METHOD__, [ 'IGNORE' ] );
        }
 
        /**
diff --git a/includes/FetchScoreJob.php b/includes/FetchScoreJob.php
index ad1e619..30b7c62 100644
--- a/includes/FetchScoreJob.php
+++ b/includes/FetchScoreJob.php
@@ -12,11 +12,42 @@
         * @param array $params 'revid' key
         */
        public function __construct( Title $title, array $params ) {
+               $expensive = is_array( $params['revid'] );
+
+               if ( $expensive ) {
+                       sort( $params['revid'] );
+               }
+
                parent::__construct( 'ORESFetchScoreJob', $title, $params );
+
+               $this->removeDuplicates = $expensive;
        }
 
        public function run() {
                $logger = LoggerFactory::getInstance( 'ORES' );
+
+               if ( $this->removeDuplicates ) {
+                       // Filter out revisions that already have scores by the 
time this
+                       // job runs.
+                       $revids = (array)$this->params['revid'];
+                       $dbr = \wfGetDB( DB_REPLICA );
+                       $revids = array_diff(
+                               $revids,
+                               $dbr->selectFieldValues(
+                                       'ores_classification',
+                                       'oresc_rev',
+                                       [ 'oresc_rev' => $revids ],
+                                       __METHOD__,
+                                       [ 'DISTINCT' ]
+                               )
+                       );
+                       if ( !$revids ) {
+                               $logger->debug( 'Skipping fetch, no revisions 
need scores: ' . json_encode( $this->params ) );
+                               return true;
+                       }
+                       $this->params['revid'] = $revids;
+               }
+
                $logger->info( 'Fetching scores for revision ' . json_encode( 
$this->params ) );
                $scores = Scoring::instance()->getScores(
                        $this->params['revid'], null, 
$this->params['extra_params'] );
diff --git a/includes/Hooks.php b/includes/Hooks.php
index 0b5955b..b777879 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -22,6 +22,8 @@
 use Xml;
 
 class Hooks {
+       const GET_SCORES_BATCH_SIZE = 50;
+
        /**
         * @param DatabaseUpdater $updater
         * @return bool
@@ -525,4 +527,282 @@
                global $wgOresModels;
                return isset( $wgOresModels[$model] ) && $wgOresModels[$model];
        }
+
+       /**
+        * Inject parameters into certain API modules
+        * @param ApiBase &$module Module
+        * @param array &$params Parameter data
+        * @param int $flags zero or OR-ed flags like 
ApiBase::GET_VALUES_FOR_HELP
+        */
+       public static function onAPIGetAllowedParams( &$module, &$params, 
$flags ) {
+               if ( $module instanceof \ApiQueryRevisions ||
+                       $module instanceof \ApiQueryAllRevisions ||
+                       $module instanceof \ApiQueryRecentChanges ||
+                       /** @todo: $module instanceof \ApiQueryWatchlist || */
+                       $module instanceof \ApiQueryContributions
+               ) {
+                       $params['prop'][\ApiBase::PARAM_TYPE][] = 'oresscores';
+               }
+
+               if ( $module instanceof \ApiQueryRecentChanges ||
+                       /** @todo: $module instanceof \ApiQueryWatchlist || */
+                       $module instanceof \ApiQueryContributions
+               ) {
+                       $params['show'][\ApiBase::PARAM_TYPE][] = 'oresreview';
+                       $params['show'][\ApiBase::PARAM_TYPE][] = '!oresreview';
+               }
+       }
+
+       /**
+        * Modify the API query before it's made.
+        * @warning Any joins added *must* join on a unique key of the target 
table
+        *  unless you really know what you're doing.
+        * @param ApiQueryBase $module
+        * @param array &$tables tables to be queried
+        * @param array &$fields columns to select
+        * @param array &$conds WHERE conditionals for query
+        * @param array &$options options for the database request
+        * @param array &$joinConds join conditions for the tables
+        * @param array &$hookData Inter-hook communication
+        */
+       public static function onApiQueryBaseBeforeQuery(
+               $module, &$tables, &$fields, &$conds, &$options, &$joinConds, 
&$hookData
+       ) {
+               $params = $module->extractRequestParams();
+
+               if ( $module instanceof \ApiQueryRecentChanges ) {
+                       $field = 'rc_this_oldid';
+
+                       // Make sure the needed fields are included in the 
query, if necessary
+                       if ( in_array( 'oresscores', $params['prop'], true ) ) {
+                               if ( !in_array( 'rc_this_oldid', $fields, true 
) ) {
+                                       $fields[] = 'rc_this_oldid';
+                               }
+                               if ( !in_array( 'rc_type', $fields, true ) ) {
+                                       $fields[] = 'rc_type';
+                               }
+                       }
+               } elseif ( $module instanceof \ApiQueryContributions ) {
+                       $field = 'rev_id';
+               } else {
+                       return;
+               }
+
+               $show = isset( $params['show'] ) ? array_flip( $params['show'] 
) : [];
+               if ( isset( $show['oresreview'] ) || isset( 
$show['!oresreview'] ) ) {
+                       if ( isset( $show['oresreview'] ) && isset( 
$show['!oresreview'] ) ) {
+                               $module->dieUsageMsg( 'show' );
+                       }
+
+                       $threshold = self::getDamagingThreshold( 
$module->getUser() );
+                       $dbr = \wfGetDB( DB_REPLICA );
+
+                       $tables[] = 'ores_model';
+                       $tables[] = 'ores_classification';
+
+                       if ( isset( $show['oresreview'] ) ) {
+                               $join = 'INNER JOIN';
+
+                               // Filter out non-damaging and unscored edits.
+                               $conds[] = 'oresc_probability > ' . 
$dbr->addQuotes( $threshold );
+                       } else {
+                               $join = 'LEFT JOIN';
+
+                               // Filter out damaging edits.
+                               $conds[] = $dbr->makeList( [
+                                       'oresc_probability <= ' . 
$dbr->addQuotes( $threshold ),
+                                       'oresc_probability IS NULL'
+                               ], $dbr::LIST_OR );
+                       }
+
+                       $joinConds['ores_model'] = [ $join,
+                               'oresm_name = ' . $dbr->addQuotes( 'damaging' ) 
. ' AND oresm_is_current = 1'
+                       ];
+                       $joinConds['ores_classification'] = [ $join,
+                               "$field = oresc_rev AND oresc_model = oresm_id 
AND oresc_class = 1"
+                       ];
+               }
+       }
+
+       /**
+        * Perform work after the API query is made
+        * @param ApiQueryBase $module
+        * @param ResultWrapper|bool $res
+        * @param array &$hookData Inter-hook communication
+        */
+       public static function onApiQueryBaseAfterQuery( $module, $res, 
&$hookData ) {
+               global $wgOresAPIMaxBatchJobs;
+
+               if ( !$res ) {
+                       return;
+               }
+
+               // If the module is being used as a generator, don't bother. 
Generators
+               // don't return props.
+               if ( $module->isInGeneratorMode() ) {
+                       return;
+               }
+
+               if ( $module instanceof \ApiQueryRevisions ||
+                       $module instanceof \ApiQueryAllRevisions ||
+                       $module instanceof \ApiQueryContributions
+               ) {
+                       $field = 'rev_id';
+                       $checkRCType = false;
+               } elseif ( $module instanceof \ApiQueryRecentChanges ) {
+                       $field = 'rc_this_oldid';
+                       $checkRCType = true;
+               } else {
+                       return;
+               }
+
+               $params = $module->extractRequestParams();
+               if ( in_array( 'oresscores', $params['prop'], true ) ) {
+                       // Extract revision IDs from the result set
+                       $revids = [];
+                       foreach ( $res as $row ) {
+                               if ( !$checkRCType || (int)$row->rc_type === 
RC_EDIT || (int)$row->rc_type === RC_NEW ) {
+                                       $revids[] = $row->$field;
+                               }
+                       }
+                       $res->rewind();
+
+                       if ( $revids ) {
+                               $hookData['oresField'] = $field;
+                               $hookData['oresCheckRCType'] = $checkRCType;
+                               $hookData['oresNeedsContinuation'] = false;
+                               $hookData['oresScores'] = [];
+
+                               // Load cached score data
+                               $dbr = \wfGetDB( DB_REPLICA );
+                               $res2 = $dbr->select(
+                                       [ 'ores_classification', 'ores_model' ],
+                                       [ 'oresc_rev', 'oresc_class', 
'oresc_probability', 'oresm_name' ],
+                                       [
+                                               'oresc_rev' => $revids,
+                                               'oresc_model = oresm_id',
+                                               'oresm_is_current' => 1,
+                                       ],
+                                       __METHOD__
+                               );
+                               foreach ( $res2 as $row ) {
+                                       
$hookData['oresScores'][$row->oresc_rev][] = $row;
+                               }
+
+                               // If any queried revisions were not cached, 
fetch up to
+                               // GET_SCORES_BATCH_SIZE from the service now, 
cache them, and
+                               // add them to the result.
+                               $revids = array_diff( $revids, array_keys( 
$hookData['oresScores'] ) );
+                               if ( $revids ) {
+                                       if ( count( $revids ) > 
self::GET_SCORES_BATCH_SIZE ) {
+                                               
$hookData['oresNeedsContinuation'] = true;
+                                               $title = \Title::newFromText( 
'API batch score fetch' );
+                                               for (
+                                                       $i = 
self::GET_SCORES_BATCH_SIZE, $ct = 0;
+                                                       $i < count( $revids ) 
&& $ct < $wgOresAPIMaxBatchJobs;
+                                                       $i += 
self::GET_SCORES_BATCH_SIZE, $ct++
+                                               ) {
+                                                       $job = new 
FetchScoreJob( $title, [
+                                                               'revid' => 
array_slice( $revids, $i, self::GET_SCORES_BATCH_SIZE ),
+                                                               'extra_params' 
=> [],
+                                                       ] );
+                                                       
JobQueueGroup::singleton()->push( $job );
+                                               }
+                                               $revids = array_slice( $revids, 
0, self::GET_SCORES_BATCH_SIZE );
+                                       }
+                                       $scores = 
Scoring::instance()->getScores( $revids );
+                                       $cache = Cache::instance();
+                                       \DeferredUpdates::addCallableUpdate( 
function() use ( $cache, $scores ) {
+                                               $cache->storeScores( $scores );
+                                       } );
+
+                                       $models = [];
+                                       $res2 = $dbr->select(
+                                               [ 'ores_model' ],
+                                               [ 'oresm_id', 'oresm_name' ],
+                                               [ 'oresm_is_current' => 1 ],
+                                               __METHOD__
+                                       );
+                                       foreach ( $res2 as $row ) {
+                                               $models[$row->oresm_id] = 
$row->oresm_name;
+                                       }
+
+                                       foreach ( $scores as $revid => $data ) {
+                                               $dbData = [];
+                                               $cache->processRevision( 
$dbData, $revid, $data );
+                                               foreach ( $dbData as $row ) {
+                                                       
$hookData['oresScores'][$revid][] = (object)[
+                                                               'oresc_class' 
=> $row['oresc_class'],
+                                                               
'oresc_probability' => $row['oresc_probability'],
+                                                               'oresm_name' => 
$models[$row['oresc_model']],
+                                                       ];
+                                               }
+                                       }
+
+                                       if ( 
!$hookData['oresNeedsContinuation'] && array_diff( $revids, array_keys( $scores 
) ) ) {
+                                               // Some queried revisions were 
ignored, signal continuation.
+                                               
$hookData['oresNeedsContinuation'] = true;
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /**
+        * Modify each data row before it's returned.
+        * @param ApiQueryBase $module
+        * @param object $row
+        * @param array &$data
+        * @param array &$hookData Inter-hook communication
+        * @return bool False to stop processing the result set
+        */
+       public static function onApiQueryBaseProcessRow( $module, $row, &$data, 
&$hookData ) {
+               global $wgOresModelClasses;
+               static $classMap = null;
+
+               if ( isset( $hookData['oresField'] ) &&
+                       ( !$hookData['oresCheckRCType'] ||
+                               (int)$row->rc_type === RC_NEW || 
(int)$row->rc_type === RC_EDIT
+                       )
+               ) {
+                       $data['oresscores'] = [];
+
+                       $revid = $row->{$hookData['oresField']};
+                       if ( !isset( $hookData['oresScores'][$revid] ) ) {
+                               // If we didn't fetch all uncached scores, 
signal continuation.
+                               // Otherwise, we have a WTF situation that we 
should just ignore.
+                               return !$hookData['oresNeedsContinuation'];
+                       }
+
+                       if ( $classMap === null ) {
+                               $classMap = array_map( 'array_flip', 
$wgOresModelClasses );
+                       }
+
+                       foreach ( $hookData['oresScores'][$revid] as $orow ) {
+                               if ( !isset( 
$classMap[$orow->oresm_name][$orow->oresc_class] ) ) {
+                                       // Missing configuration, ignore it
+                                       continue;
+                               }
+                               
$data['oresscores'][$orow->oresm_name][$classMap[$orow->oresm_name][$orow->oresc_class]]
 =
+                                       (float)$orow->oresc_probability;
+                       }
+
+                       foreach ( $data['oresscores'] as $model => &$scores ) {
+                               // Recalculate the class-0 result, as it's not 
stored in the database
+                               if ( isset( $classMap[$model][0] ) && !isset( 
$scores[$classMap[$model][0]] ) ) {
+                                       $scores[$classMap[$model][0]] = 1.0 - 
array_sum( $scores );
+                               }
+
+                               \ApiResult::setArrayType( $scores, 'kvp', 
'name' );
+                               \ApiResult::setIndexedTagName( $scores, 'class' 
);
+                       }
+                       unset( $scores );
+
+                       \ApiResult::setArrayType( $data['oresscores'], 'kvp', 
'name' );
+                       \ApiResult::setIndexedTagName( $data['oresscores'], 
'model' );
+               }
+
+               return true;
+       }
+
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/313831
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I701e8e19d0b59e2b4db6bae5478720fa98f0fb85
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ORES
Gerrit-Branch: master
Gerrit-Owner: Anomie <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to