Anomie has uploaded a new change for review.
https://gerrit.wikimedia.org/r/313831
Change subject: Action API integration for ORES
......................................................................
Action API integration for ORES
* Add a meta module to expose ores_model data
* Add rvprop, arvprop, rcprop, and ucprop values to fetch ORES scores
* Add rcshow and ucshow values to filter by ORES needs-review
status
TODO:
* Figure out wlprop and wlshow.
Bug: T143614
Bug: T143616
Bug: T143617
Depends-On: Id6b42c7f2eb53a6f659d0d61383287f41d96ca00
Change-Id: I701e8e19d0b59e2b4db6bae5478720fa98f0fb85
---
M extension.json
M i18n/en.json
M i18n/qqq.json
A includes/ApiQueryORES.php
M includes/Cache.php
M includes/FetchScoreJob.php
M includes/Hooks.php
7 files changed, 436 insertions(+), 4 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ORES
refs/changes/31/313831/1
diff --git a/extension.json b/extension.json
index eb096bc..c5e6a5b 100644
--- a/extension.json
+++ b/extension.json
@@ -13,9 +13,25 @@
"ORES\\Cache": "includes/Cache.php",
"ORES\\Hooks": "includes/Hooks.php",
"ORES\\FetchScoreJob": "includes/FetchScoreJob.php",
- "ORES\\Scoring": "includes/Scoring.php"
+ "ORES\\Scoring": "includes/Scoring.php",
+ "ORES\\ApiQueryORES": "includes/ApiQueryORES.php"
+ },
+ "APIMetaModules": {
+ "ores": "ORES\\ApiQueryORES"
},
"Hooks": {
+ "APIGetAllowedParams": [
+ "ORES\\Hooks::onAPIGetAllowedParams"
+ ],
+ "ApiQueryBaseBeforeQuery": [
+ "ORES\\Hooks::onApiQueryBaseBeforeQuery"
+ ],
+ "ApiQueryBaseAfterQuery": [
+ "ORES\\Hooks::onApiQueryBaseAfterQuery"
+ ],
+ "ApiQueryBaseProcessRow": [
+ "ORES\\Hooks::onApiQueryBaseProcessRow"
+ ],
"BeforePageDisplay": [
"ORES\\Hooks::onBeforePageDisplay"
],
@@ -118,6 +134,7 @@
},
"OresEnabledNamespaces": {},
"OresWikiId": null,
+ "OresAPIMaxBatchJobs": 100,
"RecentChangesFlags": {
"damaging": {
"letter": "ores-damaging-letter",
diff --git a/i18n/en.json b/i18n/en.json
index 891d5b4..32f1e6e 100644
--- a/i18n/en.json
+++ b/i18n/en.json
@@ -16,5 +16,12 @@
"ores-pref-damaging": "ORES sensitivity",
"ores-pref-rc-hidenondamaging": "Hide probably good edits from recent
changes",
"ores-pref-watchlist-hidenondamaging": "Hide probably good edits from
the watchlist",
- "prefs-ores" : "Revision scoring"
+ "prefs-ores" : "Revision scoring",
+ "apihelp-query+ores-description": "Return ORES configuration and model
data for this wiki.",
+ "apihelp-query+ores-example-simple": "Fetch ORES data:",
+ "apihelp-query+revisions-paramvalue-prop-oresscores": "ORES scores for
the revision.",
+ "apihelp-query+allrevisions-paramvalue-prop-oresscores": "ORES scores
for the revision.",
+ "apihelp-query+recentchanges-paramvalue-prop-oresscores": "Adds ORES
scores for the entry.",
+ "apihelp-query+watchlist-paramvalue-prop-oresscores": "Adds ORES scores
for the edit.",
+ "apihelp-query+usercontribs-paramvalue-prop-oresscores": "Adds ORES
scores for the edit."
}
diff --git a/i18n/qqq.json b/i18n/qqq.json
index 88328fb..a46a9c4 100644
--- a/i18n/qqq.json
+++ b/i18n/qqq.json
@@ -19,5 +19,12 @@
"ores-pref-damaging": "Part asking for damaging threshold",
"ores-pref-rc-hidenondamaging": "Display message for user preferences
to make hidenondamaging default in recent changes",
"ores-pref-watchlist-hidenondamaging": "Display message for user
preferences to make hidenondamaging default in the watchlist",
- "prefs-ores": "Name of ORES section in preferences"
+ "prefs-ores": "Name of ORES section in preferences",
+ "apihelp-query+ores-description":
"{{doc-apihelp-description|query+ores}}",
+ "apihelp-query+ores-example-simple":
"{{doc-apihelp-example|query+ores}}",
+ "apihelp-query+revisions-paramvalue-prop-oresscores":
"{{doc-apihelp-paramvalue|query+revisions|prop|oresscores}}",
+ "apihelp-query+allrevisions-paramvalue-prop-oresscores":
"{{doc-apihelp-paramvalue|query+allrevisions|prop|oresscores}}",
+ "apihelp-query+recentchanges-paramvalue-prop-oresscores":
"{{doc-apihelp-paramvalue|query+recentchanges|prop|oresscores}}",
+ "apihelp-query+watchlist-paramvalue-prop-oresscores":
"{{doc-apihelp-paramvalue|query+watchlist|prop|oresscores}}",
+ "apihelp-query+usercontribs-paramvalue-prop-oresscores":
"{{doc-apihelp-paramvalue|query+usercontribs|prop|oresscores}}"
}
diff --git a/includes/ApiQueryORES.php b/includes/ApiQueryORES.php
new file mode 100644
index 0000000..93f57f6
--- /dev/null
+++ b/includes/ApiQueryORES.php
@@ -0,0 +1,90 @@
+<?php
+/**
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+namespace ORES;
+
+use ApiResult;
+use ApiQuery;
+use ApiQueryBase;
+
+/**
+ * A query action to return meta information about ORES models and
+ * configuration on the wiki.
+ *
+ * @ingroup API
+ */
+class ApiQueryORES extends ApiQueryBase {
+
+ public function __construct( ApiQuery $query, $moduleName ) {
+ parent::__construct( $query, $moduleName, 'ores' );
+ }
+
+ public function execute() {
+ global $wgOresBaseUrl, $wgOresExcludeBots,
$wgOresDamagingThresholds,
+ $wgOresEnabledNamespaces, $wgOresWikiId;
+
+ $result = $this->getResult();
+ $data = [
+ 'baseurl' => $wgOresBaseUrl,
+ 'wikiid' => $wgOresWikiId ?: wfWikiID(),
+ 'models' => [],
+ 'excludebots' => (bool)$wgOresExcludeBots,
+ 'damagingthresholds' => $wgOresDamagingThresholds,
+ 'namespaces' => $wgOresEnabledNamespaces
+ ? array_keys( array_filter(
$wgOresEnabledNamespaces ) )
+ : \MWNamespace::getValidNamespaces(),
+ ];
+ ApiResult::setArrayType( $data['models'], 'assoc' );
+ ApiResult::setIndexedTagName( $data['namespaces'], 'ns' );
+
+ $this->addTables( 'ores_model' );
+ $this->addFields( [ 'oresm_name', 'oresm_version',
'oresm_is_current' ] );
+ $this->addWhere( [ 'oresm_is_current' => 1 ] );
+ $res = $this->select();
+
+ foreach ( $res as $row ) {
+ $data['models'][$row->oresm_name] = [
+ 'version' => $row->oresm_version,
+ ];
+ }
+
+ $result->addValue( [ 'query' ], 'ores', $data );
+ }
+
+ public function getCacheMode( $params ) {
+ return 'public';
+ }
+
+ public function getAllowedParams() {
+ return [];
+ }
+
+ protected function getExamplesMessages() {
+ return [
+ 'action=query&meta=ores'
+ => 'apihelp-query+ores-example-simple',
+ ];
+ }
+
+ public function getHelpUrls() {
+ return 'https://www.mediawiki.org/wiki/Extension:ORES';
+ }
+}
diff --git a/includes/Cache.php b/includes/Cache.php
index ea44b52..374c24f 100644
--- a/includes/Cache.php
+++ b/includes/Cache.php
@@ -37,7 +37,7 @@
$this->processRevision( $dbData, $revision,
$revisionData );
}
- \wfGetDB( DB_MASTER )->insert( 'ores_classification', $dbData,
__METHOD__ );
+ \wfGetDB( DB_MASTER )->insert( 'ores_classification', $dbData,
__METHOD__, [ 'IGNORE' ] );
}
/**
diff --git a/includes/FetchScoreJob.php b/includes/FetchScoreJob.php
index ad1e619..30b7c62 100644
--- a/includes/FetchScoreJob.php
+++ b/includes/FetchScoreJob.php
@@ -12,11 +12,42 @@
* @param array $params 'revid' key
*/
public function __construct( Title $title, array $params ) {
+ $expensive = is_array( $params['revid'] );
+
+ if ( $expensive ) {
+ sort( $params['revid'] );
+ }
+
parent::__construct( 'ORESFetchScoreJob', $title, $params );
+
+ $this->removeDuplicates = $expensive;
}
public function run() {
$logger = LoggerFactory::getInstance( 'ORES' );
+
+ if ( $this->removeDuplicates ) {
+ // Filter out revisions that already have scores by the
time this
+ // job runs.
+ $revids = (array)$this->params['revid'];
+ $dbr = \wfGetDB( DB_REPLICA );
+ $revids = array_diff(
+ $revids,
+ $dbr->selectFieldValues(
+ 'ores_classification',
+ 'oresc_rev',
+ [ 'oresc_rev' => $revids ],
+ __METHOD__,
+ [ 'DISTINCT' ]
+ )
+ );
+ if ( !$revids ) {
+ $logger->debug( 'Skipping fetch, no revisions
need scores: ' . json_encode( $this->params ) );
+ return true;
+ }
+ $this->params['revid'] = $revids;
+ }
+
$logger->info( 'Fetching scores for revision ' . json_encode(
$this->params ) );
$scores = Scoring::instance()->getScores(
$this->params['revid'], null,
$this->params['extra_params'] );
diff --git a/includes/Hooks.php b/includes/Hooks.php
index 0b5955b..b777879 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -22,6 +22,8 @@
use Xml;
class Hooks {
+ const GET_SCORES_BATCH_SIZE = 50;
+
/**
* @param DatabaseUpdater $updater
* @return bool
@@ -525,4 +527,282 @@
global $wgOresModels;
return isset( $wgOresModels[$model] ) && $wgOresModels[$model];
}
+
+ /**
+ * Inject parameters into certain API modules
+ * @param ApiBase &$module Module
+ * @param array &$params Parameter data
+ * @param int $flags zero or OR-ed flags like
ApiBase::GET_VALUES_FOR_HELP
+ */
+ public static function onAPIGetAllowedParams( &$module, &$params,
$flags ) {
+ if ( $module instanceof \ApiQueryRevisions ||
+ $module instanceof \ApiQueryAllRevisions ||
+ $module instanceof \ApiQueryRecentChanges ||
+ /** @todo: $module instanceof \ApiQueryWatchlist || */
+ $module instanceof \ApiQueryContributions
+ ) {
+ $params['prop'][\ApiBase::PARAM_TYPE][] = 'oresscores';
+ }
+
+ if ( $module instanceof \ApiQueryRecentChanges ||
+ /** @todo: $module instanceof \ApiQueryWatchlist || */
+ $module instanceof \ApiQueryContributions
+ ) {
+ $params['show'][\ApiBase::PARAM_TYPE][] = 'oresreview';
+ $params['show'][\ApiBase::PARAM_TYPE][] = '!oresreview';
+ }
+ }
+
+ /**
+ * Modify the API query before it's made.
+ * @warning Any joins added *must* join on a unique key of the target
table
+ * unless you really know what you're doing.
+ * @param ApiQueryBase $module
+ * @param array &$tables tables to be queried
+ * @param array &$fields columns to select
+ * @param array &$conds WHERE conditionals for query
+ * @param array &$options options for the database request
+ * @param array &$joinConds join conditions for the tables
+ * @param array &$hookData Inter-hook communication
+ */
+ public static function onApiQueryBaseBeforeQuery(
+ $module, &$tables, &$fields, &$conds, &$options, &$joinConds,
&$hookData
+ ) {
+ $params = $module->extractRequestParams();
+
+ if ( $module instanceof \ApiQueryRecentChanges ) {
+ $field = 'rc_this_oldid';
+
+ // Make sure the needed fields are included in the
query, if necessary
+ if ( in_array( 'oresscores', $params['prop'], true ) ) {
+ if ( !in_array( 'rc_this_oldid', $fields, true
) ) {
+ $fields[] = 'rc_this_oldid';
+ }
+ if ( !in_array( 'rc_type', $fields, true ) ) {
+ $fields[] = 'rc_type';
+ }
+ }
+ } elseif ( $module instanceof \ApiQueryContributions ) {
+ $field = 'rev_id';
+ } else {
+ return;
+ }
+
+ $show = isset( $params['show'] ) ? array_flip( $params['show']
) : [];
+ if ( isset( $show['oresreview'] ) || isset(
$show['!oresreview'] ) ) {
+ if ( isset( $show['oresreview'] ) && isset(
$show['!oresreview'] ) ) {
+ $module->dieUsageMsg( 'show' );
+ }
+
+ $threshold = self::getDamagingThreshold(
$module->getUser() );
+ $dbr = \wfGetDB( DB_REPLICA );
+
+ $tables[] = 'ores_model';
+ $tables[] = 'ores_classification';
+
+ if ( isset( $show['oresreview'] ) ) {
+ $join = 'INNER JOIN';
+
+ // Filter out non-damaging and unscored edits.
+ $conds[] = 'oresc_probability > ' .
$dbr->addQuotes( $threshold );
+ } else {
+ $join = 'LEFT JOIN';
+
+ // Filter out damaging edits.
+ $conds[] = $dbr->makeList( [
+ 'oresc_probability <= ' .
$dbr->addQuotes( $threshold ),
+ 'oresc_probability IS NULL'
+ ], $dbr::LIST_OR );
+ }
+
+ $joinConds['ores_model'] = [ $join,
+ 'oresm_name = ' . $dbr->addQuotes( 'damaging' )
. ' AND oresm_is_current = 1'
+ ];
+ $joinConds['ores_classification'] = [ $join,
+ "$field = oresc_rev AND oresc_model = oresm_id
AND oresc_class = 1"
+ ];
+ }
+ }
+
+ /**
+ * Perform work after the API query is made
+ * @param ApiQueryBase $module
+ * @param ResultWrapper|bool $res
+ * @param array &$hookData Inter-hook communication
+ */
+ public static function onApiQueryBaseAfterQuery( $module, $res,
&$hookData ) {
+ global $wgOresAPIMaxBatchJobs;
+
+ if ( !$res ) {
+ return;
+ }
+
+ // If the module is being used as a generator, don't bother.
Generators
+ // don't return props.
+ if ( $module->isInGeneratorMode() ) {
+ return;
+ }
+
+ if ( $module instanceof \ApiQueryRevisions ||
+ $module instanceof \ApiQueryAllRevisions ||
+ $module instanceof \ApiQueryContributions
+ ) {
+ $field = 'rev_id';
+ $checkRCType = false;
+ } elseif ( $module instanceof \ApiQueryRecentChanges ) {
+ $field = 'rc_this_oldid';
+ $checkRCType = true;
+ } else {
+ return;
+ }
+
+ $params = $module->extractRequestParams();
+ if ( in_array( 'oresscores', $params['prop'], true ) ) {
+ // Extract revision IDs from the result set
+ $revids = [];
+ foreach ( $res as $row ) {
+ if ( !$checkRCType || (int)$row->rc_type ===
RC_EDIT || (int)$row->rc_type === RC_NEW ) {
+ $revids[] = $row->$field;
+ }
+ }
+ $res->rewind();
+
+ if ( $revids ) {
+ $hookData['oresField'] = $field;
+ $hookData['oresCheckRCType'] = $checkRCType;
+ $hookData['oresNeedsContinuation'] = false;
+ $hookData['oresScores'] = [];
+
+ // Load cached score data
+ $dbr = \wfGetDB( DB_REPLICA );
+ $res2 = $dbr->select(
+ [ 'ores_classification', 'ores_model' ],
+ [ 'oresc_rev', 'oresc_class',
'oresc_probability', 'oresm_name' ],
+ [
+ 'oresc_rev' => $revids,
+ 'oresc_model = oresm_id',
+ 'oresm_is_current' => 1,
+ ],
+ __METHOD__
+ );
+ foreach ( $res2 as $row ) {
+
$hookData['oresScores'][$row->oresc_rev][] = $row;
+ }
+
+ // If any queried revisions were not cached,
fetch up to
+ // GET_SCORES_BATCH_SIZE from the service now,
cache them, and
+ // add them to the result.
+ $revids = array_diff( $revids, array_keys(
$hookData['oresScores'] ) );
+ if ( $revids ) {
+ if ( count( $revids ) >
self::GET_SCORES_BATCH_SIZE ) {
+
$hookData['oresNeedsContinuation'] = true;
+ $title = \Title::newFromText(
'API batch score fetch' );
+ for (
+ $i =
self::GET_SCORES_BATCH_SIZE, $ct = 0;
+ $i < count( $revids )
&& $ct < $wgOresAPIMaxBatchJobs;
+ $i +=
self::GET_SCORES_BATCH_SIZE, $ct++
+ ) {
+ $job = new
FetchScoreJob( $title, [
+ 'revid' =>
array_slice( $revids, $i, self::GET_SCORES_BATCH_SIZE ),
+ 'extra_params'
=> [],
+ ] );
+
JobQueueGroup::singleton()->push( $job );
+ }
+ $revids = array_slice( $revids,
0, self::GET_SCORES_BATCH_SIZE );
+ }
+ $scores =
Scoring::instance()->getScores( $revids );
+ $cache = Cache::instance();
+ \DeferredUpdates::addCallableUpdate(
function() use ( $cache, $scores ) {
+ $cache->storeScores( $scores );
+ } );
+
+ $models = [];
+ $res2 = $dbr->select(
+ [ 'ores_model' ],
+ [ 'oresm_id', 'oresm_name' ],
+ [ 'oresm_is_current' => 1 ],
+ __METHOD__
+ );
+ foreach ( $res2 as $row ) {
+ $models[$row->oresm_id] =
$row->oresm_name;
+ }
+
+ foreach ( $scores as $revid => $data ) {
+ $dbData = [];
+ $cache->processRevision(
$dbData, $revid, $data );
+ foreach ( $dbData as $row ) {
+
$hookData['oresScores'][$revid][] = (object)[
+ 'oresc_class'
=> $row['oresc_class'],
+
'oresc_probability' => $row['oresc_probability'],
+ 'oresm_name' =>
$models[$row['oresc_model']],
+ ];
+ }
+ }
+
+ if (
!$hookData['oresNeedsContinuation'] && array_diff( $revids, array_keys( $scores
) ) ) {
+ // Some queried revisions were
ignored, signal continuation.
+
$hookData['oresNeedsContinuation'] = true;
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Modify each data row before it's returned.
+ * @param ApiQueryBase $module
+ * @param object $row
+ * @param array &$data
+ * @param array &$hookData Inter-hook communication
+ * @return bool False to stop processing the result set
+ */
+ public static function onApiQueryBaseProcessRow( $module, $row, &$data,
&$hookData ) {
+ global $wgOresModelClasses;
+ static $classMap = null;
+
+ if ( isset( $hookData['oresField'] ) &&
+ ( !$hookData['oresCheckRCType'] ||
+ (int)$row->rc_type === RC_NEW ||
(int)$row->rc_type === RC_EDIT
+ )
+ ) {
+ $data['oresscores'] = [];
+
+ $revid = $row->{$hookData['oresField']};
+ if ( !isset( $hookData['oresScores'][$revid] ) ) {
+ // If we didn't fetch all uncached scores,
signal continuation.
+ // Otherwise, we have a WTF situation that we
should just ignore.
+ return !$hookData['oresNeedsContinuation'];
+ }
+
+ if ( $classMap === null ) {
+ $classMap = array_map( 'array_flip',
$wgOresModelClasses );
+ }
+
+ foreach ( $hookData['oresScores'][$revid] as $orow ) {
+ if ( !isset(
$classMap[$orow->oresm_name][$orow->oresc_class] ) ) {
+ // Missing configuration, ignore it
+ continue;
+ }
+
$data['oresscores'][$orow->oresm_name][$classMap[$orow->oresm_name][$orow->oresc_class]]
=
+ (float)$orow->oresc_probability;
+ }
+
+ foreach ( $data['oresscores'] as $model => &$scores ) {
+ // Recalculate the class-0 result, as it's not
stored in the database
+ if ( isset( $classMap[$model][0] ) && !isset(
$scores[$classMap[$model][0]] ) ) {
+ $scores[$classMap[$model][0]] = 1.0 -
array_sum( $scores );
+ }
+
+ \ApiResult::setArrayType( $scores, 'kvp',
'name' );
+ \ApiResult::setIndexedTagName( $scores, 'class'
);
+ }
+ unset( $scores );
+
+ \ApiResult::setArrayType( $data['oresscores'], 'kvp',
'name' );
+ \ApiResult::setIndexedTagName( $data['oresscores'],
'model' );
+ }
+
+ return true;
+ }
+
}
--
To view, visit https://gerrit.wikimedia.org/r/313831
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I701e8e19d0b59e2b4db6bae5478720fa98f0fb85
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ORES
Gerrit-Branch: master
Gerrit-Owner: Anomie <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits