jenkins-bot has submitted this change and it was merged.
Change subject: Add PopulateDatabase.php
......................................................................
Add PopulateDatabase.php
Bug: T123795
Change-Id: Ia10250e261ed2a16afa970cdf3e9c5e3105bb98d
---
M includes/Cache.php
A maintenance/PopulateDatabase.php
2 files changed, 112 insertions(+), 3 deletions(-)
Approvals:
Hoo man: Looks good to me, approved
jenkins-bot: Verified
diff --git a/includes/Cache.php b/includes/Cache.php
index 4d18cda..ef759db 100644
--- a/includes/Cache.php
+++ b/includes/Cache.php
@@ -18,11 +18,10 @@
* Save scores to the database
*
* @param array $scores in the same structure as is returned by ORES.
- * @param integer $revid Revision ID
*
* @throws RuntimeException
*/
- public function storeScores( $scores, $revid ) {
+ public function storeScores( $scores ) {
// Map to database fields.
$dbData = array();
foreach ( $scores as $revision => $revisionData ) {
@@ -47,7 +46,7 @@
continue;
}
$dbData[] = array(
- 'oresc_rev' => $revid,
+ 'oresc_rev' => $revision,
'oresc_model' => $modelId,
'oresc_class' => $class,
'oresc_probability' =>
$probability,
diff --git a/maintenance/PopulateDatabase.php b/maintenance/PopulateDatabase.php
new file mode 100644
index 0000000..2b7d4f0
--- /dev/null
+++ b/maintenance/PopulateDatabase.php
@@ -0,0 +1,110 @@
+<?php
+
+namespace ORES;
+
+use Maintenance;
+
+require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
+ ? getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php'
+ : __DIR__ . '/../../../maintenance/Maintenance.php' );
+
+/**
+ * Maintenance script scoring recent changes that lack an ores_classification row.
+ *
+ * @ingroup Maintenance
+ */
+class PopulateDatabase extends Maintenance {
+
+	/** Number of revision ids handed to Scoring::getScores() per call */
+	const PACK_SIZE = 50;
+
+	/** @var int|null Maximum number of rows per select query (--batch) */
+	private $batchSize;
+
+	/** @var int|null Maximum number of revisions to score (--number) */
+	private $revisionLimit;
+
+	public function __construct() {
+		parent::__construct();
+
+		$this->addDescription( 'Populate ores_classification table by scoring ' .
+			'the latest edits in recentchanges table that are not scored' );
+		$this->addOption( 'number', 'Number of revisions to be scored', false, true, 'n' );
+		$this->addOption( 'batch', 'Batch size for select sql query', false, true, 'b' );
+	}
+
+	/**
+	 * Fetch unscored rows from recentchanges, newest rc_id first, and score
+	 * them until --number revisions were handled or no unscored row is left.
+	 */
+	public function execute() {
+		global $wgOresExcludeBots;
+
+		$scoring = Scoring::instance();
+		$cache = Cache::instance();
+		// Option values are strings; a batch size below 1 would never make progress.
+		$this->batchSize = max( 1, (int)$this->getOption( 'batch', 5000 ) );
+		$this->revisionLimit = (int)$this->getOption( 'number', 1000 );
+
+		$dbr = wfGetDB( DB_SLAVE );
+		$joinConds = array( 'ores_classification' =>
+			array( 'LEFT JOIN', array( 'oresc_rev = rc_this_oldid' ) ) );
+		// With the LEFT JOIN, 'oresc_id IS NULL' keeps only rows without a score.
+		$conditions = array( 'oresc_id IS NULL', 'rc_type' => 0 );
+		if ( $wgOresExcludeBots === true ) {
+			$conditions['rc_bot'] = 0;
+		}
+
+		$latestRcId = 0;
+		$count = 0;
+		while ( $count < $this->revisionLimit ) {
+			$batchConds = $conditions;
+			if ( $latestRcId ) {
+				// Continue below the lowest rc_id fetched so far.
+				$batchConds[] = 'rc_id < ' . $dbr->addQuotes( $latestRcId );
+			}
+			// Never fetch more rows than --number still allows.
+			$limit = min( $this->batchSize, $this->revisionLimit - $count );
+
+			$res = $dbr->select( array( 'recentchanges', 'ores_classification' ),
+				array( 'rc_id', 'rc_this_oldid' ), $batchConds, __METHOD__,
+				array( 'ORDER BY' => 'rc_id DESC', 'LIMIT' => $limit ), $joinConds );
+
+			$revIds = array();
+			foreach ( $res as $row ) {
+				$revIds[] = $row->rc_this_oldid;
+				$latestRcId = $row->rc_id;
+			}
+			foreach ( array_chunk( $revIds, self::PACK_SIZE ) as $pack ) {
+				$this->processScores( $pack, $scoring, $cache );
+			}
+
+			// Count the rows actually handled, not the requested batch size.
+			$count += count( $revIds );
+			wfGetLBFactory()->waitForReplication();
+
+			if ( count( $revIds ) < $limit ) {
+				break;
+			}
+		}
+		$this->output( "Finished processing the revisions\n" );
+	}
+
+	/**
+	 * Process several edits and store the scores in the database
+	 *
+	 * @param array $revs array of revision ids
+	 * @param Scoring $scoring scoring object
+	 * @param Cache $cache cache object
+	 */
+	private function processScores( array $revs, Scoring $scoring, Cache $cache ) {
+		$size = count( $revs );
+		$this->output( "Processing $size revisions\n" );
+
+		$scores = $scoring->getScores( $revs );
+		$cache->storeScores( $scores );
+	}
+}
+
+$maintClass = 'ORES\PopulateDatabase'; // Fully-qualified name of the class declared in this file.
+require_once RUN_MAINTENANCE_IF_MAIN; // NOTE(review): constant presumably defined by Maintenance.php; confirm it runs $maintClass.
--
To view, visit https://gerrit.wikimedia.org/r/268874
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ia10250e261ed2a16afa970cdf3e9c5e3105bb98d
Gerrit-PatchSet: 20
Gerrit-Project: mediawiki/extensions/ORES
Gerrit-Branch: master
Gerrit-Owner: Ladsgroup <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: Awight <[email protected]>
Gerrit-Reviewer: Hoo man <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Legoktm <[email protected]>
Gerrit-Reviewer: Nikerabbit <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits