jenkins-bot has submitted this change and it was merged.

Change subject: Add PopulateDatabase.php
......................................................................


Add PopulateDatabase.php

Bug: T123795
Change-Id: Ia10250e261ed2a16afa970cdf3e9c5e3105bb98d
---
M includes/Cache.php
A maintenance/PopulateDatabase.php
2 files changed, 112 insertions(+), 3 deletions(-)

Approvals:
  Hoo man: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/includes/Cache.php b/includes/Cache.php
index 4d18cda..ef759db 100644
--- a/includes/Cache.php
+++ b/includes/Cache.php
@@ -18,11 +18,10 @@
         * Save scores to the database
         *
         * @param array $scores in the same structure as is returned by ORES.
-        * @param integer $revid Revision ID
         *
         * @throws RuntimeException
         */
-       public function storeScores( $scores, $revid ) {
+       public function storeScores( $scores ) {
                // Map to database fields.
                $dbData = array();
                foreach ( $scores as $revision => $revisionData ) {
@@ -47,7 +46,7 @@
                                                continue;
                                        }
                                        $dbData[] = array(
-                                               'oresc_rev' => $revid,
+                                               'oresc_rev' => $revision,
                                                'oresc_model' => $modelId,
                                                'oresc_class' => $class,
                                                'oresc_probability' => 
$probability,
diff --git a/maintenance/PopulateDatabase.php b/maintenance/PopulateDatabase.php
new file mode 100644
index 0000000..2b7d4f0
--- /dev/null
+++ b/maintenance/PopulateDatabase.php
@@ -0,0 +1,110 @@
+<?php
+
+namespace ORES;
+
+use Maintenance;
+
+require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
+       ? getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php'
+       : __DIR__ . '/../../../maintenance/Maintenance.php' );
+
+/**
+ * Maintenance script that scores revisions from recentchanges which have no
+ * row in ores_classification yet, and stores the resulting scores.
+ *
+ * @ingroup Maintenance
+ */
+class PopulateDatabase extends Maintenance {
+
+       /**
+        * Number of revision ids handed to a single processScores() call.
+        */
+       const SCORING_PACK_SIZE = 50;
+
+       /**
+        * @var int|null Maximum number of rows fetched by one select query
+        */
+       private $batchSize;
+
+       /**
+        * @var int|null Maximum number of revisions to process in one run
+        */
+       private $revisionLimit;
+
+       public function __construct() {
+               parent::__construct();
+
+               $this->addDescription( 'Populate ores_classification table by scoring ' .
+                       'the latest edits in recentchanges table that are not scored' );
+               $this->addOption( 'number', 'Number of revisions to be scored', false, true, 'n' );
+               $this->addOption( 'batch', 'Batch size for select sql query', false, true, 'b' );
+       }
+
+       /**
+        * Select unscored recentchanges rows in descending rc_id order, batch by
+        * batch, and pass their revision ids to processScores() until either the
+        * --number budget is used up or no more matching rows are found.
+        */
+       public function execute() {
+               global $wgOresExcludeBots;
+
+               $scoring = Scoring::instance();
+               $cache = Cache::instance();
+               // Normalise the option values to integers before doing arithmetic
+               // with them. A batch size below 1 could never make progress (the
+               // loop would not terminate), so it is clamped to 1.
+               $this->batchSize = max( 1, (int)$this->getOption( 'batch', 5000 ) );
+               $this->revisionLimit = (int)$this->getOption( 'number', 1000 );
+
+               $latestRcId = 0;
+               $dbr = wfGetDB( DB_SLAVE );
+               $join_conds = array( 'ores_classification' =>
+                       array( 'LEFT JOIN', array( 'oresc_rev = rc_this_oldid' ) )
+               );
+
+               $remaining = $this->revisionLimit;
+               while ( $remaining > 0 ) {
+                       // Never fetch more rows than the --number budget still allows,
+                       // otherwise a batch size larger than the limit overshoots it.
+                       $limit = min( $this->batchSize, $remaining );
+
+                       $conditions = array( 'oresc_id IS NULL', 'rc_type' => 0 );
+                       if ( $wgOresExcludeBots === true ) {
+                               $conditions['rc_bot'] = 0;
+                       }
+                       if ( $latestRcId ) {
+                               // Continue below the last row seen in the previous batch.
+                               $conditions[] = 'rc_id < ' . $dbr->addQuotes( $latestRcId );
+                       }
+
+                       $res = $dbr->select(
+                               array( 'recentchanges', 'ores_classification' ),
+                               array( 'rc_id', 'rc_this_oldid' ),
+                               $conditions,
+                               __METHOD__,
+                               array( 'ORDER BY' => 'rc_id DESC', 'LIMIT' => $limit ),
+                               $join_conds
+                       );
+
+                       $pack = array();
+                       foreach ( $res as $row ) {
+                               $pack[] = $row->rc_this_oldid;
+                               if ( count( $pack ) >= self::SCORING_PACK_SIZE ) {
+                                       $this->processScores( $pack, $scoring, $cache );
+                                       $pack = array();
+                               }
+                               $latestRcId = $row->rc_id;
+                       }
+                       // Flush whatever is left over from the last, partial pack.
+                       if ( $pack !== array() ) {
+                               $this->processScores( $pack, $scoring, $cache );
+                       }
+
+                       // Count the rows actually handled, not the requested batch size.
+                       $fetched = $res->numRows();
+                       $remaining -= $fetched;
+                       wfGetLBFactory()->waitForReplication();
+
+                       // A short batch means there are no further matching rows.
+                       if ( $fetched < $limit ) {
+                               break;
+                       }
+               }
+               $this->output( "Finished processing the revisions\n" );
+       }
+
+       /**
+        * Process several edits and store the scores in the database
+        *
+        * @param array $revs array of revision ids
+        * @param Scoring $scoring scoring object
+        * @param Cache $cache cache object
+        */
+       private function processScores( array $revs, Scoring $scoring, Cache $cache ) {
+               $size = count( $revs );
+               $this->output( "Processing $size revisions\n" );
+
+               $scores = $scoring->getScores( $revs );
+               $cache->storeScores( $scores );
+       }
+}
+
+// Script footer: names this class in $maintClass and then loads the file that
+// RUN_MAINTENANCE_IF_MAIN points to. NOTE(review): presumably this is the
+// standard MediaWiki hook that runs the class when the file is invoked
+// directly - confirm against Maintenance.php.
+$maintClass = 'ORES\PopulateDatabase';
+require_once RUN_MAINTENANCE_IF_MAIN;

-- 
To view, visit https://gerrit.wikimedia.org/r/268874
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ia10250e261ed2a16afa970cdf3e9c5e3105bb98d
Gerrit-PatchSet: 20
Gerrit-Project: mediawiki/extensions/ORES
Gerrit-Branch: master
Gerrit-Owner: Ladsgroup <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: Awight <[email protected]>
Gerrit-Reviewer: Hoo man <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Legoktm <[email protected]>
Gerrit-Reviewer: Nikerabbit <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to