Brian Wolff has submitted this change and it was merged.

Change subject: Rewrite runBatchedQuery.php
......................................................................


Rewrite runBatchedQuery.php

This maintenance script was previously introduced as a way to do large
UPDATE queries in a replication safe way. However, in modern versions of
MySQL, UPDATE...LIMIT is considered a non-replication-safe query, and
will emit a warning.

So instead, ask the user to provide slightly more structured data about
the update query being done, and partition the table based on an index.
In the UPDATE queries, specify index ranges instead of using LIMIT.

Also add a "db" option, which allows the script to update databases
which are not valid wiki names, for example, centralauth.

Change-Id: I462bdcb03e107af9db4738895952d5110f0ec4fc
---
M maintenance/runBatchedQuery.php
1 file changed, 56 insertions(+), 8 deletions(-)

Approvals:
  Aaron Schulz: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/maintenance/runBatchedQuery.php b/maintenance/runBatchedQuery.php
index a5e7a2f..f8eedb3 100644
--- a/maintenance/runBatchedQuery.php
+++ b/maintenance/runBatchedQuery.php
@@ -34,26 +34,74 @@
        public function __construct() {
                parent::__construct();
                $this->addDescription(
-                       "Run a query repeatedly until it affects 0 rows, and 
wait for replica DBs in between.\n" .
-                               "NOTE: You need to set a LIMIT clause 
yourself." );
+                       "Run an update query on all rows of a table. " .
+                       "Waits for replicas at appropriate intervals." );
+               $this->addOption( 'table', 'The table name', true, true );
+               $this->addOption( 'set', 'The SET clause', true, true );
+               $this->addOption( 'where', 'The WHERE clause', false, true );
+               $this->addOption( 'key', 'A column name, the values of which 
are unique', true, true );
+               $this->addOption( 'batch-size', 'The batch size (default 
1000)', false, true );
+               $this->addOption( 'db', 'The database name, or omit to use the 
current wiki.', false, true );
        }
 
        public function execute() {
-               if ( !$this->hasArg() ) {
-                       $this->error( "No query specified. Specify the query as 
a command line parameter.", true );
+               $table = $this->getOption( 'table' );
+               $key = $this->getOption( 'key' );
+               $set = $this->getOption( 'set' );
+               $where = $this->getOption( 'where', null );
+               $where = $where === null ? [] : [ $where ];
+               $batchSize = $this->getOption( 'batch-size', 1000 );
+
+               $dbName = $this->getOption( 'db', null );
+               if ( $dbName === null ) {
+                       $dbw = $this->getDB( DB_MASTER );
+               } else {
+                       $lbf = 
MediaWiki\MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+                       $lb = $lbf->getMainLB( $dbName );
+                       $dbw = $lb->getConnection( DB_MASTER, [], $dbName );
                }
 
-               $query = $this->getArg();
+               $selectConds = $where;
+               $prevEnd = false;
+
                $n = 1;
-               $dbw = $this->getDB( DB_MASTER );
                do {
                        $this->output( "Batch $n: " );
                        $n++;
+
+                       // Note that the update conditions do not rely on 
atomicity of the
+                       // SELECT query in order to guarantee that all rows are 
updated. The
+                       // results of the SELECT are merely a partitioning 
hint. Simultaneous
+                       // updates merely result in the wrong number of rows 
being updated
+                       // in a batch.
+
+                       $res = $dbw->select( $table, $key, $selectConds, 
__METHOD__,
+                               [ 'ORDER BY' => $key, 'LIMIT' => $batchSize ] );
+                       if ( $res->numRows() ) {
+                               $res->seek( $res->numRows() - 1 );
+                               $row = $res->fetchObject();
+                               $end = $dbw->addQuotes( $row->$key );
+                               $selectConds = array_merge( $where, [ "$key > 
$end" ] );
+                               $updateConds = array_merge( $where, [ "$key <= 
$end" ] );
+                       } else {
+                               $updateConds = $where;
+                       }
+                       if ( $prevEnd !== false ) {
+                               $updateConds = array_merge( [ "$key > $prevEnd" 
], $updateConds );
+                       }
+
+                       $query = "UPDATE " . $dbw->tableName( $table ) .
+                               " SET " . $set .
+                               " WHERE " . $dbw->makeList( $updateConds, 
IDatabase::LIST_AND );
+
                        $dbw->query( $query, __METHOD__ );
+
+                       $prevEnd = $end;
+
                        $affected = $dbw->affectedRows();
-                       $this->output( "$affected rows\n" );
+                       $this->output( "$affected rows affected\n" );
                        wfWaitForSlaves();
-               } while ( $affected > 0 );
+               } while ( $res->numRows() );
        }
 
        public function getDbType() {

-- 
To view, visit https://gerrit.wikimedia.org/r/321604
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I462bdcb03e107af9db4738895952d5110f0ec4fc
Gerrit-PatchSet: 3
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Tim Starling <tstarl...@wikimedia.org>
Gerrit-Reviewer: Aaron Schulz <asch...@wikimedia.org>
Gerrit-Reviewer: Brian Wolff <bawolff...@gmail.com>
Gerrit-Reviewer: Parent5446 <tylerro...@gmail.com>
Gerrit-Reviewer: Tim Starling <tstarl...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to