Brian Wolff has submitted this change and it was merged. Change subject: Rewrite runBatchedQuery.php ......................................................................
Rewrite runBatchedQuery.php This maintenance script was previously introduced as a way to do large UPDATE queries in a replication safe way. However, in modern versions of MySQL, UPDATE...LIMIT is considered a non-replication-safe query, and will emit a warning. So instead, ask the user to provide slightly more structured data about the update query being done, and partition the table based on an index. In the UPDATE queries, specify index ranges instead of using LIMIT. Also add a "db" option, which allows the script to update databases which are not valid wiki names, for example, centralauth. Change-Id: I462bdcb03e107af9db4738895952d5110f0ec4fc --- M maintenance/runBatchedQuery.php 1 file changed, 56 insertions(+), 8 deletions(-) Approvals: Aaron Schulz: Looks good to me, approved jenkins-bot: Verified diff --git a/maintenance/runBatchedQuery.php b/maintenance/runBatchedQuery.php index a5e7a2f..f8eedb3 100644 --- a/maintenance/runBatchedQuery.php +++ b/maintenance/runBatchedQuery.php @@ -34,26 +34,74 @@ public function __construct() { parent::__construct(); $this->addDescription( - "Run a query repeatedly until it affects 0 rows, and wait for replica DBs in between.\n" . - "NOTE: You need to set a LIMIT clause yourself." ); + "Run an update query on all rows of a table. " . + "Waits for replicas at appropriate intervals." ); + $this->addOption( 'table', 'The table name', true, true ); + $this->addOption( 'set', 'The SET clause', true, true ); + $this->addOption( 'where', 'The WHERE clause', false, true ); + $this->addOption( 'key', 'A column name, the values of which are unique', true, true ); + $this->addOption( 'batch-size', 'The batch size (default 1000)', false, true ); + $this->addOption( 'db', 'The database name, or omit to use the current wiki.', false, true ); } public function execute() { - if ( !$this->hasArg() ) { - $this->error( "No query specified. Specify the query as a command line parameter.", true ); + $table = $this->getOption( 'table' ); + $key = $this->getOption( 'key' ); + $set = $this->getOption( 'set' ); + $where = $this->getOption( 'where', null ); + $where = $where === null ? [] : [ $where ]; + $batchSize = $this->getOption( 'batch-size', 1000 ); + + $dbName = $this->getOption( 'db', null ); + if ( $dbName === null ) { + $dbw = $this->getDB( DB_MASTER ); + } else { + $lbf = MediaWiki\MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); + $lb = $lbf->getMainLB( $dbName ); + $dbw = $lb->getConnection( DB_MASTER, [], $dbName ); } - $query = $this->getArg(); + $selectConds = $where; + $prevEnd = false; + $n = 1; - $dbw = $this->getDB( DB_MASTER ); do { $this->output( "Batch $n: " ); $n++; + + // Note that the update conditions do not rely on atomicity of the + // SELECT query in order to guarantee that all rows are updated. The + // results of the SELECT are merely a partitioning hint. Simultaneous + // updates merely result in the wrong number of rows being updated + // in a batch. + + $res = $dbw->select( $table, $key, $selectConds, __METHOD__, + [ 'ORDER BY' => $key, 'LIMIT' => $batchSize ] ); + if ( $res->numRows() ) { + $res->seek( $res->numRows() - 1 ); + $row = $res->fetchObject(); + $end = $dbw->addQuotes( $row->$key ); + $selectConds = array_merge( $where, [ "$key > $end" ] ); + $updateConds = array_merge( $where, [ "$key <= $end" ] ); + } else { + $updateConds = $where; + } + if ( $prevEnd !== false ) { + $updateConds = array_merge( [ "$key > $prevEnd" ], $updateConds ); + } + + $query = "UPDATE " . $dbw->tableName( $table ) . + " SET " . $set . + " WHERE " . $dbw->makeList( $updateConds, IDatabase::LIST_AND ); + $dbw->query( $query, __METHOD__ ); + + $prevEnd = $end; + $affected = $dbw->affectedRows(); - $this->output( "$affected rows\n" ); + $this->output( "$affected rows affected\n" ); wfWaitForSlaves(); - } while ( $affected > 0 ); + } while ( $res->numRows() ); } public function getDbType() { -- To view, visit https://gerrit.wikimedia.org/r/321604 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I462bdcb03e107af9db4738895952d5110f0ec4fc Gerrit-PatchSet: 3 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: Tim Starling <tstarl...@wikimedia.org> Gerrit-Reviewer: Aaron Schulz <asch...@wikimedia.org> Gerrit-Reviewer: Brian Wolff <bawolff...@gmail.com> Gerrit-Reviewer: Parent5446 <tylerro...@gmail.com> Gerrit-Reviewer: Tim Starling <tstarl...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits