jenkins-bot has submitted this change and it was merged.

Change subject: Made RenameUserJob better avoid slave lag spikes
......................................................................


Made RenameUserJob better avoid slave lag spikes

* Do the major table updates using a SELECT+UPDATE pattern
  so the slaves only see the query to update rows by PRIMARY
  KEY. Otherwise, queries that take dozens of seconds hit the
  slaves, causing that much lag.
* Also cleaned up the job documentation while at it.

Bug: T95501
Change-Id: I5d79858eee48f780530485b05bec7567ae7bafe7
---
M RenameUserJob.php
M RenameuserSQL.php
2 files changed, 142 insertions(+), 62 deletions(-)

Approvals:
  Gilles: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/RenameUserJob.php b/RenameUserJob.php
index 2edfc44..516e281 100644
--- a/RenameUserJob.php
+++ b/RenameUserJob.php
@@ -2,97 +2,153 @@
 
 /**
  * Custom job to perform updates on tables in busier environments
+ *
+ * Job parameters include:
+ *   - table     : DB table to update
+ *   - column    : The *_user_text column to update
+ *   - oldname   : The old user name
+ *   - newname   : The new user name
+ *   - count     : The expected number of rows to update in this batch
+ *
+ * Additionally, one of the following groups of parameters must be set:
+ * a) The timestamp based rename paramaters:
+ *   - timestampColumn : The *_timestamp column
+ *   - minTimestamp    : The minimum bound of the timestamp column range for 
this batch
+ *   - maxTimestamp    : The maximum bound of the timestamp column range for 
this batch
+ *   - uniqueKey       : A column that is unique (preferrably the PRIMARY KEY) 
[optional]
+ * b) The unique key based rename paramaters:
+ *   - uniqueKey : A column that is unique (preferrably the PRIMARY KEY)
+ *   - keyId     : A list of values for this column to determine rows to 
update for this batch
+ *
+ * To avoid some race conditions, the following parameters should be set:
+ *   - userID    : The ID of the user to update
+ *   - uidColumn : The *_user_id column
  */
 class RenameUserJob extends Job {
-       /**
-        * Constructor
-        *
-        * @param Title $title Associated title
-        * @param array $params Job parameters
-        * @param int $id
-        */
-       public function __construct( $title, $params = array(), $id = 0 ) {
+       public function __construct( Title $title, $params = array(), $id = 0 ) 
{
                parent::__construct( 'renameUser', $title, $params, $id );
        }
 
-       /**
-        * Execute the job
-        *
-        * @return bool
-        */
        public function run() {
-               $dbw = wfGetDB( DB_MASTER );
+               global $wgUpdateRowsPerQuery;
 
                $table = $this->params['table'];
                $column = $this->params['column'];
                $oldname = $this->params['oldname'];
-               $userID = isset( $this->params['userID'] ) ? 
$this->params['userID'] : null;
-               $uidColumn = isset( $this->params['uidColumn'] ) ? 
$this->params['uidColumn'] : null;
-               $timestampColumn = isset( $this->params['timestampColumn'] ) ?
-                       $this->params['timestampColumn'] :
-                       null;
-               $minTimestamp = $this->params['minTimestamp'];
-               $maxTimestamp = $this->params['maxTimestamp'];
-               $uniqueKey = isset( $this->params['uniqueKey'] ) ? 
$this->params['uniqueKey'] : null;
-               $keyId = isset( $this->params['keyId'] ) ? 
$this->params['keyId'] : null;
                $newname = $this->params['newname'];
                $count = $this->params['count'];
+               if ( isset( $this->params['userID'] ) ) {
+                       $userID = $this->params['userID'];
+                       $uidColumn = $this->params['uidColumn'];
+               } else {
+                       $userID = null;
+                       $uidColumn = null;
+               }
+               if ( isset( $this->params['timestampColumn'] ) ) {
+                       $timestampColumn = $this->params['timestampColumn'];
+                       $minTimestamp = $this->params['minTimestamp'];
+                       $maxTimestamp = $this->params['maxTimestamp'];
+               } else {
+                       $timestampColumn = null;
+                       $minTimestamp = null;
+                       $maxTimestamp = null;
+               }
+               $uniqueKey = isset( $this->params['uniqueKey'] ) ? 
$this->params['uniqueKey'] : null;
+               $keyId = isset( $this->params['keyId'] ) ? 
$this->params['keyId'] : null;
 
                # Conditions like "*_user_text = 'x'
                $conds = array( $column => $oldname );
-               # If user ID given, add that to condition to avoid rename 
collisions.
-               if ( isset( $userID ) ) {
+               # If user ID given, add that to condition to avoid rename 
collisions
+               if ( $userID !== null ) {
                        $conds[$uidColumn] = $userID;
                }
                # Bound by timestamp if given
-               if ( isset( $timestampColumn ) ) {
+               if ( $timestampColumn !== null ) {
                        $conds[] = "$timestampColumn >= '$minTimestamp'";
                        $conds[] = "$timestampColumn <= '$maxTimestamp'";
-                       # Otherwise, bound by key (B/C)
-               } elseif ( isset( $uniqueKey ) ) {
+               # Bound by unique key if given (B/C)
+               } elseif ( $uniqueKey !== null && $keyId !== null ) {
                        $conds[$uniqueKey] = $keyId;
                } else {
-                       wfDebug( 'RenameUserJob::run - invalid job row given' 
); // this shouldn't happen
-                       return false;
+                       throw new InvalidArgumentException( "Expected ID batch 
or time range" );
                }
-               # Update a chuck of rows!
-               $dbw->update( $table,
-                       array( $column => $newname ),
-                       $conds,
-                       __METHOD__
-               );
+
+               $dbw = wfGetDB( DB_MASTER );
+
+               $affectedCount = 0;
+               # Actually update the rows for this job...
+               if ( $uniqueKey !== null ) {
+                       # Select the rows to update by PRIMARY KEY
+                       $ids = $dbw->selectFieldValues( $table, $uniqueKey, 
$conds, __METHOD__ );
+                       # Update these rows by PRIMARY KEY to avoid slave lag
+                       foreach ( array_chunk( $ids, $wgUpdateRowsPerQuery ) as 
$batch ) {
+                               $dbw->commit( __METHOD__, 'flush' );
+
+                               $dbw->update( $table,
+                                       array( $column => $newname ),
+                                       array( $column => $oldname, $uniqueKey 
=> $batch ),
+                                       __METHOD__
+                               );
+                               $affectedCount += $dbw->affectedRows();
+                       }
+               } else {
+                       # Update the chunk of rows directly
+                       $dbw->update( $table,
+                               array( $column => $newname ),
+                               $conds,
+                               __METHOD__
+                       );
+                       $affectedCount += $dbw->affectedRows();
+               }
+
                # Special case: revisions may be deleted while renaming...
-               if ( $table === 'revision' && isset( $timestampColumn ) ) {
-                       $actual = $dbw->affectedRows();
+               if ( $affectedCount < $count && $table === 'revision' && 
$timestampColumn !== null ) {
                        # If some revisions were not renamed, they may have 
been deleted.
                        # Do a pass on the archive table to get these 
straglers...
-                       if ( $actual < $count ) {
-                               $dbw->update( 'archive',
+                       $ids = $dbw->selectFieldValues(
+                               'archive',
+                               'ar_id',
+                               array(
+                                       'ar_user_text' => $oldname,
+                                       'ar_user' => $userID,
+                                       // No user,rev_id index, so use 
timestamp to bound
+                                       // the rows. This can use the 
user,timestamp index.
+                                       "ar_timestamp >= '$minTimestamp'",
+                                       "ar_timestamp <= '$maxTimestamp'"
+                               ),
+                               __METHOD__
+                       );
+                       if ( $ids ) {
+                               $dbw->update(
+                                       'archive',
                                        array( 'ar_user_text' => $newname ),
-                                       array( 'ar_user_text' => $oldname,
-                                               'ar_user' => $userID,
-                                               // No user,rev_id index, so use 
timestamp to bound
-                                               // the rows. This can use the 
user,timestamp index.
-                                               "ar_timestamp >= 
'$minTimestamp'",
-                                               "ar_timestamp <= 
'$maxTimestamp'" ),
+                                       array( 'ar_user_text' => $oldname, 
'ar_id' => $ids ),
                                        __METHOD__
                                );
                        }
                }
                # Special case: revisions may be restored while renaming...
-               if ( $table === 'archive' && isset( $timestampColumn ) ) {
-                       $actual = $dbw->affectedRows();
+               if ( $affectedCount < $count && $table === 'archive' && 
$timestampColumn !== null ) {
                        # If some revisions were not renamed, they may have 
been restored.
                        # Do a pass on the revision table to get these 
straglers...
-                       if ( $actual < $count ) {
-                               $dbw->update( 'revision',
+                       $ids = $dbw->selectFieldValues(
+                               'revision',
+                               'rev_id',
+                               array(
+                                       'rev_user_text' => $oldname,
+                                       'rev_user' => $userID,
+                                       // No user,rev_id index, so use 
timestamp to bound
+                                       // the rows. This can use the 
user,timestamp index.
+                                       "rev_timestamp >= '$minTimestamp'",
+                                       "rev_timestamp <= '$maxTimestamp'"
+                               ),
+                               __METHOD__
+                       );
+                       if ( $ids ) {
+                               $dbw->update(
+                                       'revision',
                                        array( 'rev_user_text' => $newname ),
-                                       array( 'rev_user_text' => $oldname,
-                                               'rev_user' => $userID,
-                                               // No user,rev_id index, so use 
timestamp to bound
-                                               // the rows. This can use the 
user,timestamp index.
-                                               "rev_timestamp >= 
'$minTimestamp'",
-                                               "rev_timestamp <= 
'$maxTimestamp'" ),
+                                       array( 'rev_user_text' => $oldname, 
'rev_id' => $ids ),
                                        __METHOD__
                                );
                        }
diff --git a/RenameuserSQL.php b/RenameuserSQL.php
index 56d9d49..15da11b 100755
--- a/RenameuserSQL.php
+++ b/RenameuserSQL.php
@@ -72,6 +72,11 @@
         */
        const CONTRIB_JOB = 500;
 
+       // B/C constants for tablesJob field
+       const NAME_COL = 0;
+       const UID_COL  = 1;
+       const TIME_COL = 2;
+
        /**
         * Constructor
         *
@@ -108,9 +113,24 @@
                $this->tablesJob = array(); // Slow updates
                // If this user has a large number of edits, use the jobqueue
                if ( User::newFromId( $uid )->getEditCount() > 
self::CONTRIB_JOB ) {
-                       $this->tablesJob['revision'] = array( 'rev_user_text', 
'rev_user', 'rev_timestamp' );
-                       $this->tablesJob['archive'] = array( 'ar_user_text', 
'ar_user', 'ar_timestamp' );
-                       $this->tablesJob['logging'] = array( 'log_user_text', 
'log_user', 'log_timestamp' );
+                       $this->tablesJob['revision'] = array(
+                               self::NAME_COL => 'rev_user_text',
+                               self::UID_COL  => 'rev_user',
+                               self::TIME_COL => 'rev_timestamp',
+                               'uniqueKey'    => 'rev_id'
+                       );
+                       $this->tablesJob['archive'] = array(
+                               self::NAME_COL => 'ar_user_text',
+                               self::UID_COL  => 'ar_user',
+                               self::TIME_COL => 'ar_timestamp',
+                               'uniqueKey'    => 'ar_id'
+                       );
+                       $this->tablesJob['logging'] = array(
+                               self::NAME_COL => 'log_user_text',
+                               self::UID_COL  => 'log_user',
+                               self::TIME_COL => 'log_timestamp',
+                               'uniqueKey'    => 'log_id'
+                       );
                } else {
                        $this->tables['revision'] = array( 'rev_user_text', 
'rev_user' );
                        $this->tables['archive'] = array( 'ar_user_text', 
'ar_user' );
@@ -221,9 +241,9 @@
                // randomly mixed between the two new names. Some sort of rename
                // lock might be in order...
                foreach ( $this->tablesJob as $table => $params ) {
-                       $userTextC = $params[0]; // some *_user_text column
-                       $userIDC = $params[1]; // some *_user column
-                       $timestampC = $params[2]; // some *_timestamp column
+                       $userTextC = $params[self::NAME_COL]; // some 
*_user_text column
+                       $userIDC = $params[self::UID_COL]; // some *_user column
+                       $timestampC = $params[self::TIME_COL]; // some 
*_timestamp column
 
                        $res = $dbw->select( $table,
                                array( $timestampC ),
@@ -244,6 +264,10 @@
                        $jobParams['minTimestamp'] = '0';
                        $jobParams['maxTimestamp'] = '0';
                        $jobParams['count'] = 0;
+                       // Unique column for slave lag avoidance
+                       if ( isset( $params['uniqueKey'] ) ) {
+                               $jobParams['uniqueKey'] = $params['uniqueKey'];
+                       }
 
                        // Insert jobs into queue!
                        while ( true ) {

-- 
To view, visit https://gerrit.wikimedia.org/r/239992
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5d79858eee48f780530485b05bec7567ae7bafe7
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/Renameuser
Gerrit-Branch: master
Gerrit-Owner: Aaron Schulz <[email protected]>
Gerrit-Reviewer: Aaron Schulz <[email protected]>
Gerrit-Reviewer: Gilles <[email protected]>
Gerrit-Reviewer: Jcrespo <[email protected]>
Gerrit-Reviewer: Legoktm <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to