jenkins-bot has submitted this change and it was merged.
Change subject: Made RenameUserJob better avoid slave lag spikes
......................................................................
Made RenameUserJob better avoid slave lag spikes
* Do the major table updates using a SELECT+UPDATE pattern
so the slaves only see the query to update rows by PRIMARY
KEY. Otherwise, queries that take dozens of seconds hit the
slaves, causing that much lag.
* Also cleaned up the job documentation while at it.
Bug: T95501
Change-Id: I5d79858eee48f780530485b05bec7567ae7bafe7
---
M RenameUserJob.php
M RenameuserSQL.php
2 files changed, 142 insertions(+), 62 deletions(-)
Approvals:
Gilles: Looks good to me, approved
jenkins-bot: Verified
diff --git a/RenameUserJob.php b/RenameUserJob.php
index 2edfc44..516e281 100644
--- a/RenameUserJob.php
+++ b/RenameUserJob.php
@@ -2,97 +2,153 @@
/**
* Custom job to perform updates on tables in busier environments
+ *
+ * Job parameters include:
+ * - table : DB table to update
+ * - column : The *_user_text column to update
+ * - oldname : The old user name
+ * - newname : The new user name
+ * - count : The expected number of rows to update in this batch
+ *
+ * Additionally, one of the following groups of parameters must be set:
+ * a) The timestamp based rename paramaters:
+ * - timestampColumn : The *_timestamp column
+ * - minTimestamp : The minimum bound of the timestamp column range for
this batch
+ * - maxTimestamp : The maximum bound of the timestamp column range for
this batch
+ * - uniqueKey : A column that is unique (preferrably the PRIMARY KEY)
[optional]
+ * b) The unique key based rename paramaters:
+ * - uniqueKey : A column that is unique (preferrably the PRIMARY KEY)
+ * - keyId : A list of values for this column to determine rows to
update for this batch
+ *
+ * To avoid some race conditions, the following parameters should be set:
+ * - userID : The ID of the user to update
+ * - uidColumn : The *_user_id column
*/
class RenameUserJob extends Job {
- /**
- * Constructor
- *
- * @param Title $title Associated title
- * @param array $params Job parameters
- * @param int $id
- */
- public function __construct( $title, $params = array(), $id = 0 ) {
+ public function __construct( Title $title, $params = array(), $id = 0 )
{
parent::__construct( 'renameUser', $title, $params, $id );
}
- /**
- * Execute the job
- *
- * @return bool
- */
public function run() {
- $dbw = wfGetDB( DB_MASTER );
+ global $wgUpdateRowsPerQuery;
$table = $this->params['table'];
$column = $this->params['column'];
$oldname = $this->params['oldname'];
- $userID = isset( $this->params['userID'] ) ?
$this->params['userID'] : null;
- $uidColumn = isset( $this->params['uidColumn'] ) ?
$this->params['uidColumn'] : null;
- $timestampColumn = isset( $this->params['timestampColumn'] ) ?
- $this->params['timestampColumn'] :
- null;
- $minTimestamp = $this->params['minTimestamp'];
- $maxTimestamp = $this->params['maxTimestamp'];
- $uniqueKey = isset( $this->params['uniqueKey'] ) ?
$this->params['uniqueKey'] : null;
- $keyId = isset( $this->params['keyId'] ) ?
$this->params['keyId'] : null;
$newname = $this->params['newname'];
$count = $this->params['count'];
+ if ( isset( $this->params['userID'] ) ) {
+ $userID = $this->params['userID'];
+ $uidColumn = $this->params['uidColumn'];
+ } else {
+ $userID = null;
+ $uidColumn = null;
+ }
+ if ( isset( $this->params['timestampColumn'] ) ) {
+ $timestampColumn = $this->params['timestampColumn'];
+ $minTimestamp = $this->params['minTimestamp'];
+ $maxTimestamp = $this->params['maxTimestamp'];
+ } else {
+ $timestampColumn = null;
+ $minTimestamp = null;
+ $maxTimestamp = null;
+ }
+ $uniqueKey = isset( $this->params['uniqueKey'] ) ?
$this->params['uniqueKey'] : null;
+ $keyId = isset( $this->params['keyId'] ) ?
$this->params['keyId'] : null;
# Conditions like "*_user_text = 'x'
$conds = array( $column => $oldname );
- # If user ID given, add that to condition to avoid rename
collisions.
- if ( isset( $userID ) ) {
+ # If user ID given, add that to condition to avoid rename
collisions
+ if ( $userID !== null ) {
$conds[$uidColumn] = $userID;
}
# Bound by timestamp if given
- if ( isset( $timestampColumn ) ) {
+ if ( $timestampColumn !== null ) {
$conds[] = "$timestampColumn >= '$minTimestamp'";
$conds[] = "$timestampColumn <= '$maxTimestamp'";
- # Otherwise, bound by key (B/C)
- } elseif ( isset( $uniqueKey ) ) {
+ # Bound by unique key if given (B/C)
+ } elseif ( $uniqueKey !== null && $keyId !== null ) {
$conds[$uniqueKey] = $keyId;
} else {
- wfDebug( 'RenameUserJob::run - invalid job row given'
); // this shouldn't happen
- return false;
+ throw new InvalidArgumentException( "Expected ID batch
or time range" );
}
- # Update a chuck of rows!
- $dbw->update( $table,
- array( $column => $newname ),
- $conds,
- __METHOD__
- );
+
+ $dbw = wfGetDB( DB_MASTER );
+
+ $affectedCount = 0;
+ # Actually update the rows for this job...
+ if ( $uniqueKey !== null ) {
+ # Select the rows to update by PRIMARY KEY
+ $ids = $dbw->selectFieldValues( $table, $uniqueKey,
$conds, __METHOD__ );
+ # Update these rows by PRIMARY KEY to avoid slave lag
+ foreach ( array_chunk( $ids, $wgUpdateRowsPerQuery ) as
$batch ) {
+ $dbw->commit( __METHOD__, 'flush' );
+
+ $dbw->update( $table,
+ array( $column => $newname ),
+ array( $column => $oldname, $uniqueKey
=> $batch ),
+ __METHOD__
+ );
+ $affectedCount += $dbw->affectedRows();
+ }
+ } else {
+ # Update the chunk of rows directly
+ $dbw->update( $table,
+ array( $column => $newname ),
+ $conds,
+ __METHOD__
+ );
+ $affectedCount += $dbw->affectedRows();
+ }
+
# Special case: revisions may be deleted while renaming...
- if ( $table === 'revision' && isset( $timestampColumn ) ) {
- $actual = $dbw->affectedRows();
+ if ( $affectedCount < $count && $table === 'revision' &&
$timestampColumn !== null ) {
# If some revisions were not renamed, they may have
been deleted.
# Do a pass on the archive table to get these
straglers...
- if ( $actual < $count ) {
- $dbw->update( 'archive',
+ $ids = $dbw->selectFieldValues(
+ 'archive',
+ 'ar_id',
+ array(
+ 'ar_user_text' => $oldname,
+ 'ar_user' => $userID,
+ // No user,rev_id index, so use
timestamp to bound
+ // the rows. This can use the
user,timestamp index.
+ "ar_timestamp >= '$minTimestamp'",
+ "ar_timestamp <= '$maxTimestamp'"
+ ),
+ __METHOD__
+ );
+ if ( $ids ) {
+ $dbw->update(
+ 'archive',
array( 'ar_user_text' => $newname ),
- array( 'ar_user_text' => $oldname,
- 'ar_user' => $userID,
- // No user,rev_id index, so use
timestamp to bound
- // the rows. This can use the
user,timestamp index.
- "ar_timestamp >=
'$minTimestamp'",
- "ar_timestamp <=
'$maxTimestamp'" ),
+ array( 'ar_user_text' => $oldname,
'ar_id' => $ids ),
__METHOD__
);
}
}
# Special case: revisions may be restored while renaming...
- if ( $table === 'archive' && isset( $timestampColumn ) ) {
- $actual = $dbw->affectedRows();
+ if ( $affectedCount < $count && $table === 'archive' &&
$timestampColumn !== null ) {
# If some revisions were not renamed, they may have
been restored.
# Do a pass on the revision table to get these
straglers...
- if ( $actual < $count ) {
- $dbw->update( 'revision',
+ $ids = $dbw->selectFieldValues(
+ 'revision',
+ 'rev_id',
+ array(
+ 'rev_user_text' => $oldname,
+ 'rev_user' => $userID,
+ // No user,rev_id index, so use
timestamp to bound
+ // the rows. This can use the
user,timestamp index.
+ "rev_timestamp >= '$minTimestamp'",
+ "rev_timestamp <= '$maxTimestamp'"
+ ),
+ __METHOD__
+ );
+ if ( $ids ) {
+ $dbw->update(
+ 'revision',
array( 'rev_user_text' => $newname ),
- array( 'rev_user_text' => $oldname,
- 'rev_user' => $userID,
- // No user,rev_id index, so use
timestamp to bound
- // the rows. This can use the
user,timestamp index.
- "rev_timestamp >=
'$minTimestamp'",
- "rev_timestamp <=
'$maxTimestamp'" ),
+ array( 'rev_user_text' => $oldname,
'rev_id' => $ids ),
__METHOD__
);
}
diff --git a/RenameuserSQL.php b/RenameuserSQL.php
index 56d9d49..15da11b 100755
--- a/RenameuserSQL.php
+++ b/RenameuserSQL.php
@@ -72,6 +72,11 @@
*/
const CONTRIB_JOB = 500;
+ // B/C constants for tablesJob field
+ const NAME_COL = 0;
+ const UID_COL = 1;
+ const TIME_COL = 2;
+
/**
* Constructor
*
@@ -108,9 +113,24 @@
$this->tablesJob = array(); // Slow updates
// If this user has a large number of edits, use the jobqueue
if ( User::newFromId( $uid )->getEditCount() >
self::CONTRIB_JOB ) {
- $this->tablesJob['revision'] = array( 'rev_user_text',
'rev_user', 'rev_timestamp' );
- $this->tablesJob['archive'] = array( 'ar_user_text',
'ar_user', 'ar_timestamp' );
- $this->tablesJob['logging'] = array( 'log_user_text',
'log_user', 'log_timestamp' );
+ $this->tablesJob['revision'] = array(
+ self::NAME_COL => 'rev_user_text',
+ self::UID_COL => 'rev_user',
+ self::TIME_COL => 'rev_timestamp',
+ 'uniqueKey' => 'rev_id'
+ );
+ $this->tablesJob['archive'] = array(
+ self::NAME_COL => 'ar_user_text',
+ self::UID_COL => 'ar_user',
+ self::TIME_COL => 'ar_timestamp',
+ 'uniqueKey' => 'ar_id'
+ );
+ $this->tablesJob['logging'] = array(
+ self::NAME_COL => 'log_user_text',
+ self::UID_COL => 'log_user',
+ self::TIME_COL => 'log_timestamp',
+ 'uniqueKey' => 'log_id'
+ );
} else {
$this->tables['revision'] = array( 'rev_user_text',
'rev_user' );
$this->tables['archive'] = array( 'ar_user_text',
'ar_user' );
@@ -221,9 +241,9 @@
// randomly mixed between the two new names. Some sort of rename
// lock might be in order...
foreach ( $this->tablesJob as $table => $params ) {
- $userTextC = $params[0]; // some *_user_text column
- $userIDC = $params[1]; // some *_user column
- $timestampC = $params[2]; // some *_timestamp column
+ $userTextC = $params[self::NAME_COL]; // some
*_user_text column
+ $userIDC = $params[self::UID_COL]; // some *_user column
+ $timestampC = $params[self::TIME_COL]; // some
*_timestamp column
$res = $dbw->select( $table,
array( $timestampC ),
@@ -244,6 +264,10 @@
$jobParams['minTimestamp'] = '0';
$jobParams['maxTimestamp'] = '0';
$jobParams['count'] = 0;
+ // Unique column for slave lag avoidance
+ if ( isset( $params['uniqueKey'] ) ) {
+ $jobParams['uniqueKey'] = $params['uniqueKey'];
+ }
// Insert jobs into queue!
while ( true ) {
--
To view, visit https://gerrit.wikimedia.org/r/239992
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I5d79858eee48f780530485b05bec7567ae7bafe7
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/Renameuser
Gerrit-Branch: master
Gerrit-Owner: Aaron Schulz <[email protected]>
Gerrit-Reviewer: Aaron Schulz <[email protected]>
Gerrit-Reviewer: Gilles <[email protected]>
Gerrit-Reviewer: Jcrespo <[email protected]>
Gerrit-Reviewer: Legoktm <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits