Tim Starling has submitted this change and it was merged. Change subject: Reduce disruption during updateCollation.php ......................................................................
Reduce disruption during updateCollation.php Have updateCollation.php order by cl_to, so that each category is updated all at once. This minimises the time during which a category will appear to be incorrectly sorted while the maintenance script is in progress. Mark the cl_collation index as needing deletion; it was always pretty pointless. You can't do much better than a full table scan when you're changing the collation value on a wiki. Increase the batch size since the lack of a cl_to,cl_from index means that it will have to filesort each category. A larger batch size means fewer sorts. As noted by Liangent on bug 45970, you can't order by cl_sortkey since that will change during execution. Also fix an inappropriate use of $wgMiserMode and remove a no-op from the SET clause of the UPDATE. Very lightly tested. Change-Id: I19bc8d6701f5f78040aa9c521427ac98ef488d89 --- M maintenance/tables.sql M maintenance/updateCollation.php 2 files changed, 45 insertions(+), 21 deletions(-) Approvals: Tim Starling: Verified; Looks good to me, approved diff --git a/maintenance/tables.sql b/maintenance/tables.sql index a917783..4307c0c 100644 --- a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -562,10 +562,10 @@ -- callers won't be using an index: fix this? CREATE INDEX /*i*/cl_sortkey ON /*_*/categorylinks (cl_to,cl_type,cl_sortkey,cl_from); --- Not really used? 
+-- Used by the API (and some extensions) CREATE INDEX /*i*/cl_timestamp ON /*_*/categorylinks (cl_to,cl_timestamp); --- For finding rows with outdated collation +-- FIXME: Not used, delete this CREATE INDEX /*i*/cl_collation ON /*_*/categorylinks (cl_collation); -- diff --git a/maintenance/updateCollation.php b/maintenance/updateCollation.php index 04a2d47..2132938 100644 --- a/maintenance/updateCollation.php +++ b/maintenance/updateCollation.php @@ -35,7 +35,7 @@ * @ingroup Maintenance */ class UpdateCollation extends Maintenance { - const BATCH_SIZE = 50; // Number of rows to process in one batch + const BATCH_SIZE = 10000; // Number of rows to process in one batch const SYNC_INTERVAL = 20; // Wait for slaves after this many batches public $sizeHistogram = array(); @@ -82,10 +82,13 @@ $collation = Collation::singleton(); } - $options = array( 'LIMIT' => self::BATCH_SIZE, 'STRAIGHT_JOIN' ); + $options = array( + 'LIMIT' => self::BATCH_SIZE, + 'ORDER BY' => 'cl_to, cl_type, cl_from', + 'STRAIGHT_JOIN', + ); if ( $force || $dryRun ) { - $options['ORDER BY'] = 'cl_from, cl_to'; $collationConds = array(); } else { if ( $this->hasOption( 'previous-collation' ) ) { @@ -96,17 +99,17 @@ ); } - if ( !$wgMiserMode ) { + $count = $dbw->estimateRowCount( + 'categorylinks', + '*', + $collationConds, + __METHOD__ + ); + // Improve estimate if feasible + if ( $count < 1000000 ) { $count = $dbw->selectField( 'categorylinks', 'COUNT(*)', - $collationConds, - __METHOD__ - ); - } else { - $count = $dbw->estimateRowCount( - 'categorylinks', - '*', $collationConds, __METHOD__ ); @@ -126,7 +129,7 @@ $res = $dbw->select( array( 'categorylinks', 'page' ), array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation', - 'cl_sortkey', 'page_namespace', 'page_title' + 'cl_sortkey', 'cl_type', 'page_namespace', 'page_title' ), array_merge( $collationConds, $batchConds, array( 'cl_from = page_id' ) ), __METHOD__, @@ -175,7 +178,6 @@ 'cl_sortkey_prefix' => $prefix, 'cl_collation' => 
$collationName, 'cl_type' => $type, - 'cl_timestamp = cl_timestamp', ), array( 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ), __METHOD__ @@ -186,12 +188,8 @@ $dbw->commit( __METHOD__ ); } - if ( ( $force || $dryRun ) && $row ) { - $encFrom = $dbw->addQuotes( $row->cl_from ); - $encTo = $dbw->addQuotes( $row->cl_to ); - $batchConds = array( - "(cl_from = $encFrom AND cl_to > $encTo) " . - " OR cl_from > $encFrom" ); + if ( $row ) { + $batchConds = array( $this->getBatchCondition( $row ) ); } $count += $res->numRows(); @@ -212,6 +210,32 @@ } } + /** + * Return an SQL expression selecting rows which sort above the given row, + * assuming an ordering of cl_to, cl_type, cl_from + */ + function getBatchCondition( $row ) { + $dbw = $this->getDB( DB_MASTER ); + $fields = array( 'cl_to', 'cl_type', 'cl_from' ); + $first = true; + $cond = false; + $prefix = false; + foreach ( $fields as $field ) { + $encValue = $dbw->addQuotes( $row->$field ); + $inequality = "$field > $encValue"; + $equality = "$field = $encValue"; + if ( $first ) { + $cond = $inequality; + $prefix = $equality; + $first = false; + } else { + $cond .= " OR ($prefix AND $inequality)"; + $prefix .= " AND $equality"; + } + } + return $cond; + } + function updateSortKeySizeHistogram( $key ) { $length = strlen( $key ); if ( !isset( $this->sizeHistogram[$length] ) ) { -- To view, visit https://gerrit.wikimedia.org/r/53301 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I19bc8d6701f5f78040aa9c521427ac98ef488d89 Gerrit-PatchSet: 4 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: Tim Starling <tstarl...@wikimedia.org> Gerrit-Reviewer: Brian Wolff <bawolff...@gmail.com> Gerrit-Reviewer: Matmarex <matma....@gmail.com> Gerrit-Reviewer: Tim Starling <tstarl...@wikimedia.org> Gerrit-Reviewer: jenkins-bot _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org 
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits