Tim Starling has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/53483


Change subject: Reduce disruption during updateCollation.php
......................................................................

Reduce disruption during updateCollation.php

Have updateCollation.php order by cl_to, so that each category is
updated all at once. This minimises the time during which a category
will appear to be incorrectly sorted, while the maintenance script is in
progress.

Mark the cl_collation index as needing deletion, it was always pretty
pointless. You can't do much better than a full table scan when you're
changing the collation value on a wiki.

Increase the batch size since the lack of a cl_to,cl_from index means
that it will have to filesort each category. A larger batch size means
less sorts. As noted by Liangent on bug 45970, you can't order by
cl_sortkey since that will change during execution.

Also fix an inappropriate use of $wgMiserMode and remove a no-op from
the SET clause of the UPDATE.

Very lightly tested.

Also cherry-picked I803f20bc

Change-Id: I0a58ab79e7f982f4f289ccc2bd231f8b3ed6b217
---
M maintenance/tables.sql
M maintenance/updateCollation.php
2 files changed, 45 insertions(+), 20 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/83/53483/1

diff --git a/maintenance/tables.sql b/maintenance/tables.sql
index e501d35..6f567c9 100644
--- a/maintenance/tables.sql
+++ b/maintenance/tables.sql
@@ -560,10 +560,10 @@
 -- callers won't be using an index: fix this?
 CREATE INDEX /*i*/cl_sortkey ON /*_*/categorylinks 
(cl_to,cl_type,cl_sortkey,cl_from);
 
--- Not really used?
+-- Used by the API (and some extensions)
 CREATE INDEX /*i*/cl_timestamp ON /*_*/categorylinks (cl_to,cl_timestamp);
 
--- For finding rows with outdated collation
+-- FIXME: Not used, delete this
 CREATE INDEX /*i*/cl_collation ON /*_*/categorylinks (cl_collation);
 
 --
diff --git a/maintenance/updateCollation.php b/maintenance/updateCollation.php
index 04a2d47..0e2791c 100644
--- a/maintenance/updateCollation.php
+++ b/maintenance/updateCollation.php
@@ -35,7 +35,7 @@
  * @ingroup Maintenance
  */
 class UpdateCollation extends Maintenance {
-       const BATCH_SIZE = 50; // Number of rows to process in one batch
+       const BATCH_SIZE = 10000; // Number of rows to process in one batch
        const SYNC_INTERVAL = 20; // Wait for slaves after this many batches
 
        public $sizeHistogram = array();
@@ -82,10 +82,13 @@
                        $collation = Collation::singleton();
                }
 
-               $options = array( 'LIMIT' => self::BATCH_SIZE, 'STRAIGHT_JOIN' 
);
+               $options = array(
+                       'LIMIT' => self::BATCH_SIZE,
+                       'ORDER BY' => 'cl_to, cl_type, cl_from',
+                       'STRAIGHT_JOIN',
+               );
 
                if ( $force || $dryRun ) {
-                       $options['ORDER BY'] = 'cl_from, cl_to';
                        $collationConds = array();
                } else {
                        if ( $this->hasOption( 'previous-collation' ) ) {
@@ -96,17 +99,17 @@
                                );
                        }
 
-                       if ( !$wgMiserMode ) {
+                       $count = $dbw->estimateRowCount(
+                               'categorylinks',
+                               '*',
+                               $collationConds,
+                               __METHOD__
+                       );
+                       // Improve estimate if feasible
+                       if ( $count < 1000000 ) {
                                $count = $dbw->selectField(
                                        'categorylinks',
                                        'COUNT(*)',
-                                       $collationConds,
-                                       __METHOD__
-                               );
-                       } else {
-                               $count = $dbw->estimateRowCount(
-                                       'categorylinks',
-                                       '*',
                                        $collationConds,
                                        __METHOD__
                                );
@@ -126,7 +129,7 @@
                        $res = $dbw->select(
                                array( 'categorylinks', 'page' ),
                                array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 
'cl_collation',
-                                       'cl_sortkey', 'page_namespace', 
'page_title'
+                                       'cl_sortkey', 'cl_type', 
'page_namespace', 'page_title'
                                ),
                                array_merge( $collationConds, $batchConds, 
array( 'cl_from = page_id' ) ),
                                __METHOD__,
@@ -186,12 +189,8 @@
                                $dbw->commit( __METHOD__ );
                        }
 
-                       if ( ( $force || $dryRun ) && $row ) {
-                               $encFrom = $dbw->addQuotes( $row->cl_from );
-                               $encTo = $dbw->addQuotes( $row->cl_to );
-                               $batchConds = array(
-                                       "(cl_from = $encFrom AND cl_to > 
$encTo) " .
-                                       " OR cl_from > $encFrom" );
+                       if ( $row ) {
+                               $batchConds = array( $this->getBatchCondition( 
$row ) );
                        }
 
                        $count += $res->numRows();
@@ -212,6 +211,32 @@
                }
        }
 
+       /**
+        * Return an SQL expression selecting rows which sort above the given 
row,
+        * assuming an ordering of cl_to, cl_type, cl_from
+        */
+       function getBatchCondition( $row ) {
+               $dbw = $this->getDB( DB_MASTER );
+               $fields = array( 'cl_to', 'cl_type', 'cl_from' );
+               $first = true;
+               $cond = false;
+               $prefix = false;
+               foreach ( $fields as $field ) {
+                       $encValue = $dbw->addQuotes( $row->$field );
+                       $inequality = "$field > $encValue";
+                       $equality = "$field = $encValue";
+                       if ( $first ) {
+                               $cond = $inequality;
+                               $prefix = $equality;
+                               $first = false;
+                       } else {
+                               $cond .= " OR ($prefix AND $inequality)";
+                               $prefix .= " AND $equality";
+                       }
+               }
+               return $cond;
+       }
+
        function updateSortKeySizeHistogram( $key ) {
                $length = strlen( $key );
                if ( !isset( $this->sizeHistogram[$length] ) ) {

-- 
To view, visit https://gerrit.wikimedia.org/r/53483
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0a58ab79e7f982f4f289ccc2bd231f8b3ed6b217
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: wmf/1.21wmf11
Gerrit-Owner: Tim Starling <tstarl...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to