jenkins-bot has submitted this change and it was merged.

Change subject: Add new index to make updateCollation.php painless
......................................................................


Add new index to make updateCollation.php painless

We want to update categories in order, to minimize disruption
to users. Previous indexes required a filesort to do this, which
exploded things on large wikis. See bug for details

Bug: T58041
Change-Id: Iee6cd997ff87a313a46fda19d8ab063d0fed8ce8
---
M includes/installer/MysqlUpdater.php
M includes/installer/SqliteUpdater.php
A maintenance/archives/patch-add-cl_collation_ext_index.sql
M maintenance/archives/patch-categorylinks-better-collation.sql
A maintenance/archives/patch-kill-cl_collation_index.sql
M maintenance/tables.sql
M maintenance/updateCollation.php
7 files changed, 54 insertions(+), 13 deletions(-)

Approvals:
  Bartosz Dziewoński: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/includes/installer/MysqlUpdater.php 
b/includes/installer/MysqlUpdater.php
index 9a39b77..d414d907 100644
--- a/includes/installer/MysqlUpdater.php
+++ b/includes/installer/MysqlUpdater.php
@@ -182,7 +182,6 @@
                        [ 'dropIndex', 'iwlinks', 'iwl_prefix', 
'patch-kill-iwl_prefix.sql' ],
                        [ 'addField', 'categorylinks', 'cl_collation', 
'patch-categorylinks-better-collation.sql' ],
                        [ 'doClFieldsUpdate' ],
-                       [ 'doCollationUpdate' ],
                        [ 'addTable', 'module_deps', 'patch-module_deps.sql' ],
                        [ 'dropIndex', 'archive', 'ar_page_revid', 
'patch-archive_kill_ar_page_revid.sql' ],
                        [ 'addIndex', 'archive', 'ar_revid', 
'patch-archive_ar_revid.sql' ],
@@ -280,6 +279,10 @@
                        [ 'dropTable', 'msg_resource' ],
                        [ 'addTable', 'bot_passwords', 
'patch-bot_passwords.sql' ],
                        [ 'addField', 'watchlist', 'wl_id', 
'patch-watchlist-wl_id.sql' ],
+                       [ 'dropIndex', 'categorylinks', 'cl_collation', 
'patch-kill-cl_collation_index.sql' ],
+                       [ 'addIndex', 'categorylinks', 'cl_collation_ext',
+                               'patch-add-cl_collation_ext_index.sql' ],
+                       [ 'doCollationUpdate' ],
                ];
        }
 
diff --git a/includes/installer/SqliteUpdater.php 
b/includes/installer/SqliteUpdater.php
index 99ab4e5..86dccd7 100644
--- a/includes/installer/SqliteUpdater.php
+++ b/includes/installer/SqliteUpdater.php
@@ -65,7 +65,6 @@
                        [ 'addField', 'interwiki', 'iw_api', 
'patch-iw_api_and_wikiid.sql' ],
                        [ 'dropIndex', 'iwlinks', 'iwl_prefix', 
'patch-kill-iwl_prefix.sql' ],
                        [ 'addField', 'categorylinks', 'cl_collation', 
'patch-categorylinks-better-collation.sql' ],
-                       [ 'doCollationUpdate' ],
                        [ 'addTable', 'module_deps', 'patch-module_deps.sql' ],
                        [ 'dropIndex', 'archive', 'ar_page_revid', 
'patch-archive_kill_ar_page_revid.sql' ],
                        [ 'addIndex', 'archive', 'ar_revid', 
'patch-archive_ar_revid.sql' ],
@@ -149,6 +148,10 @@
                        [ 'dropTable', 'msg_resource' ],
                        [ 'addTable', 'bot_passwords', 
'patch-bot_passwords.sql' ],
                        [ 'addField', 'watchlist', 'wl_id', 
'patch-watchlist-wl_id.sql' ],
+                       [ 'dropIndex', 'categorylinks', 'cl_collation', 
'patch-kill-cl_collation_index.sql' ],
+                       [ 'addIndex', 'categorylinks', 'cl_collation_ext',
+                               'patch-add-cl_collation_ext_index.sql' ],
+                       [ 'doCollationUpdate' ],
                ];
        }
 
diff --git a/maintenance/archives/patch-add-cl_collation_ext_index.sql 
b/maintenance/archives/patch-add-cl_collation_ext_index.sql
new file mode 100644
index 0000000..8137dc6
--- /dev/null
+++ b/maintenance/archives/patch-add-cl_collation_ext_index.sql
@@ -0,0 +1,2 @@
+-- @since 1.27
+CREATE INDEX /*i*/cl_collation_ext ON /*_*/categorylinks (cl_collation, cl_to, 
cl_type, cl_from);
diff --git a/maintenance/archives/patch-categorylinks-better-collation.sql 
b/maintenance/archives/patch-categorylinks-better-collation.sql
index c1499c1..f5ff1f1 100644
--- a/maintenance/archives/patch-categorylinks-better-collation.sql
+++ b/maintenance/archives/patch-categorylinks-better-collation.sql
@@ -13,7 +13,7 @@
        ADD COLUMN cl_sortkey_prefix varchar(255) binary NOT NULL default '',
        ADD COLUMN cl_collation varbinary(32) NOT NULL default '',
        ADD COLUMN cl_type ENUM('page', 'subcat', 'file') NOT NULL default 
'page',
-       ADD INDEX (cl_collation),
+-- rm'd in 1.27        ADD INDEX (cl_collation),
        DROP INDEX cl_sortkey,
        ADD INDEX cl_sortkey (cl_to, cl_type, cl_sortkey, cl_from);
 INSERT IGNORE INTO /*$wgDBprefix*/updatelog (ul_key) VALUES 
('cl_fields_update');
diff --git a/maintenance/archives/patch-kill-cl_collation_index.sql 
b/maintenance/archives/patch-kill-cl_collation_index.sql
new file mode 100644
index 0000000..7f75a62
--- /dev/null
+++ b/maintenance/archives/patch-kill-cl_collation_index.sql
@@ -0,0 +1,7 @@
+--
+-- Kill cl_collation index.
+-- @since 1.27
+--
+
+DROP INDEX /*i*/cl_collation ON /*_*/categorylinks;
+
diff --git a/maintenance/tables.sql b/maintenance/tables.sql
index 7942687..89aeb9c 100644
--- a/maintenance/tables.sql
+++ b/maintenance/tables.sql
@@ -619,8 +619,8 @@
 -- Used by the API (and some extensions)
 CREATE INDEX /*i*/cl_timestamp ON /*_*/categorylinks (cl_to,cl_timestamp);
 
--- FIXME: Not used, delete this
-CREATE INDEX /*i*/cl_collation ON /*_*/categorylinks (cl_collation);
+-- Used when updating collation (e.g. updateCollation.php)
+CREATE INDEX /*i*/cl_collation_ext ON /*_*/categorylinks (cl_collation, cl_to, 
cl_type, cl_from);
 
 --
 -- Track all existing categories.  Something is a category if 1) it has an en-
diff --git a/maintenance/updateCollation.php b/maintenance/updateCollation.php
index af666b0..186feb22 100644
--- a/maintenance/updateCollation.php
+++ b/maintenance/updateCollation.php
@@ -33,7 +33,7 @@
  * @ingroup Maintenance
  */
 class UpdateCollation extends Maintenance {
-       const BATCH_SIZE = 10000; // Number of rows to process in one batch
+       const BATCH_SIZE = 100; // Number of rows to process in one batch
        const SYNC_INTERVAL = 20; // Wait for slaves after this many batches
 
        public $sizeHistogram = [];
@@ -85,10 +85,18 @@
                // but this will raise an exception, breaking all category pages
                $collation->getFirstLetter( 'MediaWiki' );
 
+               // Locally at least, (my local is a rather old version of mysql)
+               // mysql seems to filesort if there is both an equality
+               // (but not for an inequality) condition on cl_collation in the
+               // WHERE and it is also the first item in the ORDER BY.
+               if ( $this->hasOption( 'previous-collation' ) ) {
+                       $orderBy = 'cl_to, cl_type, cl_from';
+               } else {
+                       $orderBy = 'cl_collation, cl_to, cl_type, cl_from';
+               }
                $options = [
                        'LIMIT' => self::BATCH_SIZE,
-                       'ORDER BY' => 'cl_from, cl_to',
-                       'STRAIGHT_JOIN',
+                       'ORDER BY' => $orderBy,
                ];
 
                if ( $force || $dryRun ) {
@@ -124,16 +132,24 @@
                        }
                        $this->output( "Fixing collation for $count rows.\n" );
                }
-
                $count = 0;
                $batchCount = 0;
                $batchConds = [];
                do {
                        $this->output( "Selecting next " . self::BATCH_SIZE . " 
rows..." );
+
+                       // cl_type must be selected as a number for proper 
paging because
+                       // enums suck.
+                       if ( $dbw->getType() === 'mysql' ) {
+                               $clType = 'cl_type+0 AS "cl_type_numeric"';
+                       } else {
+                               $clType = 'cl_type';
+                       }
                        $res = $dbw->select(
                                [ 'categorylinks', 'page' ],
                                [ 'cl_from', 'cl_to', 'cl_sortkey_prefix', 
'cl_collation',
-                                       'cl_sortkey', 'page_namespace', 
'page_title'
+                                       'cl_sortkey', $clType,
+                                       'page_namespace', 'page_title'
                                ],
                                array_merge( $collationConds, $batchConds, [ 
'cl_from = page_id' ] ),
                                __METHOD__,
@@ -217,18 +233,28 @@
 
        /**
         * Return an SQL expression selecting rows which sort above the given 
row,
-        * assuming an ordering of cl_from, cl_to
+        * assuming an ordering of cl_collation, cl_to, cl_type, cl_from
         * @param stdClass $row
         * @param DatabaseBase $dbw
         * @return string
         */
        function getBatchCondition( $row, $dbw ) {
-               $fields = [ 'cl_from', 'cl_to' ];
+               if ( $this->hasOption( 'previous-collation' ) ) {
+                       $fields = [ 'cl_to', 'cl_type', 'cl_from' ];
+               } else {
+                       $fields = [ 'cl_collation', 'cl_to', 'cl_type', 
'cl_from' ];
+               }
                $first = true;
                $cond = false;
                $prefix = false;
                foreach ( $fields as $field ) {
-                       $encValue = $dbw->addQuotes( $row->$field );
+                       if ( $dbw->getType() === 'mysql' && $field === 
'cl_type' ) {
+                               // Range conditions with enums are weird in 
mysql
+                               // This must be a numeric literal, or it won't 
work.
+                               $encValue = intval( $row->cl_type_numeric );
+                       } else {
+                               $encValue = $dbw->addQuotes( $row->$field );
+                       }
                        $inequality = "$field > $encValue";
                        $equality = "$field = $encValue";
                        if ( $first ) {

-- 
To view, visit https://gerrit.wikimedia.org/r/272416
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Iee6cd997ff87a313a46fda19d8ab063d0fed8ce8
Gerrit-PatchSet: 7
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Brian Wolff <[email protected]>
Gerrit-Reviewer: Bartosz Dziewoński <[email protected]>
Gerrit-Reviewer: Brian Wolff <[email protected]>
Gerrit-Reviewer: Jcrespo <[email protected]>
Gerrit-Reviewer: Jjanes <[email protected]>
Gerrit-Reviewer: Kaldari <[email protected]>
Gerrit-Reviewer: Niharika29 <[email protected]>
Gerrit-Reviewer: Parent5446 <[email protected]>
Gerrit-Reviewer: PleaseStand <[email protected]>
Gerrit-Reviewer: Springle <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to