Pgehres has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/53285


Change subject: Rewrite migratePass1.php to use chunks.
......................................................................

Rewrite migratePass1.php to use chunks.

Since there isn't an id field in globalnames, make use of the index that does
exist to partition the table based on gn_name prefixes with a little recursion.

Change-Id: I3b8f39022d1abc481685b72d609a866e114026e7
---
M maintenance/migratePass1.php
1 file changed, 93 insertions(+), 12 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CentralAuth refs/changes/85/53285/1

diff --git a/maintenance/migratePass1.php b/maintenance/migratePass1.php
index 906279e..26fc047 100644
--- a/maintenance/migratePass1.php
+++ b/maintenance/migratePass1.php
@@ -10,30 +10,111 @@
 }
 require_once( "$IP/maintenance/commandLine.inc" );
 
+$stats = array(
+       "migrated" => 0,
+       "total" => 0,
+       "chunksize" => 1000,
+       "start" => microtime( true ),
+       "startPrefix" => ''
+);
+
 function migratePassOne() {
-       $migrated = 0;
-       $total = 0;
-       $chunkSize = 1000;
-       $start = microtime( true );
+       global $stats;
+
+       if ( $stats["chunksize"] < 1 ) {
+               echo "You must select a positive chunk size or infinite recursion will ensue";
+               die( 1 );
+       }
+
+       doMigratePassOne();
+
+       migratePassOneReport( $stats['migrated'], $stats['total'], $stats['start'] );
+       echo "DONE\n";
+}
+
+function doMigratePassOne( $prefix='' ) {
+       global $stats;
+
+       // enable us to continue where we left off
+       if ( $prefix < $stats['startPrefix'] ) {
+               // check to see if start is a proper subset of startPrefix
+               if ( strpos( $stats['startPrefix'], $prefix ) !== 0 ) {
+                       return; // skip this prefix
+               }
+       }
+
+       $keys = splitPrefixIntoKeys( $prefix );
+
+       print_r( $keys );
+
+       foreach ( $keys as $key => $count) {
+
+               print $key . ' - '. $count . "\n";
+
+               if ( $count > $stats['chunksize'] ) {
+                       doMigratePassOne( $prefix . $key );
+               } else {
+                       doMigratePassOneSubset( $prefix . $key );
+               }
+       }
+}
+
+/**
+ * Executes the migratePass1 for the specified subset of globalnames.
+ *
+ * This function assumes that all checks for chunk size have already been performed
+ * and any over-sized chunks split into smaller parts.
+ *
+ * @param string $prefix The prefix used as a limiting factor
+ */
+function doMigratePassOneSubset( $prefix='' ) {
+       global $stats;
+
+       echo "-- Starting $prefix --\n";
 
        $dbBackground = CentralAuthUser::getCentralSlaveDB();
+
        $result = $dbBackground->select(
                'globalnames',
-               array( 'gn_name' ),
-               '',
-               __METHOD__ );
+               array( 'gn_name', ),
+               $prefix == '' ? '' : "gn_name like '$prefix%'",
+               __METHOD__
+       );
+
        foreach( $result as $row ) {
                $name = $row->gn_name;
                $central = new CentralAuthUser( $name );
                if ( $central->storeAndMigrate() ) {
-                       $migrated++;
+                       $stats['migrated']++;
                }
-               if ( ++$total % $chunkSize == 0 ) {
-                       migratePassOneReport( $migrated, $total, $start );
+               if ( ++$stats['total'] % $stats['chunksize'] == 0 ) {
+                       migratePassOneReport( $stats['migrated'], $stats['total'], $stats['start'] );
                }
        }
-       migratePassOneReport( $migrated, $total, $start );
-       echo "DONE\n";
+       migratePassOneReport( $stats['migrated'], $stats['total'], $stats['start'] );
+       echo "-- Done $prefix --\n";
+}
+
+function splitPrefixIntoKeys( $prefix='' ) {
+       $subStart = strlen( $prefix ) + 1; // MySQL is 1-indexed
+
+       $dbBackground = CentralAuthUser::getCentralSlaveDB();
+
+       $result = $dbBackground->select(
+               'globalnames',
+               array( "substring(gn_name, $subStart, 1) as letter", 'count(*) as count' ),
+               $prefix == '' ? '' : "gn_name like '$prefix%'",
+               __METHOD__ ,
+               array( 'ORDER BY' => 'letter ASC', 'GROUP BY' => 'letter' )
+       );
+
+       $keys = array();
+
+       foreach ( $result as $row ) {
+               $keys[$row->letter] = $row->count;
+       }
+
+       return $keys;
 }
 
 /**

-- 
To view, visit https://gerrit.wikimedia.org/r/53285
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I3b8f39022d1abc481685b72d609a866e114026e7
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CentralAuth
Gerrit-Branch: master
Gerrit-Owner: Pgehres <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to