PleaseStand has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/247029

Change subject: Avoid creating lots and lots of cat_id gaps
......................................................................

Avoid creating lots and lots of cat_id gaps

Currently, INSERT...ON DUPLICATE KEY UPDATE is used to update the page
counts in the category table. However, MySQL 5.1.22 and newer, by default,
increment the counter for cat_id before checking for duplicate key errors.
This creates many gaps in the cat_id sequence.

To avoid this, check for existing category rows, and instead UPDATE any
that were found. It is hoped that the extra queries will not significantly
harm performance.

Change-Id: Ic2ab9ff14f04a0c7ea90a5b6756cade0c78e2885
---
M includes/Category.php
M includes/page/WikiPage.php
2 files changed, 73 insertions(+), 38 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/29/247029/1

diff --git a/includes/Category.php b/includes/Category.php
index c3e8a4e..1b05f33 100644
--- a/includes/Category.php
+++ b/includes/Category.php
@@ -304,11 +304,11 @@
                        return false;
                }
 
-               # Note, we must use names for this, since categorylinks does.
-               if ( $this->mName === null ) {
-                       if ( !$this->initialize() ) {
-                               return false;
-                       }
+               # If we have just a category name, find out whether there is an
+               # existing row. Or if we have just an ID, get the name, because
+               # that's what categorylinks uses.
+               if ( !$this->initialize() ) {
+                       return false;
                }
 
                $dbw = wfGetDB( DB_MASTER );
@@ -327,25 +327,38 @@
                        array( 'LOCK IN SHARE MODE' )
                );
 
-               # TODO: This will cause lots and lots of gaps on MySQL unless
-               # innodb_autoinc_lock_mode is 0 (and also on some non-MySQL
-               # DBMSes) if you run populateCategory.php repeatedly.
-               $dbw->upsert(
-                       'category',
-                       array(
-                               'cat_title' => $this->mName,
-                               'cat_pages' => $result->pages,
-                               'cat_subcats' => $result->subcats,
-                               'cat_files' => $result->files
-                       ),
-                       array( 'cat_title' ),
-                       array(
-                               'cat_pages' => $result->pages,
-                               'cat_subcats' => $result->subcats,
-                               'cat_files' => $result->files
-                       ),
-                       __METHOD__
-               );
+               if ( $this->mId ) {
+                       # The category row already exists, so do a plain UPDATE instead
+                       # of INSERT...ON DUPLICATE KEY UPDATE to avoid creating a gap
+                       # in the cat_id sequence. The row may or may not be "affected".
+                       $dbw->update(
+                               'category',
+                               array(
+                                       'cat_pages' => $result->pages,
+                                       'cat_subcats' => $result->subcats,
+                                       'cat_files' => $result->files
+                               ),
+                               array( 'cat_title' => $this->mName ),
+                               __METHOD__
+                       );
+               } else {
+                       $dbw->upsert(
+                               'category',
+                               array(
+                                       'cat_title' => $this->mName,
+                                       'cat_pages' => $result->pages,
+                                       'cat_subcats' => $result->subcats,
+                                       'cat_files' => $result->files
+                               ),
+                               array( 'cat_title' ),
+                               array(
+                                       'cat_pages' => $result->pages,
+                                       'cat_subcats' => $result->subcats,
+                                       'cat_files' => $result->files
+                               ),
+                               __METHOD__
+                       );
+               }
 
                $dbw->endAtomic( __METHOD__ );
 
diff --git a/includes/page/WikiPage.php b/includes/page/WikiPage.php
index 3f6a4db..3ec3e89 100644
--- a/includes/page/WikiPage.php
+++ b/includes/page/WikiPage.php
@@ -3395,22 +3395,44 @@
                                }
 
                                if ( count( $added ) ) {
-                                       $insertRows = array();
-                                       foreach ( $added as $cat ) {
-                                               $insertRows[] = array(
-                                                       'cat_title'   => $cat,
-                                                       'cat_pages'   => 1,
-                                                       'cat_subcats' => ( $ns 
== NS_CATEGORY ) ? 1 : 0,
-                                                       'cat_files'   => ( $ns 
== NS_FILE ) ? 1 : 0,
+                                       $existingAdded = 
$dbw->selectFieldValues(
+                                               'category',
+                                               'cat_title',
+                                               array( 'cat_title' => $added ),
+                                               __METHOD__
+                                       );
+
+                                       // For category rows that already exist, do a plain
+                                       // UPDATE instead of INSERT...ON DUPLICATE KEY UPDATE
+                                       // to avoid creating gaps in the cat_id sequence.
+                                       if ( count( $existingAdded ) ) {
+                                               $dbw->update(
+                                                       'category',
+                                                       $addFields,
+                                                       array( 'cat_title' => 
$existingAdded ),
+                                                       __METHOD__
                                                );
                                        }
-                                       $dbw->upsert(
-                                               'category',
-                                               $insertRows,
-                                               array( 'cat_title' ),
-                                               $addFields,
-                                               $method
-                                       );
+
+                                       $missingAdded = array_diff( $added, 
$existingAdded );
+                                       if ( count( $missingAdded ) ) {
+                                               $insertRows = array();
+                                               foreach ( $missingAdded as $cat 
) {
+                                                       $insertRows[] = array(
+                                                               'cat_title'   
=> $cat,
+                                                               'cat_pages'   
=> 1,
+                                                               'cat_subcats' 
=> ( $ns == NS_CATEGORY ) ? 1 : 0,
+                                                               'cat_files'   
=> ( $ns == NS_FILE ) ? 1 : 0,
+                                                       );
+                                               }
+                                               $dbw->upsert(
+                                                       'category',
+                                                       $insertRows,
+                                                       array( 'cat_title' ),
+                                                       $addFields,
+                                                       $method
+                                               );
+                                       }
                                }
 
                                if ( count( $deleted ) ) {

-- 
To view, visit https://gerrit.wikimedia.org/r/247029
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic2ab9ff14f04a0c7ea90a5b6756cade0c78e2885
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: PleaseStand <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to