PleaseStand has uploaded a new change for review.
https://gerrit.wikimedia.org/r/247029
Change subject: Avoid creating lots and lots of cat_id gaps
......................................................................
Avoid creating lots and lots of cat_id gaps
Currently, INSERT...ON DUPLICATE KEY UPDATE is used to update the page
counts in the category table. However, MySQL 5.1.22 and newer, by default
(that is, unless innodb_autoinc_lock_mode is 0), increment the
AUTO_INCREMENT counter for cat_id before checking for duplicate key errors.
This creates many gaps in the cat_id sequence.
To avoid this, check for existing category rows first and do a plain UPDATE
on any that are found; INSERT...ON DUPLICATE KEY UPDATE is then used only
for rows that do not exist yet. It is hoped that the extra queries will not
significantly harm performance.
Change-Id: Ic2ab9ff14f04a0c7ea90a5b6756cade0c78e2885
---
M includes/Category.php
M includes/page/WikiPage.php
2 files changed, 73 insertions(+), 38 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/29/247029/1
diff --git a/includes/Category.php b/includes/Category.php
index c3e8a4e..1b05f33 100644
--- a/includes/Category.php
+++ b/includes/Category.php
@@ -304,11 +304,11 @@
return false;
}
- # Note, we must use names for this, since categorylinks does.
- if ( $this->mName === null ) {
- if ( !$this->initialize() ) {
- return false;
- }
+ # If we have just a category name, find out whether there is an
+ # existing row. Or if we have just an ID, get the name, because
+ # that's what categorylinks uses.
+ if ( !$this->initialize() ) {
+ return false;
}
$dbw = wfGetDB( DB_MASTER );
@@ -327,25 +327,38 @@
array( 'LOCK IN SHARE MODE' )
);
- # TODO: This will cause lots and lots of gaps on MySQL unless
- # innodb_autoinc_lock_mode is 0 (and also on some non-MySQL
- # DBMSes) if you run populateCategory.php repeatedly.
- $dbw->upsert(
- 'category',
- array(
- 'cat_title' => $this->mName,
- 'cat_pages' => $result->pages,
- 'cat_subcats' => $result->subcats,
- 'cat_files' => $result->files
- ),
- array( 'cat_title' ),
- array(
- 'cat_pages' => $result->pages,
- 'cat_subcats' => $result->subcats,
- 'cat_files' => $result->files
- ),
- __METHOD__
- );
+ if ( $this->mId ) {
+ # The category row already exists, so do a plain UPDATE instead
+ # of INSERT...ON DUPLICATE KEY UPDATE to avoid creating a gap
+ # in the cat_id sequence. The row may or may not be "affected".
+ $dbw->update(
+ 'category',
+ array(
+ 'cat_pages' => $result->pages,
+ 'cat_subcats' => $result->subcats,
+ 'cat_files' => $result->files
+ ),
+ array( 'cat_title' => $this->mName ),
+ __METHOD__
+ );
+ } else {
+ $dbw->upsert(
+ 'category',
+ array(
+ 'cat_title' => $this->mName,
+ 'cat_pages' => $result->pages,
+ 'cat_subcats' => $result->subcats,
+ 'cat_files' => $result->files
+ ),
+ array( 'cat_title' ),
+ array(
+ 'cat_pages' => $result->pages,
+ 'cat_subcats' => $result->subcats,
+ 'cat_files' => $result->files
+ ),
+ __METHOD__
+ );
+ }
$dbw->endAtomic( __METHOD__ );
diff --git a/includes/page/WikiPage.php b/includes/page/WikiPage.php
index 3f6a4db..3ec3e89 100644
--- a/includes/page/WikiPage.php
+++ b/includes/page/WikiPage.php
@@ -3395,22 +3395,44 @@
}
if ( count( $added ) ) {
- $insertRows = array();
- foreach ( $added as $cat ) {
- $insertRows[] = array(
- 'cat_title' => $cat,
- 'cat_pages' => 1,
- 'cat_subcats' => ( $ns
== NS_CATEGORY ) ? 1 : 0,
- 'cat_files' => ( $ns
== NS_FILE ) ? 1 : 0,
+ $existingAdded =
$dbw->selectFieldValues(
+ 'category',
+ 'cat_title',
+ array( 'cat_title' => $added ),
+ __METHOD__
+ );
+
+ // For category rows that already exist, do a plain
+ // UPDATE instead of INSERT...ON DUPLICATE KEY UPDATE
+ // to avoid creating gaps in the cat_id sequence.
+ if ( count( $existingAdded ) ) {
+ $dbw->update(
+ 'category',
+ $addFields,
+ array( 'cat_title' =>
$existingAdded ),
+ __METHOD__
);
}
- $dbw->upsert(
- 'category',
- $insertRows,
- array( 'cat_title' ),
- $addFields,
- $method
- );
+
+ $missingAdded = array_diff( $added,
$existingAdded );
+ if ( count( $missingAdded ) ) {
+ $insertRows = array();
+ foreach ( $missingAdded as $cat
) {
+ $insertRows[] = array(
+ 'cat_title'
=> $cat,
+ 'cat_pages'
=> 1,
+ 'cat_subcats'
=> ( $ns == NS_CATEGORY ) ? 1 : 0,
+ 'cat_files'
=> ( $ns == NS_FILE ) ? 1 : 0,
+ );
+ }
+ $dbw->upsert(
+ 'category',
+ $insertRows,
+ array( 'cat_title' ),
+ $addFields,
+ $method
+ );
+ }
}
if ( count( $deleted ) ) {
--
To view, visit https://gerrit.wikimedia.org/r/247029
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic2ab9ff14f04a0c7ea90a5b6756cade0c78e2885
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: PleaseStand <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits