Aaron Schulz has uploaded a new change for review.
https://gerrit.wikimedia.org/r/287007
Change subject: Batch links updates performed by refreshLinks jobs
......................................................................
Batch links updates performed by refreshLinks jobs
This should avoid erratic lag spikes that happen when many links are
added or removed by new pages (sometimes bot-generated), by edits
that blank pages, and by reversions of those edits.
Bug: T109943
Change-Id: Icd453fcc3d28342065893260ad327eae11870245
---
M includes/deferred/LinksUpdate.php
1 file changed, 64 insertions(+), 29 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/07/287007/1
diff --git a/includes/deferred/LinksUpdate.php
b/includes/deferred/LinksUpdate.php
index c0205be..f30b314 100644
--- a/includes/deferred/LinksUpdate.php
+++ b/includes/deferred/LinksUpdate.php
@@ -82,6 +82,8 @@
*/
private $user;
+ const BATCH_SIZE = 500; // try to keep typical updates in a single
transaction
+
/**
* Constructor
*
@@ -91,7 +93,8 @@
* @throws MWException
*/
function __construct( Title $title, ParserOutput $parserOutput,
$recursive = true ) {
- parent::__construct( false ); // no implicit transaction
+ // Implicit transactions are disabled as they interfere with
batching
+ parent::__construct( false );
$this->mTitle = $title;
$this->mId = $title->getArticleID( Title::GAID_FOR_UPDATE );
@@ -143,11 +146,21 @@
* Update link tables with outgoing links from an updated article
*/
public function doUpdate() {
+ // Make sure all links update threads see the changes of each
other.
+ // This handles the case when updates have to be batched into
several COMMITs.
+ $scopedLock = $this->mDb->getScopedLockAndFlush(
+ "LinksUpdate:doUpdate:pageid:" . $this->mId,
+ __METHOD__,
+ 15
+ );
+
Hooks::run( 'LinksUpdate', [ &$this ] );
$this->doIncrementalUpdate();
- $this->mDb->onTransactionIdle( function() {
+ $this->mDb->onTransactionIdle( function() use ( $scopedLock ) {
Hooks::run( 'LinksUpdateComplete', [ &$this ] );
+ // Release the lock *after* the final COMMIT for
correctness
+ ScopedCallback::consume( $scopedLock );
} );
}
@@ -160,7 +173,6 @@
# Image links
$existing = $this->getExistingImages();
-
$imageDeletes = $this->getImageDeletions( $existing );
$this->incrTableUpdate( 'imagelinks', 'il', $imageDeletes,
$this->getImageInsertions( $existing ) );
@@ -191,9 +203,7 @@
# Category links
$existing = $this->getExistingCategories();
-
$categoryDeletes = $this->getCategoryDeletions( $existing );
-
$this->incrTableUpdate( 'categorylinks', 'cl', $categoryDeletes,
$this->getCategoryInsertions( $existing ) );
@@ -205,9 +215,7 @@
# Page properties
$existing = $this->getExistingProperties();
-
$propertiesDeletes = $this->getPropertyDeletions( $existing );
-
$this->incrTableUpdate( 'page_props', 'pp', $propertiesDeletes,
$this->getPropertyInsertions( $existing ) );
@@ -307,44 +315,71 @@
* @param array $deletions
* @param array $insertions Rows to insert
*/
- function incrTableUpdate( $table, $prefix, $deletions, $insertions ) {
- if ( $table == 'page_props' ) {
+ private function incrTableUpdate( $table, $prefix, $deletions,
$insertions ) {
+ if ( $table === 'page_props' ) {
$fromField = 'pp_page';
} else {
$fromField = "{$prefix}_from";
}
- $where = [ $fromField => $this->mId ];
- if ( $table == 'pagelinks' || $table == 'templatelinks' ||
$table == 'iwlinks' ) {
- if ( $table == 'iwlinks' ) {
- $baseKey = 'iwl_prefix';
- } else {
- $baseKey = "{$prefix}_namespace";
+
+ $deleteWheres = []; // list of WHERE clause arrays for each DB
delete() call
+ if ( $table === 'pagelinks' || $table === 'templatelinks' ||
$table === 'iwlinks' ) {
+ $baseKey = ( $table === 'iwlinks' ) ? 'iwl_prefix' :
"{$prefix}_namespace";
+
+ $curBatchSize = 0;
+ $curDeletionBatch = [];
+ $deletionBatches = [];
+ foreach ( $deletions as $ns => $dbKeys ) {
+ foreach ( $dbKeys as $dbKey => $unused ) {
+ $curDeletionBatch[$ns][$dbKey] = 1;
+ if ( ++$curBatchSize >=
self::BATCH_SIZE ) {
+ $deletionBatches[] =
$curDeletionBatch;
+ $curDeletionBatch = [];
+ $curBatchSize = 0;
+ }
+ }
}
- $clause = $this->mDb->makeWhereFrom2d( $deletions,
$baseKey, "{$prefix}_title" );
- if ( $clause ) {
- $where[] = $clause;
- } else {
- $where = false;
+ if ( $curDeletionBatch ) {
+ $deletionBatches[] = $curDeletionBatch;
}
+
+ foreach ( $deletionBatches as $deletionBatch ) {
+ $deleteWheres[] = [
+ $fromField => $this->mId,
+ $this->mDb->makeWhereFrom2d(
$deletionBatch, $baseKey, "{$prefix}_title" )
+ ];
+ }
+
+ var_dump( $deleteWheres );
} else {
- if ( $table == 'langlinks' ) {
+ if ( $table === 'langlinks' ) {
$toField = 'll_lang';
- } elseif ( $table == 'page_props' ) {
+ } elseif ( $table === 'page_props' ) {
$toField = 'pp_propname';
} else {
$toField = $prefix . '_to';
}
- if ( count( $deletions ) ) {
- $where[$toField] = array_keys( $deletions );
- } else {
- $where = false;
+
+ $deletionBatches = array_chunk( array_keys( $deletions
), self::BATCH_SIZE );
+ foreach ( $deletionBatches as $deletionBatch ) {
+ $deleteWheres[] = [ $fromField => $this->mId,
$toField => $deletionBatch ];
}
}
- if ( $where ) {
- $this->mDb->delete( $table, $where, __METHOD__ );
+
+ foreach ( $deleteWheres as $deleteWhere ) {
+ $this->mDb->delete( $table, $deleteWhere, __METHOD__ );
+ $this->mDb->commit( __METHOD__, 'flush' );
+ wfGetLBFactory()->waitForReplication();
}
+
+ $insertBatches = array_chunk( $insertions, self::BATCH_SIZE );
+ foreach ( $insertBatches as $insertBatch ) {
+ $this->mDb->insert( $table, $insertBatch, __METHOD__,
'IGNORE' );
+ $this->mDb->commit( __METHOD__, 'flush' );
+ wfGetLBFactory()->waitForReplication();
+ }
+
if ( count( $insertions ) ) {
- $this->mDb->insert( $table, $insertions, __METHOD__,
'IGNORE' );
Hooks::run( 'LinksUpdateAfterInsert', [ $this, $table,
$insertions ] );
}
}
--
To view, visit https://gerrit.wikimedia.org/r/287007
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Icd453fcc3d28342065893260ad327eae11870245
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Aaron Schulz <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits