jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/377046 )

Change subject: Allow batch sizes for different jobs to be defined separately.
......................................................................


Allow batch sizes for different jobs to be defined separately.

In particular, the batch size for HTMLCacheUpdateJob can be quite large,
while the batch size for the slow-running RefreshLinksJob should be
rather small.

Bug: T174422
Bug: T173710
Change-Id: Ic095e2eba985ea3a6e51645d0be58589b326f218
---
M client/config/WikibaseClient.default.php
M client/includes/Changes/WikiPageUpdater.php
M client/includes/WikibaseClient.php
M client/tests/phpunit/includes/Changes/WikiPageUpdaterTest.php
M docs/options.wiki
5 files changed, 102 insertions(+), 42 deletions(-)

Approvals:
  Daniel Kinzler: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/client/config/WikibaseClient.default.php 
b/client/config/WikibaseClient.default.php
index f75c05c..9f97102 100644
--- a/client/config/WikibaseClient.default.php
+++ b/client/config/WikibaseClient.default.php
@@ -1,5 +1,6 @@
 <?php
 
+use MediaWiki\MediaWikiServices;
 use Wikibase\Client\WikibaseClient;
 use Wikibase\SettingsArray;
 use Wikibase\WikibaseSettings;
@@ -96,8 +97,21 @@
                                $settings->getSetting( 'hasFullEntityIdColumn' 
) : true;
                },
 
-               // Database batch size in WikiPageUpdater which ChangeHandler 
uses
-               'wikiPageUpdaterDbBatchSize' => 50,
+               // Batch size for HTMLCacheUpdateJob
+               'purgeCacheBatchSize' => function ( SettingsArray $settings ) {
+                       $mainConfig = 
MediaWikiServices::getInstance()->getMainConfig();
+                       return $settings->hasSetting( 
'wikiPageUpdaterDbBatchSize' )
+                               ? $settings->getSetting( 
'wikiPageUpdaterDbBatchSize' )
+                               : $mainConfig->get( 'UpdateRowsPerJob' );
+               },
+
+               // Batch size for InjectRCRecordsJob
+               'recentChangesBatchSize' => function ( SettingsArray $settings 
) {
+                       $mainConfig = 
MediaWikiServices::getInstance()->getMainConfig();
+                       return $settings->hasSetting( 
'wikiPageUpdaterDbBatchSize' )
+                               ? $settings->getSetting( 
'wikiPageUpdaterDbBatchSize' )
+                               : $mainConfig->get( 'UpdateRowsPerJob' );
+               },
        ];
 
        // Some defaults depend on information not available at this time.
diff --git a/client/includes/Changes/WikiPageUpdater.php 
b/client/includes/Changes/WikiPageUpdater.php
index c6b83b5..1a4a2d2 100644
--- a/client/includes/Changes/WikiPageUpdater.php
+++ b/client/includes/Changes/WikiPageUpdater.php
@@ -10,7 +10,6 @@
 use Wikibase\Client\RecentChanges\RecentChangeFactory;
 use Wikibase\Client\RecentChanges\RecentChangesDuplicateDetector;
 use Wikibase\EntityChange;
-use Wikimedia\Assert\Assert;
 use Wikimedia\Rdbms\LBFactory;
 
 /**
@@ -40,9 +39,14 @@
        private $LBFactory;
 
        /**
-        * @var int Batch size for database operations
+        * @var int Batch size for HTMLCacheUpdateJob
         */
-       private $dbBatchSize = 50;
+       private $purgeCacheBatchSize = 300;
+
+       /**
+        * @var int Batch size for InjectRCRecordsJob
+        */
+       private $rcBatchSize = 300;
 
        /**
         * @var RecentChangesDuplicateDetector|null
@@ -78,17 +82,29 @@
        /**
         * @return int
         */
-       public function getDbBatchSize() {
-               return $this->dbBatchSize;
+       public function getPurgeCacheBatchSize() {
+               return $this->purgeCacheBatchSize;
        }
 
        /**
-        * @param int $dbBatchSize
+        * @param int $purgeCacheBatchSize
         */
-       public function setDbBatchSize( $dbBatchSize ) {
-               Assert::parameterType( 'integer', $dbBatchSize, 'dbBatchSize' );
+       public function setPurgeCacheBatchSize( $purgeCacheBatchSize ) {
+               $this->purgeCacheBatchSize = $purgeCacheBatchSize;
+       }
 
-               $this->dbBatchSize = $dbBatchSize;
+       /**
+        * @return int
+        */
+       public function getRecentChangesBatchSize() {
+               return $this->rcBatchSize;
+       }
+
+       /**
+        * @param int $rcBatchSize
+        */
+       public function setRecentChangesBatchSize( $rcBatchSize ) {
+               $this->rcBatchSize = $rcBatchSize;
        }
 
        private function incrementStats( $updateType, $delta ) {
@@ -133,7 +149,7 @@
                }
 
                $jobs = [];
-               $titleBatches = array_chunk( $titles, $this->dbBatchSize );
+               $titleBatches = array_chunk( $titles, 
$this->getPurgeCacheBatchSize() );
 
                /* @var Title[] $batch */
                foreach ( $titleBatches as $batch ) {
@@ -166,26 +182,23 @@
                        return;
                }
 
-               $jobs = [];
-               $titleBatches = array_chunk( $titles, $this->dbBatchSize );
+               $c = 0;
 
-               /* @var Title[] $batch */
-               foreach ( $titleBatches as $batch ) {
-                       wfDebugLog( __CLASS__, __FUNCTION__ . ": scheduling 
refresh links for "
-                               . count( $batch ) . " titles" );
+               // NOTE: no batching here, since RefreshLinksJobs are slow, and 
benefit more from
+               // deduplication and checking against page_touched than from 
reducing overhead
+               // through batching.
 
-                       $dummyTitle = Title::makeTitle( NS_SPECIAL, 'Badtitle/' 
. __CLASS__ );
-
-                       $jobs[] = new RefreshLinksJob(
-                               $dummyTitle, // the title will be ignored 
because the 'pages' parameter is set.
-                               $this->addRootJobParameters( [
-                                       'pages' => 
$this->getPageParamForRefreshLinksJob( $batch ),
-                               ], $rootJobParams )
+               foreach ( $titles as $title ) {
+                       $job = new RefreshLinksJob(
+                               $title, // one job per title; no 'pages' 
parameter is set here.
+                               $this->addRootJobParameters( [], $rootJobParams 
)
                        );
+
+                       $this->jobQueueGroup->lazyPush( $job );
+                       $c++;
                }
 
-               $this->jobQueueGroup->lazyPush( $jobs );
-               $this->incrementStats( 'RefreshLinks.jobs', count( $jobs ) );
+               $this->incrementStats( 'RefreshLinks.jobs', $c );
                $this->incrementStats( 'RefreshLinks.titles', count( $titles ) 
);
        }
 
@@ -221,7 +234,7 @@
                }
 
                $jobs = [];
-               $titleBatches = array_chunk( $titles, $this->dbBatchSize );
+               $titleBatches = array_chunk( $titles, $this->rcBatchSize );
 
                /* @var Title[] $batch */
                foreach ( $titleBatches as $batch ) {
diff --git a/client/includes/WikibaseClient.php 
b/client/includes/WikibaseClient.php
index c349215..6cbd115 100644
--- a/client/includes/WikibaseClient.php
+++ b/client/includes/WikibaseClient.php
@@ -1155,7 +1155,8 @@
                        MediaWikiServices::getInstance()->getStatsdDataFactory()
                );
 
-               $pageUpdater->setDbBatchSize( $this->settings->getSetting( 
'wikiPageUpdaterDbBatchSize' ) );
+               $pageUpdater->setPurgeCacheBatchSize( 
$this->settings->getSetting( 'purgeCacheBatchSize' ) );
+               $pageUpdater->setRecentChangesBatchSize( 
$this->settings->getSetting( 'recentChangesBatchSize' ) );
 
                $changeListTransformer = new ChangeRunCoalescer(
                        $this->getStore()->getEntityRevisionLookup(),
diff --git a/client/tests/phpunit/includes/Changes/WikiPageUpdaterTest.php 
b/client/tests/phpunit/includes/Changes/WikiPageUpdaterTest.php
index 4dd4364..e6fe3b8 100644
--- a/client/tests/phpunit/includes/Changes/WikiPageUpdaterTest.php
+++ b/client/tests/phpunit/includes/Changes/WikiPageUpdaterTest.php
@@ -3,8 +3,10 @@
 namespace Wikibase\Client\Tests\Changes;
 
 use HTMLCacheUpdateJob;
+use IJobSpecification;
 use Job;
 use JobQueueGroup;
+use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
 use PHPUnit_Framework_MockObject_MockObject;
 use MediaWiki\MediaWikiServices;
 use RefreshLinksJob;
@@ -144,6 +146,22 @@
                return $LBFactory;
        }
 
+       /**
+        * @return StatsdDataFactoryInterface
+        */
+       private function getStatsdDataFactoryMock( array $expectedStats ) {
+               $stats = $this->getMock( StatsdDataFactoryInterface::class );
+
+               $i = 0;
+               foreach ( $expectedStats as $updateType => $delta ) {
+                       $stats->expects( $this->at( $i++ ) )
+                               ->method( 'updateCount' )
+                               ->with( 'wikibase.client.pageupdates.' . 
$updateType, $delta );
+               }
+
+               return $stats;
+       }
+
        public function testPurgeWebCache() {
                $titleFoo = $this->getTitleMock( 'Foo', 21 );
                $titleBar = $this->getTitleMock( 'Bar', 22 );
@@ -170,8 +188,13 @@
                        $jobQueueGroup,
                        $this->getRCFactoryMock(),
                        
MediaWikiServices::getInstance()->getDBLoadBalancerFactory(),
-                       $this->getRCDupeDetectorMock()
+                       $this->getRCDupeDetectorMock(),
+                       $this->getStatsdDataFactoryMock( [
+                               'WebCache.jobs' => 2, // 2 batches (batch size 
2, 3 titles)
+                               'WebCache.titles' => 3,
+                       ] )
                );
+               $updater->setPurgeCacheBatchSize( 2 );
 
                $updater->purgeWebCache( [
                        $titleFoo, $titleBar, $titleCuzz,
@@ -206,22 +229,24 @@
                $rootJobParams = [];
                $jobQueueGroup->expects( $this->atLeastOnce() )
                        ->method( 'lazyPush' )
-                       ->will( $this->returnCallback( function( array $jobs ) 
use ( &$pages, &$rootJobParams ) {
-                               /** @var Job $job */
-                               foreach ( $jobs as $job ) {
-                                       $this->assertInstanceOf( 
RefreshLinksJob::class, $job );
-                                       $params = $job->getParams();
-                                       $this->assertArrayHasKey( 'pages', 
$params, '$params["pages"]' );
-                                       $pages += $params['pages']; // addition 
uses keys, array_merge does not
-                                       $rootJobParams = 
$job->getRootJobParams();
-                               }
+                       ->will( $this->returnCallback( function( 
IJobSpecification $job ) use ( &$pages, &$rootJobParams ) {
+                               $this->assertInstanceOf( 
RefreshLinksJob::class, $job );
+                               $title = $job->getTitle();
+
+                               $id = $title->getArticleID();
+                               $pages[$id] = [ $title->getNamespace(), 
$title->getDBkey() ];
+                               $rootJobParams = $job->getRootJobParams();
                        } ) );
 
                $updater = new WikiPageUpdater(
                        $jobQueueGroup,
                        $this->getRCFactoryMock(),
                        
MediaWikiServices::getInstance()->getDBLoadBalancerFactory(),
-                       $this->getRCDupeDetectorMock()
+                       $this->getRCDupeDetectorMock(),
+                       $this->getStatsdDataFactoryMock( [
+                               'RefreshLinks.jobs' => 3, // no batching
+                               'RefreshLinks.titles' => 3,
+                       ] )
                );
 
                $updater->scheduleRefreshLinks( [
@@ -276,8 +301,13 @@
                        $jobQueueGroup,
                        $this->getRCFactoryMock(),
                        $this->getLBFactoryMock(),
-                       $this->getRCDupeDetectorMock()
+                       $this->getRCDupeDetectorMock(),
+                       $this->getStatsdDataFactoryMock( [
+                               'InjectRCRecords.jobs' => 2, // 2 batches 
(batch size 2, 3 titles)
+                               'InjectRCRecords.titles' => 3,
+                       ] )
                );
+               $updater->setRecentChangesBatchSize( 2 );
 
                $updater->injectRCRecords(
                        [ $titleFoo, $titleBar, $titleCuzz, ],
diff --git a/docs/options.wiki b/docs/options.wiki
index f0b1752..9d15907 100644
--- a/docs/options.wiki
+++ b/docs/options.wiki
@@ -111,4 +111,6 @@
 ;sendEchoNotification: If true, allows users on the client wiki to get a 
notification when a page they created is connected to a repo item. This 
requires the Echo extension.
 ;echoIcon: If <code>sendEchoNotification</code> is set to <code>true</code>, 
you can also provide what icon the user will see. The correct syntax is <code>[ 
'url' => '...' ]</code> or <code>[ 'path' => '...' ]</code> where 
<code>path</code> is relative to <code>$wgExtensionAssetsPath</code>. Defaults 
to <code>false</code> which means that there will be the default Echo icon.
 ;disabledUsageAspects: Array of usage aspects that should not be saved in the 
<code>wbc_entity_usage</code> table. This currently only supports aspect codes 
(like "T", "L" or "X"), but not full aspect keys (like "L.de").
-;wikiPageUpdaterDbBatchSize: Batch size in updating page table when applying 
changes in the client, default is 50.
+;wikiPageUpdaterDbBatchSize: DEPRECATED. If set, acts as a default for 
purgeCacheBatchSize and recentChangesBatchSize.
+;purgeCacheBatchSize: Number of pages to process in each HTMLCacheUpdateJob. 
Defaults to wikiPageUpdaterDbBatchSize if that is set (for backwards 
compatibility), otherwise to $wgUpdateRowsPerJob.
+;recentChangesBatchSize: Number of recentchanges rows to create in each 
InjectRCRecordsJob. Defaults to wikiPageUpdaterDbBatchSize if that is set (for 
backwards compatibility), otherwise to $wgUpdateRowsPerJob.

-- 
To view, visit https://gerrit.wikimedia.org/r/377046
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ic095e2eba985ea3a6e51645d0be58589b326f218
Gerrit-PatchSet: 12
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Aaron Schulz <[email protected]>
Gerrit-Reviewer: Addshore <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Giuseppe Lavagetto <[email protected]>
Gerrit-Reviewer: Hoo man <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Thiemo Mättig (WMDE) <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to