EBernhardson has uploaded a new change for review. https://gerrit.wikimedia.org/r/247345
Change subject: Handle per-cluster shard and replica counts ...................................................................... Handle per-cluster shard and replica counts Creates a new object, ClusterSettings, to hold the logic of extracting specific settings related to a cluster from a SearchConfig instance. This is intended to be expanded to also include other settings, such as timeouts and job drop timeouts. Change-Id: I79595a676a26abc8a6abc82569c21cd138cf81a7 (cherry picked from commit 102e7c5c2810f92feedd967028509e0742ec3d00) --- M CirrusSearch.php M autoload.php A includes/ClusterSettings.php M includes/Connection.php M maintenance/updateOneSearchIndexConfig.php M maintenance/updateSuggesterIndex.php M maintenance/updateVersionIndex.php A tests/unit/ClusterSettingsTest.php 8 files changed, 178 insertions(+), 43 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/45/247345/1 diff --git a/CirrusSearch.php b/CirrusSearch.php index 6c873ca..28beb6b 100644 --- a/CirrusSearch.php +++ b/CirrusSearch.php @@ -69,6 +69,11 @@ $wgCirrusSearchConnectionAttempts = 1; // Number of shards for each index +// You can also set this setting for each cluster: +// $wgCirrusSearchShardCount = array( +// 'cluster1' => array( 'content' => 2, 'general' => 2 ), +// 'cluster2' => array( 'content' => 3, 'general' => 3 ), +//); $wgCirrusSearchShardCount = array( 'content' => 4, 'general' => 4, 'titlesuggest' => 4 ); // Number of replicas Elasticsearch can expand or contract to. This allows for @@ -76,7 +81,13 @@ // higher levels of replication. You if you need more redundancy you could // adjust this to '0-10' or '0-all' or even 'false' (string, not boolean) to // disable the behavior entirely. The default should be fine for most people. +// You can also set this setting for each cluster: +// $wgCirrusSearchReplicas = array( +// 'cluster1' => array( 'content' => '0-1', 'general' => '0-2' ), +// 'cluster2' => array( 'content' => '0-2', 'general' => '0-3' ), +//); $wgCirrusSearchReplicas = '0-2'; + // You can also specify this as an array of index type to replica count. If you // do then you must specify all index types. For example: // $wgCirrusSearchReplicas = array( 'content' => '0-3', 'general' => '0-2' ); diff --git a/autoload.php b/autoload.php index f476ca1..1673d56 100644 --- a/autoload.php +++ b/autoload.php @@ -25,6 +25,7 @@ 'CirrusSearch\\BuildDocument\\SuggestScoringMethodFactory' => __DIR__ . '/includes/BuildDocument/SuggestScoring.php', 'CirrusSearch\\CheckIndexes' => __DIR__ . '/maintenance/checkIndexes.php', 'CirrusSearch\\CirrusIsSetup' => __DIR__ . '/maintenance/cirrusNeedsToBeBuilt.php', + 'CirrusSearch\\ClusterSettings' => __DIR__ . '/includes/ClusterSettings.php', 'CirrusSearch\\Connection' => __DIR__ . '/includes/Connection.php', 'CirrusSearch\\DataSender' => __DIR__ . '/includes/DataSender.php', 'CirrusSearch\\Dump' => __DIR__ . '/includes/Dump.php', diff --git a/includes/ClusterSettings.php b/includes/ClusterSettings.php new file mode 100644 index 0000000..3370a4a --- /dev/null +++ b/includes/ClusterSettings.php @@ -0,0 +1,63 @@ +<?php + +namespace CirrusSearch; + +/** + * Handles resolving configuration variables into specific settings + * for a specific cluster. + */ +class ClusterSettings { + + /** + * @var SearchConfig + */ + protected $config; + + /** + * @var string + */ + protected $cluster; + + /** + * @param SearchConfig $config + * @param string $cluster + */ + public function __construct( SearchConfig $config, $cluster ) { + $this->config = $config; + $this->cluster = $cluster; + } + + /** + * @param string $indexType + * @return integer Number of shards the index should have + */ + public function getShardCount( $indexType ) { + $settings = $this->config->get( 'CirrusSearchShardCount' ); + if ( isset( $settings[$this->cluster][$indexType] ) ) { + return $settings[$this->cluster][$indexType]; + } elseif ( isset( $settings[$indexType] ) ) { + return $settings[$indexType]; + } + throw new \Exception( "Could not find a shard count for " + . "{$this->indexType}. Did you add an index to " + . "\$wgCirrusSearchNamespaceMappings but forget to " + . "add it to \$wgCirrusSearchShardCount?" ); + } + + /** + * @param string $indexType + * @return string Number of replicas Elasticsearch can expand or contract to. + */ + public function getReplicaCount( $indexType ) { + $settings = $this->config->get( 'CirrusSearchReplicas' ); + if ( !is_array( $settings ) ) { + return $settings; + } elseif ( isset( $settings[$this->cluster][$indexType] ) ) { + return $settings[$this->cluster][$indexType]; + } elseif ( isset( $settings[$indexType] ) ) { + return $settings[$indexType]; + } + throw new \Exception( "If \$wgCirrusSearchReplicas is " . + "an array it must contain all index types." ); + } +} diff --git a/includes/Connection.php b/includes/Connection.php index 126b90d..76aed8b 100644 --- a/includes/Connection.php +++ b/includes/Connection.php @@ -71,6 +71,11 @@ protected $cluster; /** + * @var ClusterSettings|null + */ + private $clusterSettings; + + /** * @var Connection[] */ private static $pool = array(); @@ -125,6 +130,16 @@ } /** + * @return ClusterSettings + */ + public function getSettings() { + if ( $this->clusterSettings === null ) { + $this->clusterSettings = new ClusterSettings( $this->config, $this->cluster ); + } + return $this->clusterSettings; + } + + /** * @return array(string) */ public function getServerList() { diff --git a/maintenance/updateOneSearchIndexConfig.php b/maintenance/updateOneSearchIndexConfig.php index a46e032..eab8ea5 100644 --- a/maintenance/updateOneSearchIndexConfig.php +++ b/maintenance/updateOneSearchIndexConfig.php @@ -525,28 +525,11 @@ } private function getShardCount() { - global $wgCirrusSearchShardCount; - if ( !isset( $wgCirrusSearchShardCount[ $this->indexType ] ) ) { - $this->error( 'Could not find a shard count for ' . $this->indexType . '. Did you add an index to ' . - '$wgCirrusSearchNamespaceMappings but forget to add it to $wgCirrusSearchShardCount?', 1 ); - } - return $wgCirrusSearchShardCount[ $this->indexType ]; + return $this->getConnection()->getSettings()->getShardCount( $this->indexType ); } private function getReplicaCount() { - global $wgCirrusSearchReplicas; - - // If $wgCirrusSearchReplicas is an array of index type to number of replicas then respect that - if ( is_array( $wgCirrusSearchReplicas ) ) { - if ( isset( $wgCirrusSearchReplicas[ $this->indexType ] ) ) { - return $wgCirrusSearchReplicas[ $this->indexType ]; - } else { - $this->error( 'If wgCirrusSearchReplicas is an array it must contain all index types.', 1 ); - } - } - - // Otherwise its just a raw scalar so we should respect that too - return $wgCirrusSearchReplicas; + return $this->getConnection()->getSettings()->getReplicaCount( $this->indexType ); } } diff --git a/maintenance/updateSuggesterIndex.php b/maintenance/updateSuggesterIndex.php index 35fa1fc..6bb2eb4 100644 --- a/maintenance/updateSuggesterIndex.php +++ b/maintenance/updateSuggesterIndex.php @@ -305,19 +305,7 @@ } private function getReplicaCount() { - global $wgCirrusSearchReplicas; - - // If $wgCirrusSearchReplicas is an array of index type to number of replicas then respect that - if ( is_array( $wgCirrusSearchReplicas ) ) { - if ( isset( $wgCirrusSearchReplicas[ $this->indexTypeName ] ) ) { - return $wgCirrusSearchReplicas[ $this->indexTypeName ]; - } else { - $this->error( 'If wgCirrusSearchReplicas is an array it must contain all index types.', 1 ); - } - } - - // Otherwise its just a raw scalar so we should respect that too - return $wgCirrusSearchReplicas; + return $this->getConnection()->getSettings()->getReplicaCount( $this->indexTypeName ); } private function createMapping() { @@ -339,17 +327,10 @@ } private function getShardCount() { - global $wgCirrusSearchShardCount; - if ( !isset( $wgCirrusSearchShardCount[ $this->indexTypeName ] ) ) { - $this->error( 'Could not find a shard count for ' . $this->indexTypeName . '. Did you add an index to ' . - '$wgCirrusSearchNamespaceMappings but forget to add it to $wgCirrusSearchShardCount?', 1 ); - } - return $wgCirrusSearchShardCount[ $this->indexTypeName ]; + return $this->getConnection()->getSettings()->getShardCount( $this->indexTypeName ); } private function updateVersions() { - global $wgCirrusSearchShardCount; - $this->outputIndented( "Updating tracking indexes..." ); $index = $this->getConnection()->getIndex( 'mw_cirrus_versions' ); if ( !$index->exists() ) { @@ -364,7 +345,7 @@ 'analysis_min' => $aMin, 'mapping_maj' => $mMaj, 'mapping_min' => $mMin, - 'shard_count' => $wgCirrusSearchShardCount[ $this->indexTypeName ], + 'shard_count' => $this->getShardCount(), ) ); $index->getType('version')->addDocument( $doc ); diff --git a/maintenance/updateVersionIndex.php b/maintenance/updateVersionIndex.php index 2ccf0e1..e3122da 100644 --- a/maintenance/updateVersionIndex.php +++ b/maintenance/updateVersionIndex.php @@ -73,12 +73,12 @@ } private function update( $baseName ) { - global $wgCirrusSearchShardCount; $versionType = $this->getType(); $this->outputIndented( "Updating tracking indexes..." ); $docs = array(); list( $aMaj, $aMin ) = explode( '.', \CirrusSearch\Maintenance\AnalysisConfigBuilder::VERSION ); list( $mMaj, $mMin ) = explode( '.', \CirrusSearch\Maintenance\MappingConfigBuilder::VERSION ); + $connSettings = $this->getConnection()->getSettings(); foreach( $this->getConnection()->getAllIndexTypes() as $type ) { $docs[] = new \Elastica\Document( $this->getConnection()->getIndexName( $baseName, $type ), @@ -87,7 +87,7 @@ 'analysis_min' => $aMin, 'mapping_maj' => $mMaj, 'mapping_min' => $mMin, - 'shard_count' => $wgCirrusSearchShardCount[ $type ], + 'shard_count' => $connSettings->getShardCount( $type ), ) ); } diff --git a/tests/unit/ClusterSettingsTest.php b/tests/unit/ClusterSettingsTest.php new file mode 100644 index 0000000..8c72d97 --- /dev/null +++ b/tests/unit/ClusterSettingsTest.php @@ -0,0 +1,81 @@ +<?php + +namespace CirrusSearch; + +class ClusterSettingsTest extends \PHPUnit_Framework_TestCase { + + public static function provideShardCount() { + return array( + 'Handles per-index shard counts' => array( + array( 'general' => 7 ), + 'eqiad', + 'general', + 7, + ), + + 'Handles per-cluster shard counts' => array( + array( 'content' => 6, 'eqiad' => array( 'content' => 9 ) ), + 'eqiad', + 'content', + 9, + ), + ); + } + + /** + * @dataProvider provideShardCount + */ + public function testShardCount( $shardCounts, $cluster, $indexType, $expect ) { + $config = $this->getMockBuilder( 'CirrusSearch\SearchConfig' ) + ->disableOriginalConstructor() + ->getMock(); + $config->expects( $this->any() ) + ->method( 'get' ) + ->with( 'CirrusSearchShardCount' ) + ->will( $this->returnValue( $shardCounts ) ); + + $settings = new ClusterSettings( $config, $cluster ); + $this->assertEquals( $expect, $settings->getShardCount( $indexType ) ); + } + + public static function provideReplicaCounts() { + return array( + 'Simple replica config returns exact setting ' => array( + '0-2', + 'eqiad', + 'content', + '0-2', + ), + + 'Accepts array for replica config' => array( + array( 'content' => '1-2' ), + 'eqiad', + 'content', + '1-2', + ), + + 'Accepts per-cluster replica config' => array( + array( 'content' => '1-2', 'eqiad' => array( 'content' => '2-3' ) ), + 'eqiad', + 'content', + '2-3' + ), + ); + } + + /** + * @dataProvider provideReplicaCounts + */ + public function testReplicaCount( $replicas, $cluster, $indexType, $expect) { + $config = $this->getMockBuilder( 'CirrusSearch\SearchConfig' ) + ->disableOriginalConstructor() + ->getMock(); + $config->expects( $this->any() ) + ->method( 'get' ) + ->with( 'CirrusSearchReplicas' ) + ->will( $this->returnValue( $replicas ) ); + + $settings = new ClusterSettings( $config, $cluster ); + $this->assertEquals( $expect, $settings->getReplicaCount( $indexType ) ); + } +} -- To view, visit https://gerrit.wikimedia.org/r/247345 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I79595a676a26abc8a6abc82569c21cd138cf81a7 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: wmf/1.27.0-wmf.2 Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits