DCausse has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/403199 )
Change subject: Switch saneitizer config to profile manager ...................................................................... Switch saneitizer config to profile manager Bug: T183279 Change-Id: Ifc6f4f258c1b8c5861bda3f0eafdcd6c47f621aa --- M CirrusSearch.php M includes/Job/CheckerJob.php M includes/Profile/SearchProfileService.php M includes/Profile/SearchProfileServiceFactory.php M maintenance/saneitizeJobs.php R profiles/SaneitizeProfiles.config.php 6 files changed, 26 insertions(+), 7 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/99/403199/1 diff --git a/CirrusSearch.php b/CirrusSearch.php index a27a1ab..b9dea5e 100644 --- a/CirrusSearch.php +++ b/CirrusSearch.php @@ -21,7 +21,6 @@ * http://www.gnu.org/copyleft/gpl.html */ -require_once __DIR__ . "/profiles/SaneitizeProfiles.php"; require_once __DIR__ . "/profiles/FullTextQueryBuilderProfiles.config.php"; $wgExtensionCredits['other'][] = [ @@ -1130,7 +1129,7 @@ * The process will scan and check discrepancies between mysql and * elasticsearch for all possible ids in the database. * Settings will be automatically chosen according to wiki size (see - * profiles/SaneitizeProfiles.php) + * profiles/SaneitizeProfiles.config.php) * The script responsible for pushing sanitization jobs is saneitizeJobs.php. * It needs to be scheduled by cron, default settings provided are suited * for a bi-hourly schedule (--refresh-freq=7200). diff --git a/includes/Job/CheckerJob.php b/includes/Job/CheckerJob.php index 1b13bc5..f898e82 100644 --- a/includes/Job/CheckerJob.php +++ b/includes/Job/CheckerJob.php @@ -3,6 +3,7 @@ namespace CirrusSearch\Job; use ArrayObject; +use CirrusSearch\Profile\SearchProfileService; use CirrusSearch\Searcher; use CirrusSearch\Sanity\Checker; use CirrusSearch\Sanity\QueueingRemediator; @@ -79,7 +80,9 @@ * @throws \MWException */ protected function doJob() { - $profile = $this->searchConfig->getElement( 'CirrusSearchSanitizationProfiles', $this->params['profile'] ); + $profile = $this->searchConfig + ->getProfileService() + ->loadProfileByName( SearchProfileService::SANEITIZER, $this->params['profile'], false ); if ( !$profile ) { LoggerFactory::getInstance( 'CirrusSearch' )->warning( "Cannot run CheckerJob invalid profile {profile} provided, check CirrusSearchSanityCheck config.", diff --git a/includes/Profile/SearchProfileService.php b/includes/Profile/SearchProfileService.php index f49ce76..48ad6be 100644 --- a/includes/Profile/SearchProfileService.php +++ b/includes/Profile/SearchProfileService.php @@ -33,6 +33,7 @@ const RESCORE_FUNCTION_CHAINS = 'rescore_function_chains'; const COMPLETION = 'completion'; const PHRASE_SUGGESTER = 'phrase_suggester'; + const SANEITIZER = 'saneitizer'; const CONTEXT_FULLTEXT = 'fulltext'; const CONTEXT_PREFIXSEARCH = 'prefixsearch'; diff --git a/includes/Profile/SearchProfileServiceFactory.php b/includes/Profile/SearchProfileServiceFactory.php index 1e15821..009f118 100644 --- a/includes/Profile/SearchProfileServiceFactory.php +++ b/includes/Profile/SearchProfileServiceFactory.php @@ -28,6 +28,7 @@ $this->loadRescoreProfiles( $service, $config ); $this->loadCompletionProfiles( $service, $config ); $this->loadPhraseSuggesterProfiles( $service, $config ); + $this->loadSaneitizerProfiles( $service ); if ( $config->isLocalWiki() ) { \Hooks::run( 'CirrusSearchProfileService', [ $service ] ); @@ -143,4 +144,13 @@ $service->registerSimpleNameResolver( SearchProfileService::PHRASE_SUGGESTER, $defaultProfile ); } } + + /** + * @param SearchProfileService $service + */ + private function loadSaneitizerProfiles( SearchProfileService $service ) { + $service->registerArrayRepository( SearchProfileService::SANEITIZER, self::CIRRUS_BASE, + require __DIR__ . '/../../profiles/SaneitizeProfiles.config.php' ); + // no name resolver, profile is automatically chosen based on wiki + } } diff --git a/maintenance/saneitizeJobs.php b/maintenance/saneitizeJobs.php index 65a99bc..42abbb0 100644 --- a/maintenance/saneitizeJobs.php +++ b/maintenance/saneitizeJobs.php @@ -5,6 +5,7 @@ use CirrusSearch\Connection; use CirrusSearch\Job\CheckerJob; +use CirrusSearch\Profile\SearchProfileService; use JobQueueGroup; /** @@ -95,7 +96,8 @@ $this->minId = $row->min_id; /** @suppress PhanUndeclaredProperty */ $this->maxId = $row->max_id; - $profiles = $this->getSearchConfig()->get( 'CirrusSearchSanitizationProfiles' ); + $profiles = $this->getSearchConfig()->getProfileService() + ->listExposedProfiles( SearchProfileService::SANEITIZER ); uasort( $profiles, function ( $a, $b ) { return $a['max_wiki_size'] < $b['max_wiki_size'] ? -1 : 1; } ); @@ -145,7 +147,9 @@ } private function showJobDetail() { - $profile = $this->getSearchConfig()->getElement( 'CirrusSearchSanitizationProfiles', $this->profileName ); + $profile = $this->getSearchConfig() + ->getProfileService() + ->loadProfileByName( SearchProfileService::SANEITIZER, $this->profileName ); $minLoopDuration = $profile['min_loop_duration']; $maxJobs = $profile['max_checker_jobs']; $maxUpdates = $profile['update_jobs_max_pressure']; @@ -228,7 +232,9 @@ if ( !$this->getSearchConfig()->get( 'CirrusSearchSanityCheck' ) ) { $this->fatalError( "Sanity check disabled, abandonning...\n" ); } - $profile = $this->getSearchConfig()->getElement( 'CirrusSearchSanitizationProfiles', $this->profileName ); + $profile = $this->getSearchConfig() + ->getProfileService() + ->loadProfileByName( SearchProfileService::SANEITIZER, $this->profileName ); $chunkSize = $profile['jobs_chunk_size']; $maxJobs = $profile['max_checker_jobs']; if ( !$maxJobs || $maxJobs <= 0 ) { diff --git a/profiles/SaneitizeProfiles.php b/profiles/SaneitizeProfiles.config.php similarity index 98% rename from profiles/SaneitizeProfiles.php rename to profiles/SaneitizeProfiles.config.php index 54e5003..b20b45f 100644 --- a/profiles/SaneitizeProfiles.php +++ b/profiles/SaneitizeProfiles.config.php @@ -26,7 +26,7 @@ * The first profile that verifies max(page_id)-min(page_id) < max_wiki_size * will be chosen (the array is sorted before applying profile selection) */ -$wgCirrusSearchSanitizationProfiles = [ +return [ // Loop in 9 days for 11k ids, 0.00014 jobs/sec, with 18% ids wrong // it's 0.0025 updates/sec per cluster 'XS' => [ -- To view, visit https://gerrit.wikimedia.org/r/403199 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ifc6f4f258c1b8c5861bda3f0eafdcd6c47f621aa Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: DCausse <dcau...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits