DCausse has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/403199 )

Change subject: Switch saneitizer config to profile manager
......................................................................

Switch saneitizer config to profile manager

Bug: T183279
Change-Id: Ifc6f4f258c1b8c5861bda3f0eafdcd6c47f621aa
---
M CirrusSearch.php
M includes/Job/CheckerJob.php
M includes/Profile/SearchProfileService.php
M includes/Profile/SearchProfileServiceFactory.php
M maintenance/saneitizeJobs.php
R profiles/SaneitizeProfiles.config.php
6 files changed, 26 insertions(+), 7 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/99/403199/1

diff --git a/CirrusSearch.php b/CirrusSearch.php
index a27a1ab..b9dea5e 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -21,7 +21,6 @@
  * http://www.gnu.org/copyleft/gpl.html
  */
 
-require_once __DIR__ . "/profiles/SaneitizeProfiles.php";
 require_once __DIR__ . "/profiles/FullTextQueryBuilderProfiles.config.php";
 
 $wgExtensionCredits['other'][] = [
@@ -1130,7 +1129,7 @@
  * The process will scan and check discrepancies between mysql and
  * elasticsearch for all possible ids in the database.
  * Settings will be automatically chosen according to wiki size (see
- * profiles/SaneitizeProfiles.php)
+ * profiles/SaneitizeProfiles.config.php)
  * The script responsible for pushing sanitization jobs is saneitizeJobs.php.
  * It needs to be scheduled by cron, default settings provided are suited
  * for a bi-hourly schedule (--refresh-freq=7200).
diff --git a/includes/Job/CheckerJob.php b/includes/Job/CheckerJob.php
index 1b13bc5..f898e82 100644
--- a/includes/Job/CheckerJob.php
+++ b/includes/Job/CheckerJob.php
@@ -3,6 +3,7 @@
 namespace CirrusSearch\Job;
 
 use ArrayObject;
+use CirrusSearch\Profile\SearchProfileService;
 use CirrusSearch\Searcher;
 use CirrusSearch\Sanity\Checker;
 use CirrusSearch\Sanity\QueueingRemediator;
@@ -79,7 +80,9 @@
         * @throws \MWException
         */
        protected function doJob() {
-               $profile = $this->searchConfig->getElement( 'CirrusSearchSanitizationProfiles', $this->params['profile'] );
+               $profile = $this->searchConfig
+                       ->getProfileService()
+                       ->loadProfileByName( SearchProfileService::SANEITIZER, $this->params['profile'], false );
                if ( !$profile ) {
                        LoggerFactory::getInstance( 'CirrusSearch' )->warning(
                                "Cannot run CheckerJob invalid profile {profile} provided, check CirrusSearchSanityCheck config.",
diff --git a/includes/Profile/SearchProfileService.php b/includes/Profile/SearchProfileService.php
index f49ce76..48ad6be 100644
--- a/includes/Profile/SearchProfileService.php
+++ b/includes/Profile/SearchProfileService.php
@@ -33,6 +33,7 @@
        const RESCORE_FUNCTION_CHAINS = 'rescore_function_chains';
        const COMPLETION = 'completion';
        const PHRASE_SUGGESTER = 'phrase_suggester';
+       const SANEITIZER = 'saneitizer';
 
        const CONTEXT_FULLTEXT = 'fulltext';
        const CONTEXT_PREFIXSEARCH = 'prefixsearch';
diff --git a/includes/Profile/SearchProfileServiceFactory.php b/includes/Profile/SearchProfileServiceFactory.php
index 1e15821..009f118 100644
--- a/includes/Profile/SearchProfileServiceFactory.php
+++ b/includes/Profile/SearchProfileServiceFactory.php
@@ -28,6 +28,7 @@
                $this->loadRescoreProfiles( $service, $config );
                $this->loadCompletionProfiles( $service, $config );
                $this->loadPhraseSuggesterProfiles( $service, $config );
+               $this->loadSaneitizerProfiles( $service );
 
                if ( $config->isLocalWiki() ) {
                        \Hooks::run( 'CirrusSearchProfileService', [ $service ] );
@@ -143,4 +144,13 @@
                        $service->registerSimpleNameResolver( SearchProfileService::PHRASE_SUGGESTER, $defaultProfile );
                }
        }
+
+       /**
+        * @param SearchProfileService $service
+        */
+       private function loadSaneitizerProfiles( SearchProfileService $service ) {
+               $service->registerArrayRepository( SearchProfileService::SANEITIZER, self::CIRRUS_BASE,
+                       require __DIR__ . '/../../profiles/SaneitizeProfiles.config.php' );
+               // no name resolver, profile is automatically chosen based on wiki
+       }
 }
diff --git a/maintenance/saneitizeJobs.php b/maintenance/saneitizeJobs.php
index 65a99bc..42abbb0 100644
--- a/maintenance/saneitizeJobs.php
+++ b/maintenance/saneitizeJobs.php
@@ -5,6 +5,7 @@
 use CirrusSearch\Connection;
 use CirrusSearch\Job\CheckerJob;
 
+use CirrusSearch\Profile\SearchProfileService;
 use JobQueueGroup;
 
 /**
@@ -95,7 +96,8 @@
                $this->minId = $row->min_id;
                /** @suppress PhanUndeclaredProperty */
                $this->maxId = $row->max_id;
-               $profiles = $this->getSearchConfig()->get( 'CirrusSearchSanitizationProfiles' );
+               $profiles = $this->getSearchConfig()->getProfileService()
+                       ->listExposedProfiles( SearchProfileService::SANEITIZER );
                uasort( $profiles, function ( $a, $b ) {
                        return $a['max_wiki_size'] < $b['max_wiki_size'] ? -1 : 1;
                } );
@@ -145,7 +147,9 @@
        }
 
        private function showJobDetail() {
-               $profile = $this->getSearchConfig()->getElement( 'CirrusSearchSanitizationProfiles', $this->profileName );
+               $profile = $this->getSearchConfig()
+                       ->getProfileService()
+                       ->loadProfileByName( SearchProfileService::SANEITIZER, $this->profileName );
                $minLoopDuration = $profile['min_loop_duration'];
                $maxJobs = $profile['max_checker_jobs'];
                $maxUpdates = $profile['update_jobs_max_pressure'];
@@ -228,7 +232,9 @@
                if ( !$this->getSearchConfig()->get( 'CirrusSearchSanityCheck' ) ) {
                        $this->fatalError( "Sanity check disabled, abandonning...\n" );
                }
-               $profile = $this->getSearchConfig()->getElement( 'CirrusSearchSanitizationProfiles', $this->profileName );
+               $profile = $this->getSearchConfig()
+                       ->getProfileService()
+                       ->loadProfileByName( SearchProfileService::SANEITIZER, $this->profileName );
                $chunkSize = $profile['jobs_chunk_size'];
                $maxJobs = $profile['max_checker_jobs'];
                if ( !$maxJobs || $maxJobs <= 0 ) {
diff --git a/profiles/SaneitizeProfiles.php b/profiles/SaneitizeProfiles.config.php
similarity index 98%
rename from profiles/SaneitizeProfiles.php
rename to profiles/SaneitizeProfiles.config.php
index 54e5003..b20b45f 100644
--- a/profiles/SaneitizeProfiles.php
+++ b/profiles/SaneitizeProfiles.config.php
@@ -26,7 +26,7 @@
  * The first profile that verifies max(page_id)-min(page_id) < max_wiki_size
  * will be chosen (the array is sorted before applying profile selection)
  */
-$wgCirrusSearchSanitizationProfiles = [
+return [
        // Loop in 9 days for 11k ids, 0.00014 jobs/sec, with 18% ids wrong
        // it's 0.0025 updates/sec per cluster
        'XS' => [

-- 
To view, visit https://gerrit.wikimedia.org/r/403199
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ifc6f4f258c1b8c5861bda3f0eafdcd6c47f621aa
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: DCausse <dcau...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to