DCausse has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/402071 )
Change subject: [cleanup] Simplify phrase suggester settings ...................................................................... [cleanup] Simplify phrase suggester settings - remove code to override settings with URI params - switch profile setting to a string instead of the profile array - remove deprecated settings - moved safeguard settings to class constant instead of config vars Change-Id: Id76389c5c1a640bad7175f5507b7abd7f7be4e7f --- M CirrusSearch.php M docs/settings.txt M includes/Api/ConfigDump.php M includes/Hooks.php M includes/Query/FullTextQueryStringQueryBuilder.php M profiles/PhraseSuggesterProfiles.php 6 files changed, 40 insertions(+), 230 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/71/402071/1 diff --git a/CirrusSearch.php b/CirrusSearch.php index 5c85acc..189a1e8 100644 --- a/CirrusSearch.php +++ b/CirrusSearch.php @@ -397,53 +397,10 @@ $wgCirrusSearchEnablePhraseSuggest = true; /** - * NOTE: This settings is deprecated: update or create your own PhraseSuggester profile. - * Maximum number of terms that we ask phrase suggest to correct. - * See max_errors on http://www.elasticsearch.org/guide/reference/api/search/suggest/ - * $wgCirrusSearchPhraseSuggestMaxErrors = 2; - */ - -/** - * NOTE: This settings is deprecated: update or create your own PhraseSuggester profile. - * Confidence level required to suggest new phrases. - * See confidence on http://www.elasticsearch.org/guide/reference/api/search/suggest/ - * $wgCirrusSearchPhraseSuggestConfidence = 2.0; - */ - -/** - * Set the hard limit for $wgCirrusSearchPhraseSuggestMaxErrors. This prevents customizing - * this setting in a way that could hurt the system performances. - */ -$wgCirrusSearchPhraseSuggestMaxErrorsHardLimit = 2; - -/** - * Set the hard limit for $wgCirrusSearchPhraseMaxTermFreq. This prevents customizing - * this setting in a way that could hurt the system performances. - */ -$wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit = 0.6; - -/** - * List of allowed values for the suggest mode - */ -$wgCirrusSearchPhraseSuggestAllowedMode = [ 'missing', 'popular', 'always' ]; - -/** - * List of allowed smoothing models - */ -$wgCirrusSearchPhraseSuggestAllowedSmoothingModel = [ 'stupid_backoff', 'laplace', 'linear' ]; - -/** - * Set the hard limit for $wgCirrusSearchPhraseSuggestPrefixLength. This prevents customizing - * this setting in a way that could hurt the system performances. - * (This is the minimal value) - */ -$wgCirrusSearchPhraseSuggestPrefixLengthHardLimit = 2; - -/** * Set the Phrase suggester settings using the default profile. * see profiles/PhraseSuggesterProfiles.php */ -$wgCirrusSearchPhraseSuggestSettings = $wgCirrusSearchPhraseSuggestProfiles['default']; +$wgCirrusSearchPhraseSuggestSettings = 'default'; /** * Use a reverse field to build the did you mean suggestions. diff --git a/docs/settings.txt b/docs/settings.txt index 131ad0c..f00900b 100644 --- a/docs/settings.txt +++ b/docs/settings.txt @@ -432,49 +432,10 @@ Should the phrase suggester (did you mean) be enabled? -; $wgCirrusSearchPhraseSuggestMaxErrorsHardLimit - -Default: - $wgCirrusSearchPhraseSuggestMaxErrorsHardLimit = 2; - -Set the hard limit for $wgCirrusSearchPhraseSuggestMaxErrors. This prevents customizing -this setting in a way that could hurt the system performances. - -; $wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit - -Default: - $wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit = 0.6; - -Set the hard limit for $wgCirrusSearchPhraseMaxTermFreq. This prevents customizing -this setting in a way that could hurt the system performances. - -; $wgCirrusSearchPhraseSuggestAllowedMode - -Default: - $wgCirrusSearchPhraseSuggestAllowedMode = [ 'missing', 'popular', 'always' ]; - -List of allowed values for the suggest mode. - -; $wgCirrusSearchPhraseSuggestAllowedSmoothingModel - -Default: - $wgCirrusSearchPhraseSuggestAllowedSmoothingModel = [ 'stupid_backoff', 'laplace', 'linear' ]; - -List of allowed smoothing models. - -; $wgCirrusSearchPhraseSuggestPrefixLengthHardLimit - -Default: - $wgCirrusSearchPhraseSuggestPrefixLengthHardLimit = 2; - -Set the hard limit for $wgCirrusSearchPhraseSuggestPrefixLength. This prevents customizing -this setting in a way that could hurt the system performances. -(This is the minimal value) - ; $wgCirrusSearchPhraseSuggestSettings Default: - $wgCirrusSearchPhraseSuggestSettings = $wgCirrusSearchPhraseSuggestProfiles['default']; + $wgCirrusSearchPhraseSuggestSettings = 'default'; Set the Phrase suggester settings using the default profile. see profiles/PhraseSuggesterProfiles.php @@ -1476,28 +1437,6 @@ Customize certain fields with a specific implementation. Useful to apply CirrusSearch specific config to fields controlled by MediaWiki core. - -=== Deprecated settings === - -; $wgCirrusSearchPhraseSuggestMaxErrors - -Default: - $wgCirrusSearchPhraseSuggestMaxErrors = 2; - -NOTE: This setting is deprecated: update or create your own PhraseSuggester profile. - -Maximum number of terms that we ask phrase suggest to correct. -See max_errors on http://www.elasticsearch.org/guide/reference/api/search/suggest/ - -; $wgCirrusSearchPhraseSuggestConfidence - -Default: - $wgCirrusSearchPhraseSuggestConfidence = 2.0; - -NOTE: This setting is deprecated: update or create your own PhraseSuggester profile. - -Confidence level required to suggest new phrases. -See confidence on http://www.elasticsearch.org/guide/reference/api/search/suggest/ ; $wgCirrusSearchExtraIndexSettings diff --git a/includes/Api/ConfigDump.php b/includes/Api/ConfigDump.php index e3d3c1f..c14db45 100644 --- a/includes/Api/ConfigDump.php +++ b/includes/Api/ConfigDump.php @@ -81,6 +81,7 @@ 'CirrusSearchEnableArchive', 'CirrusSearchUseIcuFolding', 'CirrusSearchUseIcuTokenizer', + 'CirrusSearchPhraseSuggestProfiles', // All the config below was added when moving this data // from CirrusSearch config to a static array in this class 'CirrusSearchDevelOptions', diff --git a/includes/Hooks.php b/includes/Hooks.php index 42950dc..18d20fd 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -102,7 +102,6 @@ } self::overrideMoreLikeThisOptionsFromMessage(); - PhraseSuggesterProfiles::overrideOptionsFromMessage(); if ( $request ) { // Engage the experimental highlighter if a url parameter requests it @@ -119,7 +118,6 @@ self::overrideYesNo( $wgCirrusSearchAllFieldsForRescore, $request, 'cirrusUseAllFieldsForRescore' ); self::overrideUseExtraPluginForRegex( $request ); self::overrideMoreLikeThisOptions( $request ); - PhraseSuggesterProfiles::overrideOptions( $request ); RescoreProfiles::overrideOptions( $request ); FullTextQueryBuilderProfiles::overrideOptions( $request ); self::overrideSecret( $wgCirrusSearchLogElasticRequests, $wgCirrusSearchLogElasticRequestsSecret, $request, 'cirrusLogElasticRequests', false ); diff --git a/includes/Query/FullTextQueryStringQueryBuilder.php b/includes/Query/FullTextQueryStringQueryBuilder.php index 6a0458d..4f1fea8 100644 --- a/includes/Query/FullTextQueryStringQueryBuilder.php +++ b/includes/Query/FullTextQueryStringQueryBuilder.php @@ -2,10 +2,12 @@ namespace CirrusSearch\Query; +use CirrusSearch\PhraseSuggesterProfiles; use CirrusSearch\SearchConfig; use CirrusSearch\Searcher; use CirrusSearch\Search\SearchContext; use CirrusSearch\Extra\Query\TokenCountRouter; +use Elastica\Exception\RuntimeException; use MediaWiki\Logger\LoggerFactory; /** @@ -275,16 +277,18 @@ */ private function buildSuggestConfig( $field, $searchContext ) { // check deprecated settings - $suggestSettings = $this->config->get( 'CirrusSearchPhraseSuggestSettings' ); - $maxErrors = $this->config->get( 'CirrusSearchPhraseSuggestMaxErrors' ); - if ( isset( $maxErrors ) ) { - $suggestSettings['max_errors'] = $maxErrors; - } - $confidence = $this->config->get( 'CirrusSearchPhraseSuggestMaxErrors' ); - if ( isset( $confidence ) ) { - $suggestSettings['confidence'] = $confidence; + $profile = $this->config->get( 'CirrusSearchPhraseSuggestSettings' ); + if ( is_array( $profile ) ) { + // BC code to support profiles set as array in this config var + $suggestSettings = $profile; + } else { + $suggestSettings = $this->config->getElement( 'CirrusSearchPhraseSuggestProfiles', $profile ); + if ( !$suggestSettings ) { + throw new RuntimeException( "Unkown phrase_suggest profile $profile" ); + } } + PhraseSuggesterProfiles::overrideOptionsFromMessage( $suggestSettings ); $settings = [ 'phrase' => [ 'field' => $field, diff --git a/profiles/PhraseSuggesterProfiles.php b/profiles/PhraseSuggesterProfiles.php index 09ee5ff..6a5d5cc 100644 --- a/profiles/PhraseSuggesterProfiles.php +++ b/profiles/PhraseSuggesterProfiles.php @@ -22,102 +22,20 @@ */ class PhraseSuggesterProfiles { + const MAX_ERRORS_HARD_LIMIT = 2; + const MAX_TERM_FREQ_HARD_LIMIT = 0.6; /** - * Override Phrase suggester options ("Did you mean?" suggestions) - * - * @param \WebRequest $request + * @var string[] */ - public static function overrideOptions( $request ) { - global $wgCirrusSearchPhraseSuggestMaxErrors, - $wgCirrusSearchPhraseSuggestConfidence, - $wgCirrusSearchPhraseSuggestSettings, - $wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit, - $wgCirrusSearchPhraseSuggestMaxErrorsHardLimit, - $wgCirrusSearchPhraseSuggestPrefixLengthHardLimit, - $wgCirrusSearchPhraseSuggestAllowedMode, - $wgCirrusSearchPhraseSuggestAllowedSmoothingModel, - $wgCirrusSearchPhraseSuggestReverseField; + private static $ALLOWED_MODE = [ 'missing', 'popular', 'always' ]; - Util::overrideYesNo( $wgCirrusSearchPhraseSuggestReverseField['use'], $request, - 'cirrusSuggUseReverse' ); - Util::overrideNumeric( $wgCirrusSearchPhraseSuggestSettings['max_errors'], $request, - 'cirrusSuggMaxErrors', $wgCirrusSearchPhraseSuggestMaxErrorsHardLimit ); - Util::overrideNumeric( $wgCirrusSearchPhraseSuggestSettings['confidence'], $request, - 'cirrusSuggConfidence' ); - Util::overrideNumeric( $wgCirrusSearchPhraseSuggestSettings['max_term_freq'], $request, - 'cirrusSuggMaxTermFreq', $wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit ); - Util::overrideNumeric( $wgCirrusSearchPhraseSuggestSettings['min_doc_freq'], $request, - 'cirrusSuggMinDocFreq' ); - Util::overrideNumeric( $wgCirrusSearchPhraseSuggestSettings['prefix_length'], $request, - 'cirrusSuggPrefixLength', $wgCirrusSearchPhraseSuggestPrefixLengthHardLimit, false ); - $mode = $request->getVal( 'cirrusSuggMode' ); - if ( isset( $mode ) && in_array( $mode, $wgCirrusSearchPhraseSuggestAllowedMode ) ) { - $wgCirrusSearchPhraseSuggestSettings['mode'] = $mode; - } - - // NOTE: we do not allow collate_minimum_should_match to be customized, it'd be hard to parse. - Util::overrideYesNo( $wgCirrusSearchPhraseSuggestSettings['collate'], $request, 'cirrusSuggCollate' ); - - $smoothing = $request->getVal( 'cirrusSuggSmoothing' ); - if ( isset( $smoothing ) && in_array( $smoothing, $wgCirrusSearchPhraseSuggestAllowedSmoothingModel ) ) { - // We do not support linear_interpolation customization yet, should be added - // later if proven useful. - switch ( $smoothing ) { - case 'laplace' : - $wgCirrusSearchPhraseSuggestSettings['smoothing_model'] = [ - 'laplace' => [ - 'alpha' => 0.5 - ] - ]; - break; - case 'stupid_backoff' : - $wgCirrusSearchPhraseSuggestSettings['smoothing_model'] = [ - 'stupid_backoff' => [ - 'discount' => 0.4 - ] - ]; - break; - } - } - - // Custom discount for stupid_backoff smoothing model - if ( isset( $wgCirrusSearchPhraseSuggestSettings['smoothing_model']['stupid_backoff'] ) ) { - $discount = $request->getVal( 'cirrusSuggDiscount' ); - if ( is_numeric( $discount ) && $discount <= 1 && $discount >= 0 ) { - $wgCirrusSearchPhraseSuggestSettings['smoothing_model']['stupid_backoff']['discount'] = floatval( $discount ); - } - } - - // Custom alpha for laplace smoothing model - if ( isset( $wgCirrusSearchPhraseSuggestSettings['smoothing_model']['laplace'] ) ) { - $alpha = $request->getVal( 'cirrusSuggAlpha' ); - if ( is_numeric( $alpha ) && $alpha <= 1 && $alpha >= 0 ) { - $wgCirrusSearchPhraseSuggestSettings['smoothing_model']['laplace']['alpha'] = floatval( $alpha ); - } - } - - // Support deprecated settings - if ( isset( $wgCirrusSearchPhraseSuggestConfidence ) ) { - Util::overrideNumeric( $wgCirrusSearchPhraseSuggestConfidence, $request, 'cirrusSuggConfidence' ); - } - if ( isset( $wgCirrusSearchPhraseSuggestMaxErrors ) ) { - Util::overrideNumeric( $wgCirrusSearchPhraseSuggestMaxErrors, $request, 'cirrusSuggMaxErrors', - $wgCirrusSearchPhraseSuggestMaxErrorsHardLimit ); - } - } + const PREFIX_LENGTH_HARD_LIMIT = 2; /** * Override Phrase suggester options ("Did you mean?" suggestions) + * @param array &$settings */ - public static function overrideOptionsFromMessage() { - global $wgCirrusSearchPhraseSuggestMaxErrors, - $wgCirrusSearchPhraseSuggestConfidence, - $wgCirrusSearchPhraseSuggestSettings, - $wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit, - $wgCirrusSearchPhraseSuggestMaxErrorsHardLimit, - $wgCirrusSearchPhraseSuggestPrefixLengthHardLimit, - $wgCirrusSearchPhraseSuggestAllowedMode; - + public static function overrideOptionsFromMessage( &$settings ) { $cache = MediaWikiServices::getInstance()->getLocalServerObjectCache(); $lines = $cache->getWithSetCallback( $cache->makeKey( 'cirrussearch-didyoumean-settings' ), @@ -144,58 +62,51 @@ switch ( $k ) { case 'max_errors' : - if ( is_numeric( $v ) && $v >= 1 && $v <= $wgCirrusSearchPhraseSuggestMaxErrorsHardLimit ) { - $wgCirrusSearchPhraseSuggestSettings['max_errors'] = floatval( $v ); - // Support deprecated settings - if ( isset( $wgCirrusSearchPhraseSuggestMaxErrors ) ) { - $wgCirrusSearchPhraseSuggestMaxErrors = floatval( $v ); - } + if ( is_numeric( $v ) && $v >= 1 && $v <= self::MAX_ERRORS_HARD_LIMIT ) { + $settings['max_errors'] = floatval( $v ); } break; case 'confidence' : if ( is_numeric( $v ) && $v >= 0 ) { - $wgCirrusSearchPhraseSuggestSettings['confidence'] = floatval( $v ); - if ( isset( $wgCirrusSearchPhraseSuggestConfidence ) ) { - $wgCirrusSearchPhraseSuggestConfidence = floatval( $v ); - } + $settings['confidence'] = floatval( $v ); } break; case 'max_term_freq' : - if ( is_numeric( $v ) && $v >= 0 && $v <= $wgCirrusSearchPhraseSuggestMaxTermFreqHardLimit ) { - $wgCirrusSearchPhraseSuggestSettings['max_term_freq'] = floatval( $v ); + if ( is_numeric( $v ) && $v >= 0 && $v <= self::MAX_TERM_FREQ_HARD_LIMIT ) { + $settings['max_term_freq'] = floatval( $v ); } break; case 'min_doc_freq' : if ( is_numeric( $v ) && $v >= 0 && $v < 1 ) { - $wgCirrusSearchPhraseSuggestSettings['min_doc_freq'] = floatval( $v ); + $settings['min_doc_freq'] = floatval( $v ); } break; case 'prefix_length' : - if ( is_numeric( $v ) && $v >= 0 && $v <= $wgCirrusSearchPhraseSuggestPrefixLengthHardLimit ) { - $wgCirrusSearchPhraseSuggestSettings['prefix_length'] = intval( $v ); + if ( is_numeric( $v ) && $v >= 0 && $v <= self::PREFIX_LENGTH_HARD_LIMIT ) { + $settings['prefix_length'] = intval( $v ); } break; case 'suggest_mode' : - if ( in_array( $v, $wgCirrusSearchPhraseSuggestAllowedMode ) ) { - $wgCirrusSearchPhraseSuggestSettings['mode'] = $v; + if ( in_array( $v, self::$ALLOWED_MODE ) ) { + $settings['mode'] = $v; } break; case 'collate' : if ( $v === 'true' ) { - $wgCirrusSearchPhraseSuggestSettings['collate'] = true; + $settings['collate'] = true; } elseif ( $v === 'false' ) { - $wgCirrusSearchPhraseSuggestSettings['collate'] = false; + $settings['collate'] = false; } break; case 'smoothing' : if ( $v === 'laplace' ) { - $wgCirrusSearchPhraseSuggestSettings['smoothing_model'] = [ + $settings['smoothing_model'] = [ 'laplace' => [ 'alpha' => 0.5 ] ]; } elseif ( $v === 'stupid_backoff' ) { - $wgCirrusSearchPhraseSuggestSettings['smoothing_model'] = [ + $settings['smoothing_model'] = [ 'stupid_backoff' => [ 'discount' => 0.4 ] @@ -216,15 +127,15 @@ } // Apply smoothing model options, if none provided we'll use elasticsearch defaults - if ( isset( $wgCirrusSearchPhraseSuggestSettings['smoothing_model']['laplace'] ) && + if ( isset( $settings['smoothing_model']['laplace'] ) && isset( $laplaceAlpha ) ) { - $wgCirrusSearchPhraseSuggestSettings['smoothing_model']['laplace'] = [ + $settings['smoothing_model']['laplace'] = [ 'alpha' => $laplaceAlpha ]; } - if ( isset( $wgCirrusSearchPhraseSuggestSettings['smoothing_model']['stupid_backoff'] ) && + if ( isset( $settings['smoothing_model']['stupid_backoff'] ) && isset( $stupidBackoffDiscount ) ) { - $wgCirrusSearchPhraseSuggestSettings['smoothing_model']['stupid_backoff'] = [ + $settings['smoothing_model']['stupid_backoff'] = [ 'discount' => $stupidBackoffDiscount ]; } -- To view, visit https://gerrit.wikimedia.org/r/402071 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Id76389c5c1a640bad7175f5507b7abd7f7be4e7f Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: DCausse <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
