DCausse has uploaded a new change for review.
https://gerrit.wikimedia.org/r/249460
Change subject: [WIP] Allow customization of rescore window
......................................................................
[WIP] Allow customization of rescore window
WIP:
* need to write unit tests
* write better docs for profiles
* write sensible profile
Change-Id: I15296898b15761e85555ea42d9fa23c2a22f82b5
---
M CirrusSearch.php
M autoload.php
M includes/BuildDocument/SuggestScoring.php
M includes/Hooks.php
M includes/InterwikiSearcher.php
A includes/Search/RescoreBuilders.php
M includes/Search/SearchContext.php
M includes/Searcher.php
M includes/Util.php
M profiles/CommonTermsQueryProfiles.php
M profiles/PhraseSuggesterProfiles.php
A profiles/RescoreProfiles.php
M profiles/SuggestProfiles.php
13 files changed, 914 insertions(+), 251 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch
refs/changes/60/249460/1
diff --git a/CirrusSearch.php b/CirrusSearch.php
index efe253d..6a5ce79 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -24,6 +24,7 @@
require_once __DIR__ . "/profiles/SuggestProfiles.php";
require_once __DIR__ . "/profiles/PhraseSuggesterProfiles.php";
require_once __DIR__ . "/profiles/CommonTermsQueryProfiles.php";
+require_once __DIR__ . "/profiles/RescoreProfiles.php";
$wgExtensionCredits['other'][] = array(
'path' => __FILE__,
@@ -813,6 +814,12 @@
*/
$wgCirrusSearchCustomFields = array();
+/**
+ * Set the rescore profile to default.
+ * See profiles/RescoreProfiles.php for more info.
+ */
+$wgCirrusSearchRescoreProfile = $wgCirrusSearchRescoreProfiles['default'];
+
$includes = __DIR__ . "/includes/";
$apiDir = $includes . 'Api/';
$buildDocument = $includes . 'BuildDocument/';
diff --git a/autoload.php b/autoload.php
index 1673d56..e27d540 100644
--- a/autoload.php
+++ b/autoload.php
@@ -45,6 +45,7 @@
'CirrusSearch\\Maintenance\\AnalysisConfigBuilder' => __DIR__ .
'/includes/Maintenance/AnalysisConfigBuilder.php',
'CirrusSearch\\Maintenance\\ChunkBuilder' => __DIR__ .
'/includes/Maintenance/ChunkBuilder.php',
'CirrusSearch\\Maintenance\\ConfigUtils' => __DIR__ .
'/includes/Maintenance/ConfigUtils.php',
+ 'CirrusSearch\\Maintenance\\CopySearchIndex' => __DIR__ .
'/maintenance/copySearchIndex.php',
'CirrusSearch\\Maintenance\\DumpIndex' => __DIR__ .
'/maintenance/dumpIndex.php',
'CirrusSearch\\Maintenance\\FreezeWritesToCluster' => __DIR__ .
'/maintenance/freezeWritesToCluster.php',
'CirrusSearch\\Maintenance\\IndexDumperException' => __DIR__ .
'/maintenance/dumpIndex.php',
@@ -81,12 +82,22 @@
'CirrusSearch\\Sanity\\QueueingRemediator' => __DIR__ .
'/includes/Sanity/QueueingRemediator.php',
'CirrusSearch\\Sanity\\Remediator' => __DIR__ .
'/includes/Sanity/Remediator.php',
'CirrusSearch\\SearchConfig' => __DIR__ . '/includes/SearchConfig.php',
+ 'CirrusSearch\\Search\\BoostTemplatesFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
+ 'CirrusSearch\\Search\\CustomFieldFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
'CirrusSearch\\Search\\Escaper' => __DIR__ .
'/includes/Search/Escaper.php',
'CirrusSearch\\Search\\FancyTitleResultsType' => __DIR__ .
'/includes/Search/ResultsType.php',
'CirrusSearch\\Search\\Filters' => __DIR__ .
'/includes/Search/Filters.php',
'CirrusSearch\\Search\\FullTextResultsType' => __DIR__ .
'/includes/Search/ResultsType.php',
+ 'CirrusSearch\\Search\\FunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
+ 'CirrusSearch\\Search\\FunctionScoreChain' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
+ 'CirrusSearch\\Search\\FunctionScoreDecorator' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
'CirrusSearch\\Search\\IdResultsType' => __DIR__ .
'/includes/Search/ResultsType.php',
+ 'CirrusSearch\\Search\\IncomingLinksFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
'CirrusSearch\\Search\\InterwikiResultsType' => __DIR__ .
'/includes/Search/ResultsType.php',
+ 'CirrusSearch\\Search\\LangWeightFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
+ 'CirrusSearch\\Search\\NamespacesFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
+ 'CirrusSearch\\Search\\PreferRecentFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
+ 'CirrusSearch\\Search\\RescoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
'CirrusSearch\\Search\\Result' => __DIR__ .
'/includes/Search/Result.php',
'CirrusSearch\\Search\\ResultSet' => __DIR__ .
'/includes/Search/ResultSet.php',
'CirrusSearch\\Search\\ResultsType' => __DIR__ .
'/includes/Search/ResultsType.php',
diff --git a/includes/BuildDocument/SuggestScoring.php
b/includes/BuildDocument/SuggestScoring.php
index fb9c4df..68bc76e 100644
--- a/includes/BuildDocument/SuggestScoring.php
+++ b/includes/BuildDocument/SuggestScoring.php
@@ -124,11 +124,11 @@
/**
* @param integer $maxDocs the number of docs in the index
* @param array of key values, key is the template name, value the
boost factor.
- * Defaults to Searcher::getDefaultBoostTemplates()
+ * Defaults to Util::getDefaultBoostTemplates()
*/
public function __construct( $maxDocs, $boostTemplates = null ) {
$this->maxDocs = $maxDocs;
- $this->boostTemplates = $boostTemplates ?:
Searcher::getDefaultBoostTemplates();
+ $this->boostTemplates = $boostTemplates ?:
Util::getDefaultBoostTemplates();
// We normalize incoming links according to the size of the
index
$this->incomingLinksNorm = (int) ($maxDocs *
self::INCOMING_LINKS_MAX_DOCS_FACTOR);
if ( $this->incomingLinksNorm < 1 ) {
diff --git a/includes/Hooks.php b/includes/Hooks.php
index e006e2d..4130251 100644
--- a/includes/Hooks.php
+++ b/includes/Hooks.php
@@ -115,6 +115,7 @@
self::overrideMoreLikeThisOptions( $request );
PhraseSuggesterProfiles::overrideOptions( $request );
CommonTermsQueryProfiles::overrideOptions( $request );
+ RescoreProfiles::overrideOptions( $request );
self::overrideSecret(
$wgCirrusSearchLogElasticRequests, $wgCirrusSearchLogElasticRequestsSecret,
$request, 'cirrusLogElasticRequests', false );
self::overrideYesNo( $wgCirrusSearchEnableAltLanguage,
$request, 'cirrusAltLanguage' );
}
diff --git a/includes/InterwikiSearcher.php b/includes/InterwikiSearcher.php
index 1365f49..9dbd716 100644
--- a/includes/InterwikiSearcher.php
+++ b/includes/InterwikiSearcher.php
@@ -41,14 +41,14 @@
* @param string $interwiki Interwiki prefix we're searching
*/
public function __construct( Connection $connection, array $namespaces,
User $user = null, $index, $interwiki ) {
- parent::__construct( $connection, 0, self::MAX_RESULTS, null,
$namespaces, $user, $index );
- $this->interwiki = $interwiki;
// Only allow core namespaces. We can't be sure any others exist
- if ( $this->namespaces !== null ) {
- $this->namespaces = array_filter( $namespaces,
function( $namespace ) {
+ if ( $namespaces !== null ) {
+ $namespaces = array_filter( $namespaces, function(
$namespace ) {
return $namespace <= 15;
} );
}
+ parent::__construct( $connection, 0, self::MAX_RESULTS, null,
$namespaces, $user, $index );
+ $this->interwiki = $interwiki;
}
/**
@@ -64,8 +64,8 @@
return;
}
- $namespaceKey = $this->namespaces !== null ?
- implode( ',', $this->namespaces ) : '';
+ $namespaceKey = $this->getNamespaces() !== null ?
+ implode( ',', $this->getNamespaces() ) : '';
$results = array();
$key = wfMemcKey(
diff --git a/includes/Search/RescoreBuilders.php
b/includes/Search/RescoreBuilders.php
new file mode 100644
index 0000000..5ac4794
--- /dev/null
+++ b/includes/Search/RescoreBuilders.php
@@ -0,0 +1,539 @@
+<?php
+
+namespace CirrusSearch\Search;
+
+use CirrusSearch\Util;
+use Elastica\Query\FunctionScore;
+use Elastica\Filter\AbstractFilter;
+use MWNamespace;
+
+
+
+/**
+ * Set of rescore builders
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+/**
+ * Builds the list of rescore queries by reading a rescore profile.
+ *
+ * Keys read from a profile:
+ *  - supported_namespaces: 'all' or an array of namespaces
+ *  - fallback_profile: name of the profile used when a requested namespace
+ *    is not supported (mandatory unless supported_namespaces is 'all')
+ *  - rescore: list of rescore definitions, each one providing 'window',
+ *    optionally 'window_size_override', 'type', 'function_chain' and any of
+ *    the params listed in $RESCORE_MAIN_PARAMS.
+ */
+class RescoreBuilder {
+	/**
+	 * List of allowed rescore params
+	 * @todo: refactor to const with php 5.6
+	 */
+	private $RESCORE_MAIN_PARAMS = array(
+		'query_weight',
+		'rescore_query_weight',
+		'score_mode'
+	);
+
+	const FUNCTION_SCORE_TYPE = "function_score";
+
+	/**
+	 * @var SearchContext
+	 */
+	private $context;
+
+	/**
+	 * @var array a rescore profile
+	 */
+	private $profile;
+
+	/**
+	 * @param SearchContext $context the search context
+	 * @param array $profile the rescore profile; replaced by its fallback
+	 *  profile when it does not support the requested namespaces
+	 * @throws \Exception if the profile (or its fallback) is invalid
+	 */
+	public function __construct( SearchContext $context, $profile ) {
+		$this->context = $context;
+		$this->profile = $this->getSupportedProfile( $profile );
+	}
+
+	/**
+	 * @return array of rescore queries; definitions whose rescore query
+	 *  turns out to be empty are left out
+	 * @throws \Exception on unsupported rescore type
+	 */
+	public function build() {
+		$rescores = array();
+		// Loop invariant: the set of params copied verbatim from the definition
+		$allowedParams = array_flip( $this->RESCORE_MAIN_PARAMS );
+		foreach ( $this->profile['rescore'] as $rescoreDef ) {
+			$rescore = array(
+				'window_size' => $this->windowSize( $rescoreDef ),
+				'query' => array_intersect_key( $rescoreDef, $allowedParams ),
+			);
+			$rescoreQuery = $this->buildRescoreQuery( $rescoreDef );
+			if ( $rescoreQuery === null ) {
+				// None of the functions were needed, nothing to rescore
+				continue;
+			}
+			$rescore['query']['rescore_query'] = $rescoreQuery;
+			$rescores[] = $rescore;
+		}
+		return $rescores;
+	}
+
+	/**
+	 * Builds the 'rescore_query' attribute by reading the definition type
+	 * @param array $rescoreDef a rescore definition
+	 * @return FunctionScore|null the rescore query or null if it is empty
+	 * @throws \Exception if the type is not supported
+	 */
+	private function buildRescoreQuery( $rescoreDef ) {
+		switch ( $rescoreDef['type'] ) {
+		case self::FUNCTION_SCORE_TYPE:
+			$funcChain = new FunctionScoreChain( $this->context, $rescoreDef['function_chain'] );
+			return $funcChain->buildRescoreQuery();
+		default:
+			throw new \Exception( "Unsupported rescore query type: " . $rescoreDef['type'] );
+		}
+	}
+
+	/**
+	 * @param array $rescore a rescore definition
+	 * @return integer the window size defined in the profile
+	 *  or the value from config if window_size_override is set.
+	 */
+	private function windowSize( $rescore ) {
+		if ( isset( $rescore['window_size_override'] ) ) {
+			$windowSize = $this->context->getConfig()->get( $rescore['window_size_override'] );
+			if ( $windowSize !== null ) {
+				return $windowSize;
+			}
+		}
+		return $rescore['window'];
+	}
+
+	/**
+	 * Inspect requested namespaces and return the supported profile
+	 * @param array $profile the requested rescore profile
+	 * @return array the supported rescore profile.
+	 * @throws \Exception if the profile is invalid
+	 */
+	private function getSupportedProfile( $profile ) {
+		if ( $profile['supported_namespaces'] === 'all' ) {
+			return $profile;
+		}
+
+		if ( !is_array( $profile['supported_namespaces'] ) ) {
+			throw new \Exception( "Invalid rescore profile: supported_namespaces should be 'all' or an array of namespaces" );
+		}
+
+		if ( !isset( $profile['fallback_profile'] ) ) {
+			throw new \Exception( "Invalid rescore profile: fallback_profile is mandatory if supported_namespaces is not 'all'." );
+		}
+
+		$queryNs = $this->context->getNamespaces();
+
+		if ( !$queryNs ) {
+			// According to comments in Searcher if namespaces is
+			// not set we run the query on all namespaces
+			// @todo: verify comments.
+			return $this->getFallbackProfile( $profile['fallback_profile'] );
+		}
+
+		// A single unsupported namespace is enough to switch to the fallback
+		foreach ( $queryNs as $ns ) {
+			if ( !in_array( $ns, $profile['supported_namespaces'] ) ) {
+				return $this->getFallbackProfile( $profile['fallback_profile'] );
+			}
+		}
+		return $profile;
+	}
+
+	/**
+	 * @param string $profileName the profile to load
+	 * @return array the rescore profile identified by $profileName
+	 * @throws \Exception if the profile is unknown or does not support
+	 *  all namespaces
+	 */
+	private function getFallbackProfile( $profileName ) {
+		$profile = $this->context->getConfig()->getElement( 'CirrusSearchRescoreProfiles', $profileName );
+		if ( !$profile ) {
+			throw new \Exception( "Unknown fallback profile $profileName." );
+		}
+		if ( $profile['supported_namespaces'] !== 'all' ) {
+			throw new \Exception( "Fallback profile $profileName must support all namespaces." );
+		}
+		return $profile;
+	}
+}
+
+/**
+ * Builds a function score rescore query from a named chain of functions
+ * read from the CirrusSearchRescoreFunctionScoreChains config.
+ */
+class FunctionScoreChain {
+	/**
+	 * @var SearchContext
+	 */
+	private $context;
+
+	/**
+	 * @var FunctionScoreDecorator
+	 */
+	private $functionScore;
+
+	/**
+	 * @var array the function score chain
+	 */
+	private $chain;
+
+	/**
+	 * @param SearchContext $context the search context
+	 * @param string $chain name of the chain to load from config
+	 * @throws \Exception if the chain is not defined
+	 */
+	public function __construct( SearchContext $context, $chain ) {
+		$this->context = $context;
+		$this->functionScore = new FunctionScoreDecorator();
+		$this->chain = $context->getConfig()->getElement( 'CirrusSearchRescoreFunctionScoreChains', $chain );
+		if ( $this->chain === null ) {
+			throw new \Exception( "Unknown rescore function chain $chain" );
+		}
+	}
+
+	/**
+	 * @return FunctionScore|null the rescore query or null if none of the
+	 *  functions were needed.
+	 * @throws \Exception if the chain holds an unknown function type
+	 */
+	public function buildRescoreQuery() {
+		foreach ( $this->chain as $func ) {
+			$this->getImplementation( $func )->append( $this->functionScore );
+		}
+		if ( $this->functionScore->isEmptyFunction() ) {
+			return null;
+		}
+		return $this->functionScore;
+	}
+
+	/**
+	 * @param array $func a function definition, 'type' selects the builder
+	 *  and 'params' is forwarded to the custom_field builder
+	 * @return FunctionScoreBuilder the builder for this function
+	 * @throws \Exception if the type is unknown
+	 */
+	private function getImplementation( $func ) {
+		switch ( $func['type'] ) {
+		case 'boostlinks':
+			return new IncomingLinksFunctionScoreBuilder( $this->context );
+		case 'recency':
+			return new PreferRecentFunctionScoreBuilder( $this->context );
+		case 'templates':
+			return new BoostTemplatesFunctionScoreBuilder( $this->context );
+		case 'namespaces':
+			return new NamespacesFunctionScoreBuilder( $this->context );
+		case 'language':
+			return new LangWeightFunctionScoreBuilder( $this->context );
+		case 'custom_field':
+			return new CustomFieldFunctionScoreBuilder( $this->context, $func['params'] );
+		default:
+			throw new \Exception( "Unknown function score type {$func['type']}." );
+		}
+	}
+}
+
+/**
+ * FunctionScore that remembers whether a function was added to it.
+ * Function score builders may decide not to add any function when their
+ * criteria are not met; when nothing was added the rescore query
+ * should not be built.
+ * @todo: find another pattern to deal with this problem and avoid
+ * this strong dependency to FunctionScore::addFunction signature.
+ */
+class FunctionScoreDecorator extends FunctionScore {
+	/**
+	 * @var boolean true until addFunction() is called
+	 */
+	private $emptyFunction = true;
+
+	/**
+	 * Flags this function score as non empty then delegates to the parent.
+	 */
+	public function addFunction(
+		$functionType,
+		$functionParams,
+		AbstractFilter $filter = null,
+		$weight = null
+	) {
+		$this->emptyFunction = false;
+		return parent::addFunction( $functionType, $functionParams, $filter, $weight );
+	}
+
+	/**
+	 * @return boolean true if no function has been added
+	 */
+	public function isEmptyFunction() {
+		return $this->emptyFunction;
+	}
+
+	/**
+	 * Elastica uses the class name as property name by default;
+	 * overridden to force the name to function_score.
+	 */
+	protected function _getBaseName() {
+		return "function_score";
+	}
+}
+
+/**
+ * Base class of the builders that contribute functions to a function score.
+ */
+abstract class FunctionScoreBuilder {
+	/**
+	 * @var SearchContext the search context
+	 */
+	protected $context;
+
+	/**
+	 * @param SearchContext $context the search context
+	 */
+	public function __construct( SearchContext $context ) {
+		$this->context = $context;
+	}
+
+	/**
+	 * Append functions to the function score $container
+	 * @param FunctionScore $container
+	 */
+	abstract public function append( FunctionScore $container );
+}
+
+/**
+ * Builds a set of functions with boosted templates
+ * Uses a weight function with a filter for each template.
+ * The list of boosted templates is read from SearchContext
+ */
+class BoostTemplatesFunctionScoreBuilder extends FunctionScoreBuilder {
+	/**
+	 * @var array boosted templates, key is the template name and value
+	 *  its weight
+	 */
+	private $boostTemplates;
+
+	/**
+	 * @param SearchContext $context the search context
+	 */
+	public function __construct( SearchContext $context ) {
+		parent::__construct( $context );
+		// Use the boosted template from query string if available
+		$this->boostTemplates = $context->getBoostTemplatesFromQuery();
+		// empty array may be returned here in the case of a syntax error
+		// @todo: verify that this is what we want: in case of a syntax error
+		// we disable default boost templates.
+		if ( $this->boostTemplates === null ) {
+			// Fallback to default otherwise
+			$this->boostTemplates = Util::getDefaultBoostTemplates();
+		}
+	}
+
+	/**
+	 * Adds one cached weight function per boosted template.
+	 * Does nothing when there are no boosted templates.
+	 * @param FunctionScore $container
+	 */
+	public function append( FunctionScore $container ) {
+		if ( !$this->boostTemplates ) {
+			return;
+		}
+		foreach ( $this->boostTemplates as $name => $weight ) {
+			$match = new \Elastica\Query\Match();
+			$match->setFieldQuery( 'template', $name );
+			$filterQuery = new \Elastica\Filter\Query( $match );
+			$filterQuery->setCached( true );
+			// The parameter is $container: the original code called an
+			// undefined $functionScore variable here.
+			$container->addWeightFunction( $weight, $filterQuery );
+		}
+	}
+}
+
+/**
+ * Builds a set of functions with namespaces.
+ * Uses a weight function with a terms filter for each distinct weight.
+ * Activated only if more than one namespace is being boosted.
+ */
+class NamespacesFunctionScoreBuilder extends FunctionScoreBuilder {
+	/**
+	 * @var null|float[] CirrusSearchNamespaceWeights with all string keys
+	 * translated into integer namespace codes using the 'ContLang' config.
+	 * Stays null when there is nothing to boost.
+	 */
+	private $normalizedNamespaceWeights;
+
+	/**
+	 * @var int[] the requested namespaces, or all the valid namespaces
+	 * when none were requested
+	 */
+	private $namespacesToBoost;
+
+	/**
+	 * @param SearchContext $context the search context
+	 */
+	public function __construct( SearchContext $context ) {
+		parent::__construct( $context );
+		$this->namespacesToBoost = $this->context->getNamespaces() ?: MWNamespace::getValidNamespaces();
+		if ( !$this->namespacesToBoost || count( $this->namespacesToBoost ) == 1 ) {
+			// nothing to boost, no need to initialize anything else.
+			return;
+		}
+		$this->normalizedNamespaceWeights = array();
+		$language = $this->context->getConfig()->get( 'ContLang' );
+		foreach ( $this->context->getConfig()->get( 'CirrusSearchNamespaceWeights' ) as $ns => $weight ) {
+			if ( is_string( $ns ) ) {
+				$ns = $language->getNsIndex( $ns );
+				// Ignore namespaces that don't exist.
+				if ( $ns === false ) {
+					continue;
+				}
+			}
+			// Now $ns should always be an integer.
+			$this->normalizedNamespaceWeights[ $ns ] = $weight;
+		}
+	}
+
+	/**
+	 * Get the weight of a namespace.
+	 * @param int $namespace the namespace
+	 * @return float the weight of the namespace
+	 */
+	private function getBoostForNamespace( $namespace ) {
+		if ( isset( $this->normalizedNamespaceWeights[ $namespace ] ) ) {
+			return $this->normalizedNamespaceWeights[ $namespace ];
+		}
+		$config = $this->context->getConfig();
+		if ( MWNamespace::isSubject( $namespace ) ) {
+			if ( $namespace === NS_MAIN ) {
+				return 1;
+			}
+			return $config->get( 'CirrusSearchDefaultNamespaceWeight' );
+		}
+		// Not a subject namespace: derive the weight from its subject namespace
+		$subjectNs = MWNamespace::getSubject( $namespace );
+		if ( isset( $this->normalizedNamespaceWeights[ $subjectNs ] ) ) {
+			return $config->get( 'CirrusSearchTalkNamespaceWeight' ) * $this->normalizedNamespaceWeights[ $subjectNs ];
+		}
+		if ( $namespace === NS_TALK ) {
+			return $config->get( 'CirrusSearchTalkNamespaceWeight' );
+		}
+		return $config->get( 'CirrusSearchDefaultNamespaceWeight' ) * $config->get( 'CirrusSearchTalkNamespaceWeight' );
+	}
+
+	/**
+	 * Adds one weight function per distinct namespace weight.
+	 * Nothing is added when fewer than two namespaces are boosted or when
+	 * all of them share the same weight.
+	 * @param FunctionScore $functionScore
+	 */
+	public function append( FunctionScore $functionScore ) {
+		if ( !$this->namespacesToBoost || count( $this->namespacesToBoost ) == 1 ) {
+			// nothing to boost
+			return;
+		}
+
+		// first build the opposite map, this will allow us to add a
+		// single weight function per weight by using a terms filter.
+		// The original weight is kept next to the namespaces because array
+		// keys are stringified.
+		$weightToNs = array();
+		foreach ( $this->namespacesToBoost as $ns ) {
+			$weight = $this->getBoostForNamespace( $ns );
+			$key = (string) $weight;
+			if ( !isset( $weightToNs[$key] ) ) {
+				$weightToNs[$key] = array( 'weight' => $weight, 'namespaces' => array() );
+			}
+			$weightToNs[$key]['namespaces'][] = $ns;
+		}
+		// If all the namespaces are flattened to the same weight
+		// there's no need to rescore anything here. Namespaces with a
+		// weight of 1 must take part in this check: dropping them first
+		// would wrongly disable the boost when only one other weight
+		// remains (e.g. main + talk).
+		if ( count( $weightToNs ) <= 1 ) {
+			return;
+		}
+		foreach ( $weightToNs as $key => $group ) {
+			if ( $key == '1' ) {
+				// such weights would have no effect
+				// we can ignore them.
+				continue;
+			}
+			$filter = new \Elastica\Filter\Terms( 'namespace', $group['namespaces'] );
+			$functionScore->addWeightFunction( $group['weight'], $filter );
+		}
+	}
+}
+
+/**
+ * Builds a function that boosts documents according to their incoming links.
+ * The script variant evaluates log10( incoming_links + 2 ).
+ */
+class IncomingLinksFunctionScoreBuilder extends FunctionScoreBuilder {
+	/**
+	 * @param SearchContext $context the search context
+	 */
+	public function __construct( SearchContext $context ) {
+		parent::__construct( $context );
+	}
+
+	/**
+	 * Adds the incoming links function unless link boosting is disabled
+	 * in the search context.
+	 * @param FunctionScore $functionScore
+	 */
+	public function append( FunctionScore $functionScore ) {
+		// Backward compat code, allows to disable this function
+		// even if specified in the rescore profile
+		if ( !$this->context->isBoostLinks() ) {
+			return;
+		}
+
+		if ( !$this->context->isUseFieldValueFactorWithDefault() ) {
+			// Plugin function not available: use an expression script
+			$expression = "log10(doc['incoming_links'].value + 2)";
+			$script = new \Elastica\Script( $expression, null, 'expression' );
+			$functionScore->addScriptScoreFunction( $script );
+			return;
+		}
+
+		$params = array(
+			'field' => 'incoming_links',
+			'modifier' => 'log2p',
+			'missing' => 0,
+		);
+		$functionScore->addFunction( 'field_value_factor_with_default', $params );
+	}
+}
+
+/**
+ * Builds a field_value_factor function from the parameters
+ * attached to a function chain entry.
+ */
+class CustomFieldFunctionScoreBuilder extends FunctionScoreBuilder {
+	/**
+	 * @var array the field_value_factor profile
+	 */
+	private $profile;
+
+	/**
+	 * @param SearchContext $context the search context
+	 * @param array $profile params passed as is to field_value_factor
+	 */
+	public function __construct( SearchContext $context, $profile ) {
+		parent::__construct( $context );
+		$this->profile = $profile;
+	}
+
+	/**
+	 * Always adds a single field_value_factor function.
+	 * @param FunctionScore $functionScore
+	 */
+	public function append( FunctionScore $functionScore ) {
+		$params = $this->profile;
+		$functionScore->addFunction( 'field_value_factor', $params );
+	}
+}
+
+/**
+ * Builds a script score that boosts documents on the timestamp field.
+ * Reads its params from SearchContext: preferRecentDecayPortion and
+ * preferRecentHalfLife.
+ * Can be initialized by config for full text and by special syntax in user
+ * query
+ */
+class PreferRecentFunctionScoreBuilder extends FunctionScoreBuilder {
+	/**
+	 * @param SearchContext $context the search context
+	 */
+	public function __construct( SearchContext $context ) {
+		parent::__construct( $context );
+	}
+
+	/**
+	 * Adds the exponential decay script, only when prefer recent options
+	 * have been set on the search context.
+	 * @param FunctionScore $functionScore
+	 */
+	public function append( FunctionScore $functionScore ) {
+		if ( !$this->context->hasPreferRecentOptions() ) {
+			return;
+		}
+		// The builder has no decayPortion property of its own: the value
+		// lives in the search context.
+		$decayPortion = $this->context->getPreferRecentDecayPortion();
+		// Convert half life for time in days to decay constant for time in milliseconds.
+		$decayConstant = log( 2 ) / $this->context->getPreferRecentHalfLife() / 86400000;
+		$parameters = array(
+			'decayConstant' => $decayConstant,
+			'decayPortion' => $decayPortion,
+			'nonDecayPortion' => 1 - $decayPortion,
+			'now' => time() * 1000
+		);
+
+		// e^ct where t is last modified time - now which is negative
+		$exponentialDecayExpression = "exp(decayConstant * (doc['timestamp'].value - now))";
+		if ( $decayPortion !== 1.0 ) {
+			// Only a portion of the score decays, the rest is kept as is
+			$exponentialDecayExpression = "$exponentialDecayExpression * decayPortion + nonDecayPortion";
+		}
+		$functionScore->addScriptScoreFunction( new \Elastica\Script( $exponentialDecayExpression,
+			$parameters, 'expression' ) );
+	}
+}
+
+/**
+ * Boosts documents in user language and in wiki language if different
+ * Uses getUserLanguage in SearchConfig and LanguageCode for language values
+ * and CirrusSearchLanguageWeight['user'|'wiki'] for respective weights.
+ */
+class LangWeightFunctionScoreBuilder extends FunctionScoreBuilder {
+	/**
+	 * @var string user language
+	 */
+	private $userLang;
+
+	/**
+	 * @var float user language weight
+	 */
+	private $userWeight;
+
+	/**
+	 * @var string wiki language
+	 */
+	private $wikiLang;
+
+	/**
+	 * @var float wiki language weight
+	 */
+	private $wikiWeight;
+
+	/**
+	 * @param SearchContext $context the search context
+	 */
+	public function __construct( SearchContext $context ) {
+		parent::__construct( $context );
+		$config = $this->context->getConfig();
+		$this->userLang = $config->getUserLanguage();
+		$this->userWeight = $config->getElement( 'CirrusSearchLanguageWeight', 'user' );
+		$this->wikiLang = $config->get( 'LanguageCode' );
+		$this->wikiWeight = $config->getElement( 'CirrusSearchLanguageWeight', 'wiki' );
+	}
+
+	/**
+	 * Adds a weight function per language that has a weight configured.
+	 * @param FunctionScore $functionScore
+	 */
+	public function append( FunctionScore $functionScore ) {
+		// Boost pages in a user's language
+		if ( $this->userWeight ) {
+			$userFilter = new \Elastica\Filter\Term( array( 'language' => $this->userLang ) );
+			$functionScore->addWeightFunction( $this->userWeight, $userFilter );
+		}
+
+		// And a wiki's language, if it's different
+		if ( !$this->wikiWeight || $this->userLang == $this->wikiLang ) {
+			return;
+		}
+		$wikiFilter = new \Elastica\Filter\Term( array( 'language' => $this->wikiLang ) );
+		$functionScore->addWeightFunction( $this->wikiWeight, $wikiFilter );
+	}
+}
diff --git a/includes/Search/SearchContext.php
b/includes/Search/SearchContext.php
index de7dd45..40e8b4e 100644
--- a/includes/Search/SearchContext.php
+++ b/includes/Search/SearchContext.php
@@ -35,6 +35,11 @@
private $config;
/**
+ * @var array of integer (list of namespaces)
+ */
+ private $namespaces;
+
+ /**
* @var boolean
*/
private $searchContainedSyntax = false;
@@ -50,11 +55,33 @@
*/
private $searchTextQueryBuilder;
+ /**
+ * @var array list of boost templates extracted from the query string
+ */
+ private $boostTemplatesFromQuery;
+ /**
+ * @deprecated use rescore profiles instead
+ * @var boolean do we need to boost links
+ */
+ private $boostLinks = false;
- public function __construct( SearchConfig $config ) {
+ /**
+ * @var float portion of article's score which decays with time.
Defaults to 0 meaning don't decay the score
+ * with time since the last update.
+ */
+ private $preferRecentDecayPortion = 0;
+ /**
+ * @var float number of days it takes an the portion of an article
score that will decay with time
+ * since last update to decay half way. Defaults to 0 meaning don't
decay the score with time.
+ */
+ private $preferRecentHalfLife = 0;
+
+ public function __construct( SearchConfig $config, $namespaces ) {
$this->config = $config;
$this->searchTextQueryBuilderFactory = new
SearchTextQueryBuilderFactory( $this );
+ $this->boostLinks = $this->config->get(
'CirrusSearchBoostLinks' );
+ $this->namespaces = $namespaces;
}
/**
@@ -62,6 +89,23 @@
*/
public function getConfig() {
return $this->config;
+ }
+
+	/**
+	 * The namespaces being requested.
+	 * NOTE: this value may change during the Searcher process
+	 * (see setNamespaces()).
+	 * @return array of integer
+	 */
+	public function getNamespaces() {
+		return $this->namespaces;
+	}
+
+ /**
+ * set the namespaces
+ * @param array $namespaces array of integer
+ */
+ public function setNamespaces( $namespaces ) {
+ $this->namespaces = $namespaces;
}
/**
@@ -79,18 +123,26 @@
}
/**
- * @return true if CommonTermsQuery is allowed
+ * @return boolean true if CommonTermsQuery is allowed
*/
public function isUseCommonTermsQuery() {
return $this->config->get('CirrusSearchUseCommonTermsQuery' );
}
/**
- * @return true if we can use the safer query from the wikimedia extra
+ * @return boolean true if we can use the safer query from the
wikimedia extra
* plugin
*/
public function isUseSafer() {
return ( !is_null( $this->config->getElement(
'CirrusSearchWikimediaExtraPlugin', 'safer' ) ) );
+ }
+
+ /**
+ * @return boolean true if we can use the function score
field_value_factor_with_default provided
+ * by the wikimedia extra plugin
+ */
+ public function isUseFieldValueFactorWithDefault() {
+ return ( !is_null( $this->config->getElement(
'CirrusSearchWikimediaExtraPlugin', 'field_value_factor_with_default' ) ) );
}
/**
@@ -130,4 +182,71 @@
public function searchTextQueryBuilder( $queryStringQueryString ) {
return $this->searchTextQueryBuilderFactory->getBuilder(
$queryStringQueryString );
}
+
+	/**
+	 * Return the list of boosted templates specified in the user query
+	 * (special syntax): null if not used in the query or an empty array
+	 * if there was a syntax error.
+	 * Initialized after special syntax extraction.
+	 * @return array|null of boosted templates, key is the template and
+	 *  value is the weight
+	 */
+	public function getBoostTemplatesFromQuery() {
+		return $this->boostTemplatesFromQuery;
+	}
+
+	/**
+	 * Store the boosted templates found in the query string.
+	 * @param array $boostTemplatesFromQuery boosted templates extracted
+	 *  from query
+	 */
+	public function setBoostTemplatesFromQuery( $boostTemplatesFromQuery ) {
+		$this->boostTemplatesFromQuery = $boostTemplatesFromQuery;
+	}
+
+	/**
+	 * @deprecated use rescore profiles
+	 * @param boolean $boostLinks false deactivates
+	 *  IncomingLinksFunctionScoreBuilder even if it is present in the
+	 *  rescore profile
+	 */
+	public function setBoostLinks( $boostLinks ) {
+		$this->boostLinks = $boostLinks;
+	}
+
+	/**
+	 * @deprecated use custom rescore profile
+	 * @return boolean true if incoming links should be boosted
+	 */
+	public function isBoostLinks() {
+		return $this->boostLinks;
+	}
+
+	/**
+	 * Set both prefer recent options at once.
+	 * @param float $preferRecentDecayPortion portion of the score that
+	 *  decays with time
+	 * @param float $preferRecentHalfLife number of days for that portion
+	 *  to decay half way
+	 */
+	public function setPreferRecentOptions( $preferRecentDecayPortion, $preferRecentHalfLife ) {
+		$this->preferRecentHalfLife = $preferRecentHalfLife;
+		$this->preferRecentDecayPortion = $preferRecentDecayPortion;
+	}
+
+
+	/**
+	 * @return boolean true if both prefer recent options are greater
+	 *  than 0.
+	 */
+	public function hasPreferRecentOptions() {
+		if ( $this->preferRecentHalfLife > 0 ) {
+			return $this->preferRecentDecayPortion > 0;
+		}
+		return false;
+	}
+
+	/**
+	 * Parameter used by Search\PreferRecentFunctionScoreBuilder
+	 * @return float the decay portion for prefer recent
+	 */
+	public function getPreferRecentDecayPortion() {
+		// Read the backing property; the original returned the undefined
+		// property $this->getPreferRecentDecayPortion.
+		return $this->preferRecentDecayPortion;
+	}
+
+	/**
+	 * Parameter used by Search\PreferRecentFunctionScoreBuilder
+	 * @return float the half life (in days) for prefer recent
+	 */
+	public function getPreferRecentHalfLife() {
+		return $this->preferRecentHalfLife;
+	}
}
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 43222b0..77654f7 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -10,6 +10,7 @@
use CirrusSearch\Search\Filters;
use CirrusSearch\Search\FullTextResultsType;
use CirrusSearch\Search\ResultsType;
+use CirrusSearch\Search\RescoreBuilder;
use CirrusSearch\Search\SearchContext;
use CirrusSearch\Search\SearchTextQueryBuilderFactory;
use CirrusSearch\Search\SearchTextQueryBuilder;
@@ -82,11 +83,6 @@
private $limit;
/**
- * @var int[]|null array of namespaces in which to search
- */
- protected $namespaces;
-
- /**
* @var Language language of the wiki
*/
private $language;
@@ -134,26 +130,6 @@
*/
private $rescore = array();
/**
- * @var float portion of article's score which decays with time.
Defaults to 0 meaning don't decay the score
- * with time since the last update.
- */
- private $preferRecentDecayPortion = 0;
- /**
- * @var float number of days it takes an the portion of an article
score that will decay with time
- * since last update to decay half way. Defaults to 0 meaning don't
decay the score with time.
- */
- private $preferRecentHalfLife = 0;
- /**
- * @var boolean should the query results boost pages with more incoming
links. Default to false.
- */
- private $boostLinks = false;
- /**
- * @var float[] template name to boost multiplier for having a
template. Defaults to none but initialized by
- * queries that use it to self::getDefaultBoostTemplates() if they need
it. That is too expensive to do by
- * default though.
- */
- private $boostTemplates = array();
- /**
* @var string index base name to use
*/
private $indexBaseName;
@@ -196,12 +172,6 @@
* @var boolean return explanation with results
*/
private $returnExplain = false;
-
- /**
- * @var null|float[] lazily initialized version of
$wgCirrusSearchNamespaceWeights with all string keys
- * translated into integer namespace codes using $this->language.
- */
- private $normalizedNamespaceWeights = null;
/**
* @var \Elastica\Query\Match[] queries that don't use Elastic's "query
string" query, for more
@@ -249,11 +219,10 @@
$this->config = $config;
$this->offset = min( $offset, self::MAX_OFFSET );
$this->limit = $limit;
- $this->namespaces = $namespaces;
$this->indexBaseName = $index ?: $config->getWikiId();
$this->language = $config->get( 'ContLang' );
$this->escaper = new Escaper( $config->get( 'LanguageCode' ),
$config->get( 'CirrusSearchAllowLeadingWildcard' ) );
- $this->searchContext = new SearchContext( $this->config );
+ $this->searchContext = new SearchContext( $this->config,
$namespaces );
}
/**
@@ -362,8 +331,8 @@
} else {
$this->query = new \Elastica\Query\MatchAll();
}
- $this->boostTemplates = self::getDefaultBoostTemplates();
- $this->boostLinks = true;
+ // @todo: use dedicated rescore profiles for prefix search.
+ $this->searchContext->setBoostLinks( true );
return $this->search( 'prefix', $search );
}
@@ -392,7 +361,6 @@
$originalTerm = $term;
$searchContainedSyntax = false;
$this->term = $term;
- $this->boostLinks = $this->config->get(
'CirrusSearchBoostLinks' );
$searchType = 'full_text';
// Handle title prefix notation
$prefixPos = strpos( $this->term, 'prefix:' );
@@ -407,7 +375,7 @@
$cirrusSearchEngine = new CirrusSearch();
$cirrusSearchEngine->setConnection(
$this->connection );
$value = trim(
$cirrusSearchEngine->replacePrefixes( $value ) );
- $this->namespaces =
$cirrusSearchEngine->namespaces;
+ $this->searchContext->setNamespaces(
$cirrusSearchEngine->namespaces );
// If the namespace prefix wasn't the entire
prefix filter then add a filter for the title
if ( strpos( $value, ':' ) !== strlen( $value )
- 1 ) {
$value = str_replace( '_', ' ', $value
);
@@ -439,8 +407,7 @@
return '';
}
);
- $this->preferRecentDecayPortion = $preferRecentDecayPortion;
- $this->preferRecentHalfLife = $preferRecentHalfLife;
+		// Hand over both prefer-recent settings: the decay portion and the half life.
+		$this->searchContext->setPreferRecentOptions( $preferRecentDecayPortion, $preferRecentHalfLife );
$this->extractSpecialSyntaxFromTerm(
'/^\s*local:/',
@@ -453,7 +420,6 @@
// Handle other filters
$filters = $this->filters;
$notFilters = $this->notFilters;
- $boostTemplates = self::getDefaultBoostTemplates();
$highlightSource = array();
$this->extractSpecialSyntaxFromTerm(
'/(?<not>-)?insource:\/(?<pattern>(?:[^\\\\\/]|\\\\.)+)\/(?<insensitive>i)? ?/',
@@ -539,7 +505,7 @@
$isEmptyQuery = false;
$this->extractSpecialSyntaxFromTerm(
'/(?<key>[a-z\\-]{7,15}):\s*(?<value>"(?<quoted>(?:[^"]|(?<=\\\)")+)"|(?<unquoted>\S+))
?/',
- function ( $matches ) use ( $searcher, $escaper,
&$filters, &$notFilters, &$boostTemplates,
+ function ( $matches ) use ( $searcher, $escaper,
&$filters, &$notFilters,
&$searchContainedSyntax, &$fuzzyQuery,
&$highlightSource, &$isEmptyQuery ) {
$key = $matches['key'];
$quotedValue = $matches['value'];
@@ -555,10 +521,8 @@
}
switch ( $key ) {
case 'boost-templates':
- $boostTemplates =
Searcher::parseBoostTemplates( $value );
- if ( $boostTemplates === null )
{
- $boostTemplates =
Searcher::getDefaultBoostTemplates();
- }
+ $boostTemplates =
Util::parseBoostTemplates( $value );
+
$context->setBoostTemplatesFromQuery( $boostTemplates );
$searchContainedSyntax = true;
return '';
case 'hastemplate':
@@ -611,7 +575,6 @@
}
$this->filters = $filters;
$this->notFilters = $notFilters;
- $this->boostTemplates = $boostTemplates;
$this->searchContext->setSearchContainedSyntax(
$searchContainedSyntax );
$this->fuzzyQuery = $fuzzyQuery;
$this->highlightSource = $highlightSource;
@@ -1311,10 +1274,10 @@
$extraIndexes = array();
$indexType = $this->pickIndexTypeFromNamespaces();
- if ( $this->namespaces ) {
+ if ( $this->getNamespaces() ) {
$extraIndexes = $this->getAndFilterExtraIndexes();
if ( $this->needNsFilter( $extraIndexes, $indexType ) )
{
- $this->filters[] = new \Elastica\Filter\Terms(
'namespace', $this->namespaces );
+ $this->filters[] = new \Elastica\Filter\Terms(
'namespace', $this->getNamespaces() );
}
}
@@ -1535,7 +1498,7 @@
* @return int[]|null
*/
public function getNamespaces() {
- return $this->namespaces;
+ return $this->searchContext->getNamespaces();
}
/**
@@ -1548,7 +1511,7 @@
// We're reaching into another wiki's indexes and we
don't know what is there so be defensive.
return true;
}
- $nsCount = count( $this->namespaces );
+ $nsCount = count( $this->getNamespaces() );
$validNsCount = count( MWNamespace::getValidNamespaces() );
if ( $nsCount === $validNsCount ) {
// We're only on our wiki and we're searching
_everything_.
@@ -1743,7 +1706,7 @@
$fields[] = "opening_text${fieldSuffix}^${openingTextWeight}";
$fields[] = "text${fieldSuffix}^${textWeight}";
$fields[] =
"auxiliary_text${fieldSuffix}^${auxiliaryTextWeight}";
- if ( !$this->namespaces || in_array( NS_FILE, $this->namespaces
) ) {
+ if ( !$this->getNamespaces() || in_array( NS_FILE,
$this->getNamespaces() ) ) {
$fileTextWeight = $weight * $searchWeights[ 'file_text'
];
$fields[] = "file_text${fieldSuffix}^${fileTextWeight}";
}
@@ -1755,12 +1718,12 @@
* @return string|false either an index type or false to use all index
types
*/
private function pickIndexTypeFromNamespaces() {
- if ( !$this->namespaces ) {
+ if ( !$this->getNamespaces() ) {
return false; // False selects all index types
}
$indexTypes = array();
- foreach ( $this->namespaces as $namespace ) {
+ foreach ( $this->getNamespaces() as $namespace ) {
$indexTypes[] =
$this->connection->getIndexSuffixForNamespace(
$namespace );
}
@@ -1779,7 +1742,7 @@
if ( $this->limitSearchToLocalWiki ) {
return array();
}
- $extraIndexes = OtherIndexes::getExtraIndexesForNamespaces(
$this->namespaces );
+ $extraIndexes = OtherIndexes::getExtraIndexesForNamespaces(
$this->getNamespaces() );
if ( $extraIndexes ) {
$this->notFilters[] = new \Elastica\Filter\Term(
array( 'local_sites_with_dupe' =>
$this->indexBaseName ) );
@@ -1799,186 +1762,10 @@
return;
}
- $functionScore = new \Elastica\Query\FunctionScore();
- $useFunctionScore = false;
-
- // Customize score by boosting based on incoming links count
- if ( $this->boostLinks ) {
- $useFunctionScore = true;
- if ( $this->config->getElement(
'CirrusSearchWikimediaExtraPlugin', 'field_value_factor_with_default' ) ) {
- $functionScore->addFunction(
'field_value_factor_with_default', array(
- 'field' => 'incoming_links',
- 'modifier' => 'log2p',
- 'missing' => 0,
- ) );
- } else {
- $scoreBoostExpression =
"log10(doc['incoming_links'].value + 2)";
- $functionScore->addScriptScoreFunction( new
\Elastica\Script( $scoreBoostExpression, null, 'expression' ) );
- }
- }
-
- // Customize score by decaying a portion by time since last
update
- if ( $this->preferRecentDecayPortion > 0 &&
$this->preferRecentHalfLife > 0 ) {
- // Convert half life for time in days to decay constant
for time in milliseconds.
- $decayConstant = log( 2 ) / $this->preferRecentHalfLife
/ 86400000;
- $parameters = array(
- 'decayConstant' => $decayConstant,
- 'decayPortion' =>
$this->preferRecentDecayPortion,
- 'nonDecayPortion' => 1 -
$this->preferRecentDecayPortion,
- 'now' => time() * 1000
- );
-
- // e^ct where t is last modified time - now which is
negative
- $exponentialDecayExpression = "exp(decayConstant *
(doc['timestamp'].value - now))";
- if ( $this->preferRecentDecayPortion !== 1.0 ) {
- $exponentialDecayExpression =
"$exponentialDecayExpression * decayPortion + nonDecayPortion";
- }
- $functionScore->addScriptScoreFunction( new
\Elastica\Script( $exponentialDecayExpression,
- $parameters, 'expression' ) );
- $useFunctionScore = true;
- }
-
- // Add boosts for pages that contain certain templates
- if ( $this->boostTemplates ) {
- foreach ( $this->boostTemplates as $name => $boost ) {
- $match = new \Elastica\Query\Match();
- $match->setFieldQuery( 'template', $name );
- $filterQuery = new \Elastica\Filter\Query(
$match );
- $filterQuery->setCached( true );
- $functionScore->addBoostFactorFunction( $boost,
$filterQuery );
- }
- $useFunctionScore = true;
- }
-
- // Add boosts for namespaces
- $namespacesToBoost = $this->namespaces ?:
MWNamespace::getValidNamespaces();
- if ( $namespacesToBoost ) {
- // Group common weights together and build a single
filter per weight
- // to save on filters.
- $weightToNs = array();
- foreach ( $namespacesToBoost as $ns ) {
- $weight = $this->getBoostForNamespace( $ns );
- $weightToNs[ (string)$weight ][] = $ns;
- }
- if ( count( $weightToNs ) > 1 ) {
- unset( $weightToNs[ '1' ] ); // That'd be
redundant.
- foreach ( $weightToNs as $weight => $namespaces
) {
- $filter = new \Elastica\Filter\Terms(
'namespace', $namespaces );
- $functionScore->addBoostFactorFunction(
$weight, $filter );
- $useFunctionScore = true;
- }
- }
- }
-
- // Boost pages in a user's language
- $userLang = $this->config->getUserLanguage();
- $userWeight = $this->config->getElement(
'CirrusSearchLanguageWeight', 'user' );
- if ( $userWeight ) {
- $functionScore->addBoostFactorFunction(
- $userWeight,
- new \Elastica\Filter\Term( array( 'language' =>
$userLang ) )
- );
- $useFunctionScore = true;
- }
- // And a wiki's language, if it's different
- $wikiWeight = $this->config->getElement(
'CirrusSearchLanguageWeight', 'wiki' );
- if ( $userLang != $this->config->get( 'LanguageCode' ) &&
$wikiWeight ) {
- $functionScore->addBoostFactorFunction(
- $wikiWeight,
- new \Elastica\Filter\Term( array( 'language' =>
$this->config->get( 'LanguageCode' ) ) )
- );
- $useFunctionScore = true;
- }
-
- if ( !$useFunctionScore ) {
- // Nothing to do
- return;
- }
-
- // The function score is done as a rescore on top of everything
else
- $this->rescore[] = array(
- 'window_size' => $this->config->get(
'CirrusSearchFunctionRescoreWindowSize' ),
- 'query' => array(
- 'rescore_query' => $functionScore,
- 'query_weight' => 1.0,
- 'rescore_query_weight' => 1.0,
- 'score_mode' => 'multiply',
- )
- );
+ $builder = new RescoreBuilder( $this->searchContext,
$this->config->get( 'CirrusSearchRescoreProfile' ) );
+ $this->rescore = array_merge( $this->rescore, $builder->build()
);
}
- /**
- * @return float[]
- */
- public static function getDefaultBoostTemplates() {
- static $defaultBoostTemplates = null;
- if ( $defaultBoostTemplates === null ) {
- $source = wfMessage( 'cirrussearch-boost-templates'
)->inContentLanguage();
- $defaultBoostTemplates = array();
- if( !$source->isDisabled() ) {
- $lines = Util::parseSettingsInMessage(
$source->plain() );
- $defaultBoostTemplates =
self::parseBoostTemplates(
- implode( ' ', $lines ) );
// Now parse the templates
- }
- }
- return $defaultBoostTemplates;
- }
-
- /**
- * Parse boosted templates. Parse failures silently return no boosted
templates.
- * @param string $text text representation of boosted templates
- * @return float[] array of boosted templates.
- */
- public static function parseBoostTemplates( $text ) {
- $boostTemplates = array();
- $templateMatches = array();
- if ( preg_match_all( '/([^|]+)\|(\d+)% ?/', $text,
$templateMatches, PREG_SET_ORDER ) ) {
- foreach ( $templateMatches as $templateMatch ) {
- $boostTemplates[ $templateMatch[ 1 ] ] =
floatval( $templateMatch[ 2 ] ) / 100;
- }
- }
- return $boostTemplates;
- }
-
- /**
- * Get the weight of a namespace.
- * @param int $namespace the namespace
- * @return float the weight of the namespace
- */
- private function getBoostForNamespace( $namespace ) {
- if ( $this->normalizedNamespaceWeights === null ) {
- $this->normalizedNamespaceWeights = array();
- foreach ( $this->config->get(
'CirrusSearchNamespaceWeights' ) as $ns => $weight ) {
- if ( is_string( $ns ) ) {
- $ns = $this->language->getNsIndex( $ns
);
- // Ignore namespaces that don't exist.
- if ( $ns === false ) {
- continue;
- }
- }
- // Now $ns should always be an integer.
- $this->normalizedNamespaceWeights[ $ns ] =
$weight;
- }
- }
-
- if ( isset( $this->normalizedNamespaceWeights[ $namespace ] ) )
{
- return $this->normalizedNamespaceWeights[ $namespace ];
- }
- if ( MWNamespace::isSubject( $namespace ) ) {
- if ( $namespace === NS_MAIN ) {
- return 1;
- }
- return $this->config->get(
'CirrusSearchDefaultNamespaceWeight' );
- }
- $subjectNs = MWNamespace::getSubject( $namespace );
- if ( isset( $this->normalizedNamespaceWeights[ $subjectNs ] ) )
{
- return $this->config->get(
'CirrusSearchTalkNamespaceWeight' ) * $this->normalizedNamespaceWeights[
$subjectNs ];
- }
- if ( $namespace === NS_TALK ) {
- return $this->config->get(
'CirrusSearchTalkNamespaceWeight' );
- }
- return $this->config->get( 'CirrusSearchDefaultNamespaceWeight'
) * $this->config->get( 'CirrusSearchTalkNamespaceWeight' );
- }
/**
* @param string $search
@@ -2031,7 +1818,7 @@
}
$foundNamespace = $foundNamespace[ 0 ];
$query = substr( $query, $colon + 1 );
- $this->namespaces = array( $foundNamespace->getId() );
+ $this->searchContext->setNamespaces( array(
$foundNamespace->getId() ) );
}
/**
diff --git a/includes/Util.php b/includes/Util.php
index 7b3db35..f317728 100644
--- a/includes/Util.php
+++ b/includes/Util.php
@@ -415,4 +415,37 @@
}
}
}
+
+	/**
+	 * Turn a textual list of template boosts into a lookup table.
+	 *
+	 * Every "Name|NNN%" entry found in $text yields one element. Text in
+	 * which no such entry can be found silently yields an empty array.
+	 *
+	 * @param string $text text representation of boosted templates
+	 * @return array map of template name to boost (the percentage divided
+	 *  by 100, as a float)
+	 */
+	public static function parseBoostTemplates( $text ) {
+		$parsed = array();
+		$found = preg_match_all( '/([^|]+)\|(\d+)% ?/', $text, $entries, PREG_SET_ORDER );
+		if ( !$found ) {
+			// Nothing matched (or the match failed): no boosts.
+			return $parsed;
+		}
+		foreach ( $entries as $entry ) {
+			// $entry[ 1 ] is the template name, $entry[ 2 ] the percentage digits.
+			list( , $template, $percent ) = $entry;
+			$parsed[ $template ] = floatval( $percent ) / 100;
+		}
+		return $parsed;
+	}
+
+	/**
+	 * Boost templates configured through the 'cirrussearch-boost-templates'
+	 * message in the content language.
+	 *
+	 * The result is computed on the first call and kept in a static
+	 * variable for later calls. A disabled message yields an empty array.
+	 *
+	 * @return float[]
+	 */
+	public static function getDefaultBoostTemplates() {
+		static $defaultBoostTemplates = null;
+		if ( $defaultBoostTemplates !== null ) {
+			// Already computed earlier.
+			return $defaultBoostTemplates;
+		}
+		$message = wfMessage( 'cirrussearch-boost-templates' )->inContentLanguage();
+		$defaultBoostTemplates = array();
+		if ( $message->isDisabled() ) {
+			return $defaultBoostTemplates;
+		}
+		// Join the setting lines with spaces so they can be parsed in one pass.
+		$settings = implode( ' ', Util::parseSettingsInMessage( $message->plain() ) );
+		$defaultBoostTemplates = self::parseBoostTemplates( $settings );
+		return $defaultBoostTemplates;
+	}
}
diff --git a/profiles/CommonTermsQueryProfiles.php
b/profiles/CommonTermsQueryProfiles.php
index 9b89ee1..3dca4e7 100644
--- a/profiles/CommonTermsQueryProfiles.php
+++ b/profiles/CommonTermsQueryProfiles.php
@@ -5,8 +5,6 @@
* CirrusSearch - List of profiles for CommonsTermQuery
* see
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-common-terms-query.html
*
- * Set $wgSearchType to 'CirrusSearch'
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -89,7 +87,7 @@
$wgCirrusSearchCommonTermsQueryProfiles;
Util::overrideYesNo( $wgCirrusSearchUseCommonTermsQuery,
$request, 'cirrusUseCommonTermsQuery' );
- if ( $wgCirrusSearchCommonTermsQueryProfile ) {
+ if ( $wgCirrusSearchUseCommonTermsQuery ) {
$profile = $request->getVal(
'cirrusCommonTermsQueryProfile' );
if ( $profile !== null && isset (
$wgCirrusSearchCommonTermsQueryProfiles[$profile] ) ) {
$wgCirrusSearchCommonTermsQueryProfile =
$wgCirrusSearchCommonTermsQueryProfiles[$profile];
diff --git a/profiles/PhraseSuggesterProfiles.php
b/profiles/PhraseSuggesterProfiles.php
index d73c141..5f8cb53 100644
--- a/profiles/PhraseSuggesterProfiles.php
+++ b/profiles/PhraseSuggesterProfiles.php
@@ -4,8 +4,6 @@
/**
* CirrusSearch - List of profiles for "Did you mean" suggestions
*
- * Set $wgSearchType to 'CirrusSearch'
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
diff --git a/profiles/RescoreProfiles.php b/profiles/RescoreProfiles.php
new file mode 100644
index 0000000..de1a78d
--- /dev/null
+++ b/profiles/RescoreProfiles.php
@@ -0,0 +1,172 @@
+<?php
+
+namespace CirrusSearch;
+/**
+ * CirrusSearch - List of profiles for function score rescores.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+
+// @todo: write docs and sensible profiles
+/**
+ * Rescore profiles, keyed by profile name. RescoreProfiles::overrideOptions()
+ * (bottom of this file) copies one of these entries into
+ * $wgCirrusSearchRescoreProfile when a request names it.
+ *
+ * Keys used by the profiles below:
+ *  - supported_namespaces: 'all' or an array of namespace ids
+ *  - fallback_profile: name of another profile in this array
+ *    NOTE(review): presumably used when the searched namespaces are not
+ *    covered by supported_namespaces -- confirm against the rescore builder.
+ *  - rescore: list of rescore definitions, each made of
+ *    - window: presumably the rescore window size -- TODO confirm
+ *    - window_size_override: name of a config variable
+ *      NOTE(review): presumably its value replaces 'window' when set --
+ *      confirm against the rescore builder.
+ *    - query_weight, rescore_query_weight, score_mode: same names as the
+ *      options of an Elasticsearch rescore query
+ *    - type: only 'function_score' is used in this file
+ *    - function_chain: a key of $wgCirrusSearchRescoreFunctionScoreChains
+ *
+ * NOTE(review): with score_mode 'multiply' the two weights look like they
+ * only scale the product by a constant, so 'overboostlinks' and
+ * 'underboostlinks' may rank exactly like a 1.0/1.0 rescore -- confirm this
+ * is intended.
+ */
+$wgCirrusSearchRescoreProfiles = array(
+ 'default' => array(
+ 'supported_namespaces' => 'all',
+ 'rescore' => array(
+ array(
+ 'window' => 8192,
+ 'window_size_override' =>
'CirrusSearchFunctionRescoreWindowSize',
+ 'query_weight' => 1.0,
+ 'rescore_query_weight' => 1.0,
+ 'score_mode' => 'multiply',
+ 'type' => 'function_score',
+ 'function_chain' => 'default_allinone_chain'
+ )
+ )
+ ),
+ 'overboostlinks' => array(
+ 'supported_namespaces' => 'all',
+ 'rescore' => array(
+ array(
+ 'window' => 8192,
+ 'window_size_override' =>
'CirrusSearchFunctionRescoreWindowSize',
+ 'query_weight' => 1.0,
+ 'rescore_query_weight' => 1.0,
+ 'score_mode' => 'multiply',
+ 'type' => 'function_score',
+ 'function_chain' => 'optional_chain'
+ ),
+ array(
+ 'window' => 8192,
+ 'window_size_override' =>
'CirrusSearchFunctionRescoreWindowSize',
+ 'query_weight' => 0.5,
+ 'rescore_query_weight' => 1.5,
+ 'score_mode' => 'multiply',
+ 'type' => 'function_score',
+ 'function_chain' => 'boostlinks_only'
+ )
+ )
+ ),
+ 'underboostlinks' => array(
+ 'supported_namespaces' => 'all',
+ 'rescore' => array(
+ array(
+ 'window' => 8192,
+ 'window_size_override' =>
'CirrusSearchFunctionRescoreWindowSize',
+ 'query_weight' => 1.0,
+ 'rescore_query_weight' => 1.0,
+ 'score_mode' => 'multiply',
+ 'type' => 'function_score',
+ 'function_chain' => 'optional_chain'
+ ),
+ array(
+ 'window' => 8192,
+ 'window_size_override' =>
'CirrusSearchFunctionRescoreWindowSize',
+ 'query_weight' => 1.5,
+ 'rescore_query_weight' => 0.5,
+ 'score_mode' => 'multiply',
+ 'type' => 'function_score',
+ 'function_chain' => 'boostlinks_only'
+ )
+ )
+ ),
+ // @fixme: just for test, to remove
+ 'negativeboostlinks' => array(
+ 'supported_namespaces' => array( 0 ),
+ 'fallback_profile' => 'default',
+ 'rescore' => array(
+ array(
+ 'window' => 8192,
+ 'window_size_override' =>
'CirrusSearchFunctionRescoreWindowSize',
+ 'query_weight' => 1.0,
+ 'rescore_query_weight' => 1.0,
+ 'score_mode' => 'multiply',
+ 'type' => 'function_score',
+ 'function_chain' => 'optional_chain'
+ ),
+ // NOTE(review): the only rescore here without a window_size_override
+ array(
+ 'window' => 8192,
+ 'query_weight' => 1.0,
+ 'rescore_query_weight' => 1.0,
+ 'score_mode' => 'multiply',
+ 'type' => 'function_score',
+ 'function_chain' => 'negativelinks'
+ )
+ )
+ )
+);
+
+/**
+ * Function score chains, keyed by chain name. The rescore profiles in
+ * $wgCirrusSearchRescoreProfiles name one of these keys in their
+ * 'function_chain' setting.
+ *
+ * A chain is a list of function definitions. Each definition has a 'type';
+ * the 'custom_field' type additionally carries 'params' (field, factor,
+ * modifier, missing).
+ *
+ * NOTE(review): the types boostlinks, recency, templates, namespaces and
+ * language presumably map to the boosts formerly built inline in Searcher
+ * (incoming links, prefer-recent decay, boost-templates, namespace weights,
+ * language weights) -- confirm against the rescore builder.
+ * NOTE(review): the custom_field params look like those of Elasticsearch's
+ * field_value_factor function -- confirm.
+ * NOTE(review): 'pvpr_chain' is not referenced by any profile in this file.
+ */
+$wgCirrusSearchRescoreFunctionScoreChains = array(
+ 'default_allinone_chain' => array(
+ array( 'type' => 'boostlinks' ),
+ array( 'type' => 'recency' ),
+ array( 'type' => 'templates' ),
+ array( 'type' => 'namespaces' ),
+ array( 'type' => 'language' ),
+ ),
+ 'optional_chain' => array(
+ array( 'type' => 'recency' ),
+ array( 'type' => 'templates' ),
+ array( 'type' => 'namespaces' ),
+ array( 'type' => 'language' ),
+ ),
+ 'boostlinks_only' => array(
+ array( 'type' => 'boostlinks' )
+ ),
+ // @fixme: just for test, to remove
+ 'negativelinks' => array(
+ array(
+ 'type' => 'custom_field',
+ 'params' => array(
+ 'field' => 'incoming_links',
+ 'factor' => -1,
+ 'modifier' => 'none',
+ 'missing' => 1,
+ )
+ ),
+ ),
+ 'pvpr_chain' => array(
+ array(
+ 'type' => 'custom_field',
+ 'params' => array(
+ 'field' => 'pageviews',
+ 'factor' => 1000,
+ 'modifier' => 'none',
+ 'missing' => 1e-10,
+ )
+ ),
+ array(
+ 'type' => 'custom_field',
+ 'params' => array(
+ 'field' => 'pagerank',
+ 'factor' => 1000,
+ 'modifier' => 'none',
+ 'missing' => 1e-10,
+ )
+ ),
+ )
+);
+
+/**
+ * Request-time selection of the active rescore profile.
+ */
+class RescoreProfiles {
+	/**
+	 * Replace $wgCirrusSearchRescoreProfile with the profile named by the
+	 * 'cirrusRescoreProfile' request value, provided that name is a key of
+	 * $wgCirrusSearchRescoreProfiles. A missing or unknown name leaves the
+	 * current profile untouched.
+	 *
+	 * @param object $request request object; only getVal() is called on it
+	 */
+	public static function overrideOptions( $request ) {
+		global $wgCirrusSearchRescoreProfile,
+			$wgCirrusSearchRescoreProfiles;
+
+		$requested = $request->getVal( 'cirrusRescoreProfile' );
+		if ( $requested === null ) {
+			// No profile asked for.
+			return;
+		}
+		if ( !isset ( $wgCirrusSearchRescoreProfiles[$requested] ) ) {
+			// Unknown profile name: keep the configured one.
+			return;
+		}
+		$wgCirrusSearchRescoreProfile = $wgCirrusSearchRescoreProfiles[$requested];
+	}
+}
diff --git a/profiles/SuggestProfiles.php b/profiles/SuggestProfiles.php
index c04a6f9..6e32bd8 100644
--- a/profiles/SuggestProfiles.php
+++ b/profiles/SuggestProfiles.php
@@ -4,8 +4,6 @@
* CirrusSearch - List of profiles for search as you type suggestions
* (Completion suggester)
*
- * Set $wgSearchType to 'CirrusSearch'
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
--
To view, visit https://gerrit.wikimedia.org/r/249460
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I15296898b15761e85555ea42d9fa23c2a22f82b5
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: DCausse <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits