jenkins-bot has submitted this change and it was merged.
Change subject: Use structured fields API to build mapping
......................................................................
Use structured fields API to build mapping
Change-Id: Ibb4fc637637a8305b966a2f9702f8dcfac9dc94b
Depends-On: Iad6876aae109ad84c5534619f47c72edc900d704
Bug: T89733
---
M CirrusSearch.php
M autoload.php
M includes/CirrusSearch.php
M includes/Maintenance/MappingConfigBuilder.php
A includes/Search/BooleanIndexField.php
A includes/Search/CirrusIndexField.php
A includes/Search/DatetimeIndexField.php
A includes/Search/IntegerIndexField.php
A includes/Search/KeywordIndexField.php
A includes/Search/NestedIndexField.php
A includes/Search/NumberIndexField.php
A includes/Search/TextIndexField.php
A tests/unit/IndexFieldsTest.php
A tests/unit/Search/SearchFieldsTest.php
14 files changed, 648 insertions(+), 206 deletions(-)
Approvals:
Cindy-the-browser-test-bot: Looks good to me, but someone else must approve
DCausse: Looks good to me, approved
jenkins-bot: Verified
diff --git a/CirrusSearch.php b/CirrusSearch.php
index 5a041b1..2c1f573 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -997,6 +997,19 @@
);
/**
+ * Mapping of search index field types to the CirrusSearch field classes that implement them.
+ */
+$wgCirrusSearchFieldTypes = array(
+ SearchIndexField::INDEX_TYPE_TEXT =>
\CirrusSearch\Search\TextIndexField::class,
+ SearchIndexField::INDEX_TYPE_KEYWORD =>
\CirrusSearch\Search\KeywordIndexField::class,
+ SearchIndexField::INDEX_TYPE_INTEGER =>
\CirrusSearch\Search\IntegerIndexField::class,
+ SearchIndexField::INDEX_TYPE_NUMBER =>
\CirrusSearch\Search\NumberIndexField::class,
+ SearchIndexField::INDEX_TYPE_DATETIME =>
\CirrusSearch\Search\DatetimeIndexField::class,
+ SearchIndexField::INDEX_TYPE_BOOL =>
\CirrusSearch\Search\BooleanIndexField::class,
+ SearchIndexField::INDEX_TYPE_NESTED =>
\CirrusSearch\Search\NestedIndexField::class,
+);
+
+/**
* Jenkins configuration required to get all the browser tests passing cleanly.
*
* @todo re-enable the code below if/when browser tests are enabled again
diff --git a/autoload.php b/autoload.php
index 751f078..55fa2b6 100644
--- a/autoload.php
+++ b/autoload.php
@@ -94,8 +94,11 @@
'CirrusSearch\\Sanity\\QueueingRemediator' => __DIR__ .
'/includes/Sanity/QueueingRemediator.php',
'CirrusSearch\\Sanity\\Remediator' => __DIR__ .
'/includes/Sanity/Remediator.php',
'CirrusSearch\\SearchConfig' => __DIR__ . '/includes/SearchConfig.php',
+ 'CirrusSearch\\Search\\BooleanIndexField' => __DIR__ .
'/includes/Search/BooleanIndexField.php',
'CirrusSearch\\Search\\BoostTemplatesFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
+ 'CirrusSearch\\Search\\CirrusIndexField' => __DIR__ .
'/includes/Search/CirrusIndexField.php',
'CirrusSearch\\Search\\CustomFieldFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
+ 'CirrusSearch\\Search\\DatetimeIndexField' => __DIR__ .
'/includes/Search/DatetimeIndexField.php',
'CirrusSearch\\Search\\EmptyResultSet' => __DIR__ .
'/includes/Search/EmptyResultSet.php',
'CirrusSearch\\Search\\Escaper' => __DIR__ .
'/includes/Search/Escaper.php',
'CirrusSearch\\Search\\FancyTitleResultsType' => __DIR__ .
'/includes/Search/ResultsType.php',
@@ -108,12 +111,16 @@
'CirrusSearch\\Search\\GeoRadiusFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
'CirrusSearch\\Search\\IdResultsType' => __DIR__ .
'/includes/Search/ResultsType.php',
'CirrusSearch\\Search\\IncomingLinksFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
+ 'CirrusSearch\\Search\\IntegerIndexField' => __DIR__ .
'/includes/Search/IntegerIndexField.php',
'CirrusSearch\\Search\\InterwikiResultsType' => __DIR__ .
'/includes/Search/ResultsType.php',
'CirrusSearch\\Search\\InvalidRescoreProfileException' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
+ 'CirrusSearch\\Search\\KeywordIndexField' => __DIR__ .
'/includes/Search/KeywordIndexField.php',
'CirrusSearch\\Search\\LangWeightFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
'CirrusSearch\\Search\\LogMultFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
'CirrusSearch\\Search\\LogScaleBoostFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
'CirrusSearch\\Search\\NamespacesFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
+ 'CirrusSearch\\Search\\NestedIndexField' => __DIR__ .
'/includes/Search/NestedIndexField.php',
+ 'CirrusSearch\\Search\\NumberIndexField' => __DIR__ .
'/includes/Search/NumberIndexField.php',
'CirrusSearch\\Search\\PreferRecentFunctionScoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
'CirrusSearch\\Search\\RescoreBuilder' => __DIR__ .
'/includes/Search/RescoreBuilders.php',
'CirrusSearch\\Search\\Result' => __DIR__ .
'/includes/Search/Result.php',
@@ -126,6 +133,7 @@
'CirrusSearch\\Search\\SearchTextQueryBuilder' => __DIR__ .
'/includes/Search/SearchTextQueryBuilders.php',
'CirrusSearch\\Search\\SearchTextQueryBuilderFactory' => __DIR__ .
'/includes/Search/SearchTextQueryBuilders.php',
'CirrusSearch\\Search\\SearchTextQueryStringBuilder' => __DIR__ .
'/includes/Search/SearchTextQueryBuilders.php',
+ 'CirrusSearch\\Search\\TextIndexField' => __DIR__ .
'/includes/Search/TextIndexField.php',
'CirrusSearch\\Search\\TitleResultsType' => __DIR__ .
'/includes/Search/ResultsType.php',
'CirrusSearch\\Searcher' => __DIR__ . '/includes/Searcher.php',
'CirrusSearch\\Test\\DummyConnection' => __DIR__ .
'/tests/unit/TestUtils.php',
diff --git a/includes/CirrusSearch.php b/includes/CirrusSearch.php
index c5cc7b1..7646cf7 100644
--- a/includes/CirrusSearch.php
+++ b/includes/CirrusSearch.php
@@ -751,4 +751,19 @@
}
return null;
}
+
+ /**
+ * Create a search field definition
+ * @param string $name
+ * @param int $type
+ * @return SearchIndexField
+ */
+ public function makeSearchFieldMapping( $name, $type ) {
+ $mappings = $this->config->get( 'CirrusSearchFieldTypes' );
+ if ( !isset( $mappings[$type] ) ) {
+ return new NullIndexField();
+ }
+ $klass = $mappings[$type];
+ return new $klass( $name, $type, $this->config );
+ }
}
diff --git a/includes/Maintenance/MappingConfigBuilder.php
b/includes/Maintenance/MappingConfigBuilder.php
index 4e7f645..ec7b9b6 100644
--- a/includes/Maintenance/MappingConfigBuilder.php
+++ b/includes/Maintenance/MappingConfigBuilder.php
@@ -2,9 +2,14 @@
namespace CirrusSearch\Maintenance;
+use CirrusSearch\Search\CirrusIndexField;
+use CirrusSearch\Search\IntegerIndexField;
+use CirrusSearch\Search\KeywordIndexField;
use CirrusSearch\SearchConfig;
+use CirrusSearch\Search\TextIndexField;
use Hooks;
use MediaWiki\MediaWikiServices;
+use SearchIndexField;
/**
* Builds elasticsearch mapping configuration arrays.
@@ -25,26 +30,10 @@
* http://www.gnu.org/copyleft/gpl.html
*/
class MappingConfigBuilder {
- // Bit field parameters for buildStringField.
- const MINIMAL = 0;
- const ENABLE_NORMS = 1;
- const COPY_TO_SUGGEST = 2;
- const SPEED_UP_HIGHLIGHTING = 4;
-
// Bit field parameters for buildConfig
const PREFIX_START_WITH_ANY = 1;
const PHRASE_SUGGEST_USE_TEXT = 2;
-
- /**
- * Maximum number of characters allowed in keyword terms.
- */
- const KEYWORD_IGNORE_ABOVE = 5000;
-
- /**
- * Distance that lucene places between multiple values of the same
field.
- * Set pretty high to prevent accidental phrase queries between those
values.
- */
- const POSITION_INCREMENT_GAP = 10;
+ const OPTIMIZE_FOR_EXPERIMENTAL_HIGHLIGHTER = 4;
/**
* Version number for the core analysis. Increment the major
@@ -59,7 +48,15 @@
*/
private $optimizeForExperimentalHighlighter;
- private $similarity;
+ /**
+ * @var SearchConfig
+ */
+ private $config;
+
+ /**
+ * @var \CirrusSearch
+ */
+ private $engine;
/**
* Constructor
@@ -68,29 +65,28 @@
*/
public function __construct( $optimizeForExperimentalHighlighter,
SearchConfig $config = null ) {
$this->optimizeForExperimentalHighlighter =
$optimizeForExperimentalHighlighter;
- if ( is_null ( $config ) ) {
- $config = MediaWikiServices::getInstance()
- ->getConfigFactory()
- ->makeConfig( 'CirrusSearch' );
+ if ( is_null( $config ) ) {
+ $config =
+
MediaWikiServices::getInstance()->getConfigFactory()->makeConfig(
'CirrusSearch' );
}
- $this->similarity = $config->get(
'CirrusSearchSimilarityProfile' );
+ $this->config = $config;
+ $this->engine = new \CirrusSearch();
+ $this->engine->setConfig( $config );
}
/**
- * Build the mapping config.
- * @param int $flags Flags for building the configuration
- * @return array the mapping config
+ * Get definitions for default index fields.
+ * These fields are always present in the index.
+ * @param int $flags
+ * @return array
*/
- public function buildConfig( $flags = 0 ) {
- global $wgCirrusSearchAllFields,
- $wgCirrusSearchWeights,
- $wgCirrusSearchWikimediaExtraPlugin;
+ private function getDefaultFields( $flags ) {
+ global $wgCirrusSearchWikimediaExtraPlugin;
- $suggestExtra = array( 'analyzer' => 'suggest' );
// Note never to set something as type='object' here because
that isn't returned by elasticsearch
// and is inferred anyway.
$titleExtraAnalyzers = array(
- $suggestExtra,
+ array( 'analyzer' => 'suggest' ),
array( 'analyzer' => 'prefix', 'search_analyzer' =>
'near_match', 'index_options' => 'docs', 'norms' => array( 'enabled' => false )
),
array( 'analyzer' => 'prefix_asciifolding',
'search_analyzer' => 'near_match_asciifolding', 'index_options' => 'docs',
'norms' => array( 'enabled' => false ) ),
array( 'analyzer' => 'near_match', 'index_options' =>
'docs', 'norms' => array( 'enabled' => false ) ),
@@ -104,23 +100,17 @@
'index_options' => 'docs'
);
}
+
$sourceExtraAnalyzers = array();
if ( isset( $wgCirrusSearchWikimediaExtraPlugin[ 'regex' ] ) &&
- in_array( 'build',
$wgCirrusSearchWikimediaExtraPlugin[ 'regex' ] ) ) {
+ in_array( 'build', $wgCirrusSearchWikimediaExtraPlugin[
'regex' ] ) ) {
$sourceExtraAnalyzers[] = array(
'analyzer' => 'trigram',
'index_options' => 'docs',
);
}
- $textExtraAnalyzers = array();
- $textOptions = MappingConfigBuilder::ENABLE_NORMS |
MappingConfigBuilder::SPEED_UP_HIGHLIGHTING;
- if ( $flags & self::PHRASE_SUGGEST_USE_TEXT ) {
- $textExtraAnalyzers[] = $suggestExtra;
- $textOptions |= MappingConfigBuilder::COPY_TO_SUGGEST;
- }
-
- $page = array(
+ $page = [
'dynamic' => false,
'_all' => array( 'enabled' => false ),
'properties' => array(
@@ -128,58 +118,85 @@
'type' => 'date',
'format' => 'dateOptionalTime',
),
- 'wiki' => $this->buildKeywordField(),
- 'namespace' => $this->buildLongField(),
- 'namespace_text' => $this->buildKeywordField(),
+ 'wiki' => $this->buildKeywordField( 'wiki'
)->getMapping( $this->engine ),
+ 'namespace' => $this->buildLongField(
'namespace' )->getMapping( $this->engine ),
+ 'namespace_text' => $this->buildKeywordField(
'namespace_text' )
+ ->getMapping( $this->engine ),
'title' => $this->buildStringField( 'title',
- MappingConfigBuilder::ENABLE_NORMS |
MappingConfigBuilder::COPY_TO_SUGGEST,
- $titleExtraAnalyzers ),
- 'text' => array_merge_recursive(
- $this->buildStringField( 'text',
$textOptions, $textExtraAnalyzers ),
- array( 'fields' => array( 'word_count'
=> array(
- 'type' => 'token_count',
- 'store' => true,
- 'analyzer' => 'plain',
- ) ) )
- ),
- 'opening_text' => $this->buildStringField(
'opening_text', MappingConfigBuilder::ENABLE_NORMS ),
- 'auxiliary_text' => $this->buildStringField(
'auxiliary_text', $textOptions ),
- 'file_text' => $this->buildStringField(
'file_text', $textOptions ),
- 'source_text' => $this->buildStringField(
'source_text', MappingConfigBuilder::MINIMAL,
+ TextIndexField::ENABLE_NORMS |
TextIndexField::COPY_TO_SUGGEST,
+ $titleExtraAnalyzers
)->setMappingFlags( $flags )->getMapping( $this->engine ),
+ 'text' => array_merge_recursive(
$this->buildStringField( 'text', null,
+ ( $flags &
self::PHRASE_SUGGEST_USE_TEXT ) ? [ 'analyzer' => 'suggest' ] : [ ] )
+ ->setMappingFlags( $flags
)->getMapping( $this->engine ), array(
+ 'fields' => array(
+ 'word_count' => array(
+ 'type' =>
'token_count',
+ 'store' => true,
+ 'analyzer' =>
'plain',
+ )
+ )
+ ) ),
+ 'text_bytes' => $this->buildLongField(
'text_bytes' )
+ ->setFlag(
SearchIndexField::FLAG_NO_INDEX )
+ ->getMapping( $this->engine ),
+ 'source_text' => $this->buildStringField(
'source_text', 0,
$sourceExtraAnalyzers
- ),
- 'category' => $this->buildStringField(
'category', $textOptions, array(
- array(
- 'analyzer' =>
'lowercase_keyword',
- 'norms' => array( 'enabled' =>
false ),
- 'index_options' => 'docs',
- 'ignore_above' =>
self::KEYWORD_IGNORE_ABOVE,
- ) )
- ),
- 'template' =>
$this->buildLowercaseKeywordField(),
- 'outgoing_link' => $this->buildKeywordField(),
- 'external_link' => $this->buildKeywordField(),
- 'heading' => $this->buildStringField(
'heading', MappingConfigBuilder::SPEED_UP_HIGHLIGHTING ),
- 'text_bytes' => $this->buildLongField( false ),
+ )->setMappingFlags( $flags )->getMapping(
$this->engine ),
'redirect' => array(
'dynamic' => false,
'properties' => array(
- 'namespace' =>
$this->buildLongField(),
+ 'namespace' =>
$this->buildLongField( 'namespace' )
+ ->getMapping(
$this->engine ),
'title' =>
$this->buildStringField( 'redirect.title',
- $textOptions |
MappingConfigBuilder::COPY_TO_SUGGEST,
- $titleExtraAnalyzers ),
+
TextIndexField::ENABLE_NORMS | TextIndexField::SPEED_UP_HIGHLIGHTING |
+
TextIndexField::COPY_TO_SUGGEST, $titleExtraAnalyzers )
+ ->setMappingFlags(
$flags )
+ ->getMapping(
$this->engine ),
)
),
- 'incoming_links' => $this->buildLongField(),
- 'local_sites_with_dupe' =>
$this->buildLowercaseKeywordField(),
+ 'incoming_links' => $this->buildLongField(
'incoming_links' )
+ ->getMapping( $this->engine ),
+ 'local_sites_with_dupe' =>
$this->buildKeywordField( 'local_sites_with_dupe' )
+ ->setFlag(
SearchIndexField::FLAG_CASEFOLD )
+ ->getMapping( $this->engine ),
'suggest' => array(
'type' => 'string',
'analyzer' => 'suggest',
),
- 'language' => $this->buildKeywordField(),
- 'wikibase_item' => $this->buildKeywordField(),
- ),
- );
+ // FIXME: this should be moved to Wikibase
Client
+ 'wikibase_item' => $this->buildKeywordField(
'wikibase_item' )
+ ->getMapping( $this->engine ),
+ )
+ ];
+
+ return $page;
+ }
+
+ /**
+ * Build the mapping config.
+ * @param int $flags Flags for building the configuration
+ * @return array the mapping config
+ */
+ public function buildConfig( $flags = 0 ) {
+ global $wgCirrusSearchAllFields,
+ $wgCirrusSearchWeights;
+
+ if ( $this->optimizeForExperimentalHighlighter ) {
+ $flags |= self::OPTIMIZE_FOR_EXPERIMENTAL_HIGHLIGHTER;
+ }
+ $page = $this->getDefaultFields( $flags );
+
+ $fields = $this->engine->getSearchIndexFields();
+
+ foreach ( $fields as $fieldName => $field ) {
+ if ( $field instanceof CirrusIndexField ) {
+ $field->setMappingFlags( $flags );
+ }
+ $config = $field->getMapping( $this->engine );
+ if ( $config ) {
+ $page['properties'][$fieldName] = $config;
+ }
+ }
if ( $wgCirrusSearchAllFields[ 'build' ] ) {
// Now layer all the fields into the all field once per
weight. Querying it isn't strictly the
@@ -190,7 +207,9 @@
// This field can't be used for the fvh/experimental
highlighter for several reasons:
// 1. It is built with copy_to and not stored.
// 2. The term frequency information is all whoppy
compared to the "real" source text.
- $page[ 'properties' ][ 'all' ] =
$this->buildStringField( 'all', MappingConfigBuilder::ENABLE_NORMS );
+ $allField = $this->buildStringField( 'all',
TextIndexField::ENABLE_NORMS );
+ $page['properties']['all'] =
+ $allField->setMappingFlags( $flags
)->getMapping( $this->engine );
$page = $this->setupCopyTo( $page,
$wgCirrusSearchWeights, 'all' );
// Now repeat for near_match fields. The same
considerations above apply except near_match
@@ -199,17 +218,17 @@
'type' => 'string',
'analyzer' => 'near_match',
'index_options' => 'freqs',
- 'position_increment_gap' =>
self::POSITION_INCREMENT_GAP,
+ 'position_increment_gap' =>
TextIndexField::POSITION_INCREMENT_GAP,
'norms' => array( 'enabled' => false ),
- 'similarity' => $this->getSimilarity(
'all_near_match' ),
+ 'similarity' => $allField->getSimilarity(
'all_near_match' ),
'fields' => array(
'asciifolding' => array(
'type' => 'string',
'analyzer' =>
'near_match_asciifolding',
'index_options' => 'freqs',
- 'position_increment_gap' =>
self::POSITION_INCREMENT_GAP,
+ 'position_increment_gap' =>
TextIndexField::POSITION_INCREMENT_GAP,
'norms' => array( 'enabled' =>
false ),
- 'similarity' =>
$this->getSimilarity( 'all_near_match', 'asciifolding' ),
+ 'similarity' =>
$allField->getSimilarity( 'all_near_match', 'asciifolding' ),
),
),
);
@@ -219,6 +238,7 @@
);
$page = $this->setupCopyTo( $page, $nearMatchFields,
'all_near_match' );
}
+
$config[ 'page' ] = $page;
$config[ 'namespace' ] = array(
@@ -230,36 +250,14 @@
'analyzer' => 'near_match_asciifolding',
'norms' => array( 'enabled' => false ),
'index_options' => 'docs',
- 'ignore_above' =>
self::KEYWORD_IGNORE_ABOVE,
+ 'ignore_above' =>
KeywordIndexField::KEYWORD_IGNORE_ABOVE,
),
),
);
Hooks::run( 'CirrusSearchMappingConfig', array( &$config, $this
) );
+
return $config;
- }
-
-
- /**
- * Get the field similarity
- * @param string $field
- * @param string $analyzer
- * @return string
- */
- private function getSimilarity( $field, $analyzer = null ) {
- $fieldSimilarity = 'default';
- if ( isset( $this->similarity['fields'] ) ) {
- if( isset( $this->similarity['fields'][$field] ) ) {
- $fieldSimilarity =
$this->similarity['fields'][$field];
- } else if ( $this->similarity['fields']['__default__']
) {
- $fieldSimilarity =
$this->similarity['fields']['__default__'];
- }
-
- if ( $analyzer != null && isset(
$this->similarity['fields']["$field.$analyzer"] ) ) {
- $fieldSimilarity =
$this->similarity['fields']["$field.$analyzer"];
- }
- }
- return $fieldSimilarity;
}
/**
@@ -296,112 +294,32 @@
* SPEED_UP_HIGHLIGHTING: Store extra data in the field to speed up
highlighting. This is important for long
* strings or fields with many values.
* @param array $extra Extra analyzers for this field beyond the basic
text and plain.
- * @return array definition of the field
+ * @return TextIndexField definition of the field
*/
- public function buildStringField( $fieldName, $options, $extra =
array() ) {
- // multi_field is dead in 1.0 so we do this which actually
looks less gnarly.
- $field = array(
- 'type' => 'string',
- 'analyzer' => 'text',
- 'search_analyzer' => 'text_search',
- 'position_increment_gap' =>
self::POSITION_INCREMENT_GAP,
- 'similarity' => $this->getSimilarity( $fieldName ),
- 'fields' => array(
- 'plain' => array(
- 'type' => 'string',
- 'analyzer' => 'plain',
- 'search_analyzer' => 'plain_search',
- 'position_increment_gap' =>
self::POSITION_INCREMENT_GAP,
- 'similarity' => $this->getSimilarity(
$fieldName, 'plain' ),
- ),
- )
- );
- $disableNorms = ( $options & MappingConfigBuilder::ENABLE_NORMS
) === 0;
- if ( $disableNorms ) {
- $disableNorms = array( 'norms' => array( 'enabled' =>
false ) );
- $field = array_merge( $field, $disableNorms );
- $field[ 'fields' ][ 'plain' ] = array_merge( $field[
'fields' ][ 'plain' ], $disableNorms );
- }
- if ( $options & MappingConfigBuilder::COPY_TO_SUGGEST ) {
- $field[ 'copy_to' ] = array( 'suggest' );
- }
- foreach ( $extra as $extraField ) {
- $extraName = $extraField[ 'analyzer' ];
- $field[ 'fields' ][ $extraName ] = array_merge( array(
- 'similarity' => $this->getSimilarity(
$fieldName, $extraName ),
- 'type' => 'string',
- 'position_increment_gap' =>
self::POSITION_INCREMENT_GAP,
- ), $extraField );
- if ( $disableNorms ) {
- $field[ 'fields' ][ $extraName ] = array_merge(
- $field[ 'fields' ][ $extraName ],
- $disableNorms
- );
- }
- }
- if ( $this->optimizeForExperimentalHighlighter ) {
- if ( $options &
MappingConfigBuilder::SPEED_UP_HIGHLIGHTING ) {
- $field[ 'index_options' ] = 'offsets';
- $fieldNames = array( 'plain', 'prefix',
'prefix_asciifolding', 'near_match', 'near_match_asciifolding' );
- foreach ( $fieldNames as $fieldName ) {
- if ( isset( $field[ 'fields' ][
$fieldName ] ) ) {
- $field[ 'fields' ][ $fieldName
][ 'index_options' ] = 'offsets';
- }
- }
- }
- } else {
- // We use the FVH on all fields so turn on term vectors
- $field[ 'term_vector' ] = 'with_positions_offsets';
- $fieldNames = array( 'plain', 'prefix',
'prefix_asciifolding', 'near_match', 'near_match_asciifolding' );
- foreach ( $fieldNames as $fieldName ) {
- if ( isset( $field[ 'fields' ][ $fieldName ] )
) {
- $field[ 'fields' ][ $fieldName ][
'term_vector' ] = 'with_positions_offsets';
- }
- }
- }
+ public function buildStringField( $fieldName, $options = null, $extra =
[] ) {
+ $field =
+ new TextIndexField( $fieldName,
SearchIndexField::INDEX_TYPE_TEXT, $this->config,
+ $extra );
+ $field->setTextOptions( $options );
return $field;
}
/**
- * Create a string field that only lower cases and does ascii folding
(if enabled) for the language.
- * @return array definition of the field
+ * Create a long field.
+ * @param string $name Field name
+ * @return IntegerIndexField
*/
- public function buildLowercaseKeywordField() {
- return array(
- 'type' => 'string',
- 'analyzer' => 'lowercase_keyword',
- 'norms' => array( 'enabled' => false ), // Omit the
length norm because there is only even one token
- 'index_options' => 'docs', // Omit the frequency and
position information because neither are useful
- 'ignore_above' => self::KEYWORD_IGNORE_ABOVE,
- );
- }
-
- /**
- * Create a string field that does no analyzing whatsoever.
- * @return array definition of the field
- */
- public function buildKeywordField() {
- return array(
- 'type' => 'string',
- 'analyzer' => 'keyword',
- 'norms' => array( 'enabled' => false ), // Omit the
length norm because there is only even one token
- 'index_options' => 'docs', // Omit the frequency and
position information because neither are useful
- 'ignore_above' => self::KEYWORD_IGNORE_ABOVE,
- );
+ public function buildLongField( $name ) {
+ return new IntegerIndexField( $name,
SearchIndexField::INDEX_TYPE_INTEGER, $this->config );
}
/**
* Create a long field.
- * @param boolean $index should this be indexed
- * @return array definition of the field
+ * @param string $name Field name
+ * @return KeywordIndexField
*/
- public function buildLongField( $index = true ) {
- $config = array(
- 'type' => 'long',
- );
- if ( !$index ) {
- $config[ 'index' ] = 'no';
- }
- return $config;
+ public function buildKeywordField( $name ) {
+ return new KeywordIndexField( $name,
SearchIndexField::INDEX_TYPE_KEYWORD, $this->config );
}
}
+
diff --git a/includes/Search/BooleanIndexField.php
b/includes/Search/BooleanIndexField.php
new file mode 100644
index 0000000..1a01edf
--- /dev/null
+++ b/includes/Search/BooleanIndexField.php
@@ -0,0 +1,10 @@
+<?php
+namespace CirrusSearch\Search;
+
+/**
+ * Index field representing boolean value.
+ * @package CirrusSearch
+ */
+class BooleanIndexField extends CirrusIndexField {
+ protected $typeName = 'boolean';
+}
\ No newline at end of file
diff --git a/includes/Search/CirrusIndexField.php
b/includes/Search/CirrusIndexField.php
new file mode 100644
index 0000000..240167d
--- /dev/null
+++ b/includes/Search/CirrusIndexField.php
@@ -0,0 +1,71 @@
+<?php
+namespace CirrusSearch\Search;
+
+use SearchEngine;
+use SearchIndexFieldDefinition;
+use SearchIndexField;
+use CirrusSearch\SearchConfig;
+
+/**
+ * Basic ElasticSearch index field
+ * @since 1.28
+ */
+abstract class CirrusIndexField extends SearchIndexFieldDefinition {
+
+ /**
+ * Name of the type in Elastic
+ * @var string
+ */
+ protected $typeName = 'unknown';
+
+ /**
+ * @var SearchConfig
+ */
+ protected $config;
+
+ /**
+ * Specific mapping flags
+ * @var int
+ */
+ protected $mappingFlags;
+
+ /**
+ * CirrusIndexField constructor.
+ * @param string $name
+ * @param int $type
+ * @param SearchConfig $config
+ */
+ public function __construct( $name, $type, SearchConfig $config ) {
+ parent::__construct( $name, $type );
+ $this->config = $config;
+ }
+
+ /**
+ * Set flags for specific mapping
+ * @param int $flags
+ * @return $this
+ */
+ public function setMappingFlags( $flags ) {
+ $this->mappingFlags = $flags;
+ return $this;
+ }
+
+ /**
+ * Get mapping for specific search engine
+ * @param SearchEngine $engine
+ * @return array
+ */
+ public function getMapping( SearchEngine $engine ) {
+ if ( !( $engine instanceof \CirrusSearch ) ) {
+ throw new \LogicException( "Cannot map CirrusSearch
fields for another engine." );
+ }
+
+ $config = [
+ 'type' => $this->typeName,
+ ];
+ if ( $this->checkFlag( SearchIndexField::FLAG_NO_INDEX ) ) {
+ $config['index'] = 'no';
+ }
+ return $config;
+ }
+}
\ No newline at end of file
diff --git a/includes/Search/DatetimeIndexField.php
b/includes/Search/DatetimeIndexField.php
new file mode 100644
index 0000000..ba2a496
--- /dev/null
+++ b/includes/Search/DatetimeIndexField.php
@@ -0,0 +1,18 @@
+<?php
+namespace CirrusSearch\Search;
+
+use SearchEngine;
+/**
+ * Index field representing datetime field.
+ * @package CirrusSearch
+ */
+class DatetimeIndexField extends CirrusIndexField {
+
+ protected $typeName = 'date';
+
+ public function getMapping( SearchEngine $engine ) {
+ $config = parent::getMapping( $engine );
+ $config['format'] = 'dateOptionalTime';
+ return $config;
+ }
+}
\ No newline at end of file
diff --git a/includes/Search/IntegerIndexField.php
b/includes/Search/IntegerIndexField.php
new file mode 100644
index 0000000..748a94f
--- /dev/null
+++ b/includes/Search/IntegerIndexField.php
@@ -0,0 +1,10 @@
+<?php
+namespace CirrusSearch\Search;
+
+/**
+ * Index field representing integer.
+ * @package CirrusSearch
+ */
+class IntegerIndexField extends CirrusIndexField {
+ protected $typeName = 'long';
+}
\ No newline at end of file
diff --git a/includes/Search/KeywordIndexField.php
b/includes/Search/KeywordIndexField.php
new file mode 100644
index 0000000..dd672fa
--- /dev/null
+++ b/includes/Search/KeywordIndexField.php
@@ -0,0 +1,30 @@
+<?php
+namespace CirrusSearch\Search;
+
+/**
+ * Index field representing keyword.
+ * Keywords use special analyzer.
+ * @package CirrusSearch
+ */
+class KeywordIndexField extends CirrusIndexField {
+ protected $typeName = 'string';
+
+ /**
+ * Maximum number of characters allowed in keyword terms.
+ */
+ const KEYWORD_IGNORE_ABOVE = 5000;
+
+ public function getMapping( \SearchEngine $engine ) {
+ $config = parent::getMapping( $engine );
+ $config['analyzer'] =
+ $this->checkFlag( self::FLAG_CASEFOLD ) ?
'lowercase_keyword' : 'keyword';
+ $config += [
+ 'norms' => [ 'enabled' => false ],
+ // Omit the length norm because there is only ever one
token
+ 'index_options' => 'docs',
+ // Omit the frequency and position information because
neither is useful
+ 'ignore_above' => self::KEYWORD_IGNORE_ABOVE,
+ ];
+ return $config;
+ }
+}
\ No newline at end of file
diff --git a/includes/Search/NestedIndexField.php
b/includes/Search/NestedIndexField.php
new file mode 100644
index 0000000..21bbcdc
--- /dev/null
+++ b/includes/Search/NestedIndexField.php
@@ -0,0 +1,26 @@
+<?php
+namespace CirrusSearch\Search;
+
+use SearchIndexField;
+use SearchEngine;
+
+class NestedIndexField extends CirrusIndexField {
+ protected $typeName = "nested";
+
+ /**
+ * Add sub-field for nested field
+ * @param string $name Field name
+ * @param SearchIndexField $subfield Field object
+ */
+ public function addSubfield($name, SearchIndexField $subfield) {
+ $this->subfields[$name] = $subfield;
+ }
+
+ public function getMapping( SearchEngine $engine ) {
+ $fields = parent::getMapping( $engine );
+ foreach ( $this->subfields as $name => $sub ) {
+ $fields['properties'][$name] = $sub->getMapping(
$engine );
+ }
+ return $fields;
+ }
+}
\ No newline at end of file
diff --git a/includes/Search/NumberIndexField.php
b/includes/Search/NumberIndexField.php
new file mode 100644
index 0000000..f391aae
--- /dev/null
+++ b/includes/Search/NumberIndexField.php
@@ -0,0 +1,10 @@
+<?php
+namespace CirrusSearch\Search;
+
+/**
+ * Index field representing double.
+ * @package CirrusSearch
+ */
+class NumberIndexField extends CirrusIndexField {
+ protected $typeName = 'double';
+}
\ No newline at end of file
diff --git a/includes/Search/TextIndexField.php
b/includes/Search/TextIndexField.php
new file mode 100644
index 0000000..b84de69
--- /dev/null
+++ b/includes/Search/TextIndexField.php
@@ -0,0 +1,217 @@
+<?php
+namespace CirrusSearch\Search;
+
+use CirrusSearch\Maintenance\MappingConfigBuilder;
+use SearchIndexField;
+use CirrusSearch\SearchConfig;
+use SearchEngine;
+
+/**
+ * Index field representing analyzed (full-text) string content.
+ * Text fields get a "plain" sub-field plus one sub-field per extra analyzer.
+ * @package CirrusSearch
+ */
+class TextIndexField extends CirrusIndexField {
+ /**
+ * Distance that lucene places between multiple values of the same
field.
+ * Set pretty high to prevent accidental phrase queries between those
values.
+ */
+ const POSITION_INCREMENT_GAP = 10;
+
+ /* Bit field parameters for string fields.
+ * ENABLE_NORMS: Enable norms on the field. Good for text you search
against but useless
+ * for fields that don't get involved in the score.
+ * COPY_TO_SUGGEST: Copy the contents of this field to the suggest
field for "Did you mean".
+ * SPEED_UP_HIGHLIGHTING: Store extra data in the field to speed up
highlighting. This is important for long
+ * strings or fields with many values.
+ */
+ const ENABLE_NORMS = 0x1000000;
+ // FIXME: when exactly do we want to disable norms for text fields?
+ const COPY_TO_SUGGEST = 0x2000000;
+ const SPEED_UP_HIGHLIGHTING = 0x4000000;
+ const STRING_FIELD_MASK = 0xFFFFFF;
+
+ /**
+ * Extra definitions.
+ * @var array
+ */
+ protected $extra;
+ /**
+ * Similarity config
+ * @var array
+ */
+ private $similarity;
+ /**
+ * Text options for this field
+ * @var int
+ */
+ private $textOptions;
+
+ /**
+ * Name of the type in Elastic
+ * @var string
+ */
+ protected $typeName = 'string';
+
+ public function __construct( $name, $type, SearchConfig $config, $extra
= [] ) {
+ parent::__construct($name, $type, $config );
+
+ $this->similarity = $config->get(
'CirrusSearchSimilarityProfile' );
+ $this->extra = $extra;
+ }
+
+ /**
+ * Set text options for this field if non-default
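+ * @param int|null $options Bit field of ENABLE_NORMS, COPY_TO_SUGGEST and SPEED_UP_HIGHLIGHTING; null selects the defaults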
+ * @return $this
+ */
+ public function setTextOptions( $options ) {
+ $this->textOptions = $options;
+ return $this;
+ }
+
+ /**
+ * Get text options for this field
+ * @param int $mappingFlags MappingConfigBuilder flags (only PHRASE_SUGGEST_USE_TEXT is consulted here)
+ * @return int
+ */
+ protected function getTextOptions( $mappingFlags ) {
+ if ( !is_null( $this->textOptions ) ) {
+ return $this->textOptions;
+ }
+ $options = self::ENABLE_NORMS | self::SPEED_UP_HIGHLIGHTING;
+ if ( $mappingFlags &
MappingConfigBuilder::PHRASE_SUGGEST_USE_TEXT &&
+ !$this->checkFlag( SearchIndexField::FLAG_SCORING )
+ ) {
+ // SCORING fields are not copied since this info is
already in other fields
+ $options |= self::COPY_TO_SUGGEST;
+ }
+ if ( $this->checkFlag( SearchIndexField::FLAG_NO_HIGHLIGHT ) ) {
+ // Disable the highlighting speed-up if asked to
+ $options &= ~self::SPEED_UP_HIGHLIGHTING;
+ }
+ return $options;
+ }
+
+ /**
+ * @param SearchEngine $engine
+ * @return array
+ */
+ public function getMapping( SearchEngine $engine ) {
+ if (!($engine instanceof \CirrusSearch)) {
+ throw new \LogicException("Cannot map CirrusSearch
fields for another engine.");
+ }
+ /**
+ * @var \CirrusSearch $engine
+ */
+ $this->flags =
+ ( $this->flags & self::STRING_FIELD_MASK ) |
$this->getTextOptions( $this->mappingFlags );
+
+ $field = parent::getMapping( $engine );
+
+ if ( $this->checkFlag( self::COPY_TO_SUGGEST ) ) {
+ $field[ 'copy_to' ] = array( 'suggest' );
+ }
+
+ if ( $this->checkFlag( self::FLAG_NO_INDEX ) ) {
+ // no need to further configure a non-indexed field
+ return $field;
+ }
+
+ $extra = $this->extra;
+ if ( $this->mappingFlags &
MappingConfigBuilder::PREFIX_START_WITH_ANY ) {
+ $extra[] = [
+ 'analyzer' => 'word_prefix',
+ 'search_analyzer' => 'plain_search',
+ 'index_options' => 'docs'
+ ];
+ }
+ if ( $this->checkFlag( SearchIndexField::FLAG_CASEFOLD ) ) {
+ $extra[] = [
+ 'analyzer' => 'lowercase_keyword',
+ 'norms' => [ 'enabled' => false ],
+ 'index_options' => 'docs',
+ 'ignore_above' =>
KeywordIndexField::KEYWORD_IGNORE_ABOVE,
+ ];
+ }
+
+ // multi_field is dead in 1.0 so we do this which actually
looks less gnarly.
+ $field += array(
+ 'analyzer' => 'text',
+ 'search_analyzer' => 'text_search',
+ 'position_increment_gap' =>
self::POSITION_INCREMENT_GAP,
+ 'similarity' => $this->getSimilarity( $this->name ),
+ 'fields' => array(
+ 'plain' => array(
+ 'type' => 'string',
+ 'analyzer' => 'plain',
+ 'search_analyzer' => 'plain_search',
+ 'position_increment_gap' =>
self::POSITION_INCREMENT_GAP,
+ 'similarity' => $this->getSimilarity(
$this->name, 'plain' ),
+ ),
+ )
+ );
+ $disableNorms = !$this->checkFlag( self::ENABLE_NORMS );
+ if ( $disableNorms ) {
+ $disableNorms = array( 'norms' => array( 'enabled' =>
false ) );
+ $field = array_merge( $field, $disableNorms );
+ $field[ 'fields' ][ 'plain' ] = array_merge( $field[
'fields' ][ 'plain' ], $disableNorms );
+ }
+ foreach ( $extra as $extraField ) {
+ $extraName = $extraField[ 'analyzer' ];
+
+ $field[ 'fields' ][ $extraName ] = array_merge( array(
+ 'similarity' => $this->getSimilarity(
$this->name, $extraName ),
+ 'type' => 'string',
+ 'position_increment_gap' =>
self::POSITION_INCREMENT_GAP,
+ ), $extraField );
+ if ( $disableNorms ) {
+ $field[ 'fields' ][ $extraName ] = array_merge(
+ $field[ 'fields' ][ $extraName ],
$disableNorms );
+ }
+ }
+ if ( $this->mappingFlags &
MappingConfigBuilder::OPTIMIZE_FOR_EXPERIMENTAL_HIGHLIGHTER ) {
+ if ( $this->checkFlag( self::SPEED_UP_HIGHLIGHTING ) ) {
+ $field[ 'index_options' ] = 'offsets';
+ $fieldNames = array( 'plain', 'prefix',
'prefix_asciifolding', 'near_match', 'near_match_asciifolding' );
+ foreach ( $fieldNames as $fieldName ) {
+ if ( isset( $field[ 'fields' ][
$fieldName ] ) ) {
+ $field[ 'fields' ][ $fieldName
][ 'index_options' ] = 'offsets';
+ }
+ }
+ }
+ } else {
+ // We use the FVH on all fields so turn on term vectors
+ $field[ 'term_vector' ] = 'with_positions_offsets';
+ $fieldNames = array( 'plain', 'prefix',
'prefix_asciifolding', 'near_match', 'near_match_asciifolding' );
+ foreach ( $fieldNames as $fieldName ) {
+ if ( isset( $field[ 'fields' ][ $fieldName ] )
) {
+ $field[ 'fields' ][ $fieldName ][
'term_vector' ] = 'with_positions_offsets';
+ }
+ }
+ }
+ return $field;
+ }
+
+ /**
+ * Get the field similarity
+ * @param string $field
+ * @param string $analyzer
+ * @return string
+ */
+ public function getSimilarity( $field, $analyzer = null ) {
+ $fieldSimilarity = 'default';
+ if ( isset( $this->similarity['fields'] ) ) {
+ if( isset( $this->similarity['fields'][$field] ) ) {
+ $fieldSimilarity =
$this->similarity['fields'][$field];
+ } else if ( $this->similarity['fields']['__default__']
) {
+ $fieldSimilarity =
$this->similarity['fields']['__default__'];
+ }
+
+ if ( $analyzer != null && isset(
$this->similarity['fields']["$field.$analyzer"] ) ) {
+ $fieldSimilarity =
$this->similarity['fields']["$field.$analyzer"];
+ }
+ }
+ return $fieldSimilarity;
+ }
+}
diff --git a/tests/unit/IndexFieldsTest.php b/tests/unit/IndexFieldsTest.php
new file mode 100644
index 0000000..c7de4a8
--- /dev/null
+++ b/tests/unit/IndexFieldsTest.php
@@ -0,0 +1,52 @@
+<?php
+
+use MediaWiki\MediaWikiServices;
+
+/**
+ * Checks that each search index field type is served by the expected
+ * CirrusSearch field class and reports the expected mapping type.
+ */
+class IndexFieldsTest extends MediaWikiTestCase {
+
+	/**
+	 * Data provider shared by both tests.
+	 *
+	 * @return array[] Rows of [ field type, mapping type name, class name ]
+	 */
+	public function getTypes() {
+		$cases = [
+			[ SearchIndexField::INDEX_TYPE_TEXT, 'string', 'CirrusSearch\\Search\\TextIndexField' ],
+			[ SearchIndexField::INDEX_TYPE_KEYWORD, 'string', 'CirrusSearch\\Search\\KeywordIndexField' ],
+			[ SearchIndexField::INDEX_TYPE_INTEGER, 'long', 'CirrusSearch\\Search\\IntegerIndexField' ],
+			[ SearchIndexField::INDEX_TYPE_NUMBER, 'double', 'CirrusSearch\\Search\\NumberIndexField' ],
+			[ SearchIndexField::INDEX_TYPE_DATETIME, 'date', 'CirrusSearch\\Search\\DatetimeIndexField' ],
+			[ SearchIndexField::INDEX_TYPE_NESTED, 'nested', 'CirrusSearch\\Search\\NestedIndexField' ],
+			[ SearchIndexField::INDEX_TYPE_BOOL, 'boolean', 'CirrusSearch\\Search\\BooleanIndexField' ],
+		];
+		return $cases;
+	}
+
+	/**
+	 * Instantiates the field class directly and checks its mapping type,
+	 * index type and name.
+	 *
+	 * @dataProvider getTypes
+	 * @param int $type Field type
+	 * @param string $typeName Internal type name
+	 * @param string $klass Class name
+	 */
+	public function testFieldTypes( $type, $typeName, $klass ) {
+		$fieldName = "test$typeName";
+		$services = MediaWikiServices::getInstance();
+		$config = $services->getConfigFactory()->makeConfig( 'CirrusSearch' );
+		$engine = new CirrusSearch();
+		/** @var \CirrusSearch\Search\CirrusIndexField $indexField */
+		$indexField = new $klass( $fieldName, $type, $config );
+		$mapping = $indexField->getMapping( $engine );
+		$this->assertEquals( $typeName, $mapping['type'] );
+		$this->assertEquals( $type, $indexField->getIndexType() );
+		$this->assertEquals( $fieldName, $indexField->getName() );
+	}
+
+	/**
+	 * Asks the search engine to build the field and checks that the result
+	 * has the expected class, index type and name.
+	 *
+	 * @dataProvider getTypes
+	 * @param int $type Field type
+	 * @param string $typeName Internal type name
+	 * @param string $klass Class name
+	 */
+	public function testFieldEngine( $type, $typeName, $klass ) {
+		$expectedName = "test$typeName";
+		$searchEngine = new CirrusSearch();
+		$created = $searchEngine->makeSearchFieldMapping( $expectedName, $type );
+		$this->assertInstanceOf( $klass, $created );
+		$this->assertEquals( $type, $created->getIndexType() );
+		$this->assertEquals( $expectedName, $created->getName() );
+	}
+}
\ No newline at end of file
diff --git a/tests/unit/Search/SearchFieldsTest.php
b/tests/unit/Search/SearchFieldsTest.php
new file mode 100644
index 0000000..37fd7af
--- /dev/null
+++ b/tests/unit/Search/SearchFieldsTest.php
@@ -0,0 +1,44 @@
+<?php
+
+namespace CirrusSearch\Search;
+
+use SearchIndexField;
+
+/**
+ * Tests the field objects the CirrusSearch engine builds for each generic
+ * index field type.
+ */
+class SearchFieldsTest extends \PHPUnit_Framework_TestCase {
+
+	/**
+	 * Data provider for testFields.
+	 *
+	 * @return array[] Rows of [ generic field type, expected mapping 'type' value ]
+	 */
+	public function getFields() {
+		return [
+			[ SearchIndexField::INDEX_TYPE_TEXT, 'string' ],
+			[ SearchIndexField::INDEX_TYPE_KEYWORD, 'string' ],
+			[ SearchIndexField::INDEX_TYPE_INTEGER, 'long' ],
+			[ SearchIndexField::INDEX_TYPE_NUMBER, 'double' ],
+			[ SearchIndexField::INDEX_TYPE_DATETIME, 'date' ],
+			[ SearchIndexField::INDEX_TYPE_BOOL, 'boolean' ],
+			[ SearchIndexField::INDEX_TYPE_NESTED, 'nested' ],
+		];
+	}
+
+	/**
+	 * The engine must return a CirrusIndexField whose mapping carries the
+	 * expected type; after setting FLAG_NO_INDEX the mapping's 'index' entry
+	 * must be 'no'.
+	 *
+	 * @dataProvider getFields
+	 * @param int $type Generic type
+	 * @param string $elasticType Elasticsearch type
+	 */
+	public function testFields( $type, $elasticType ) {
+		$engine = new \CirrusSearch();
+		$field = $engine->makeSearchFieldMapping( 'testField-' . $type, $type );
+		$this->assertInstanceOf( CirrusIndexField::class, $field );
+		$mapping = $field->getMapping( $engine );
+		// Strict comparison: a loose one would also accept e.g. boolean true.
+		$this->assertSame( $elasticType, $mapping['type'] );
+
+		$field->setFlag( SearchIndexField::FLAG_NO_INDEX );
+		$mapping = $field->getMapping( $engine );
+		$this->assertSame( 'no', $mapping['index'] );
+	}
+
+	/**
+	 * An unknown field type (42) must yield a NullIndexField whose mapping
+	 * is null.
+	 */
+	public function testBadField() {
+		$engine = new \CirrusSearch();
+		$field = $engine->makeSearchFieldMapping( 'testBadField', 42 );
+		$this->assertInstanceOf( \NullIndexField::class, $field );
+		// assertNull rejects '', false and [] which assertEquals( null, ... ) lets through.
+		$this->assertNull( $field->getMapping( $engine ) );
+	}
+}
\ No newline at end of file
--
To view, visit https://gerrit.wikimedia.org/r/288567
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ibb4fc637637a8305b966a2f9702f8dcfac9dc94b
Gerrit-PatchSet: 29
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Smalyshev <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: Cindy-the-browser-test-bot <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: EBernhardson <[email protected]>
Gerrit-Reviewer: Gehel <[email protected]>
Gerrit-Reviewer: Manybubbles <[email protected]>
Gerrit-Reviewer: Smalyshev <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits