jenkins-bot has submitted this change and it was merged.
Change subject: Introduce hook handlers for CirrusSearch
......................................................................
Introduce hook handlers for CirrusSearch
We use the CirrusSearchMappingConfig and the
CirrusSearchBuildDocumentParse hooks to add
fields that contain a count of site links and
count of statements.
These fields can potentially be considered when
ranking search results and boost some items with
a high number of site links and/or statements.
Bug: T119066
Change-Id: I34cce7281d10cc1c4176ebed6b9a3b90e10fe1da
---
M repo/Wikibase.php
A repo/includes/Hooks/CirrusSearchHookHandlers.php
A repo/includes/Search/Elastic/Fields/SearchIndexField.php
A repo/includes/Search/Elastic/Fields/SiteLinkCountField.php
A repo/includes/Search/Elastic/Fields/StatementCountField.php
A repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php
A repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php
A repo/tests/phpunit/includes/Search/Elastic/Fields/SiteLinkCountFieldTest.php
A repo/tests/phpunit/includes/Search/Elastic/Fields/StatementCountFieldTest.php
A
repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php
10 files changed, 563 insertions(+), 0 deletions(-)
Approvals:
Daniel Kinzler: Looks good to me, but someone else must approve
Thiemo Mättig (WMDE): Looks good to me, approved
DCausse: Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/repo/Wikibase.php b/repo/Wikibase.php
index a9202ca..8464e21 100644
--- a/repo/Wikibase.php
+++ b/repo/Wikibase.php
@@ -243,6 +243,10 @@
$wgHooks['SkinMinervaDefaultModules'][] =
'Wikibase\RepoHooks::onSkinMinervaDefaultModules';
$wgHooks['ResourceLoaderRegisterModules'][] =
'Wikibase\RepoHooks::onResourceLoaderRegisterModules';
+ // CirrusSearch hooks
+ $wgHooks['CirrusSearchMappingConfig'][] =
'Wikibase\Repo\Hooks\CirrusSearchHookHandlers::onCirrusSearchMappingConfig';
+ $wgHooks['CirrusSearchBuildDocumentParse'][] =
'Wikibase\Repo\Hooks\CirrusSearchHookHandlers::onCirrusSearchBuildDocumentParse';
+
// update hooks
$wgHooks['LoadExtensionSchemaUpdates'][] =
'\Wikibase\Repo\Store\Sql\ChangesSubscriptionSchemaUpdater::onSchemaUpdate';
diff --git a/repo/includes/Hooks/CirrusSearchHookHandlers.php
b/repo/includes/Hooks/CirrusSearchHookHandlers.php
new file mode 100644
index 0000000..e9ee6e2
--- /dev/null
+++ b/repo/includes/Hooks/CirrusSearchHookHandlers.php
@@ -0,0 +1,119 @@
+<?php
+
+namespace Wikibase\Repo\Hooks;
+
+use CirrusSearch\Connection;
+use CirrusSearch\Maintenance\MappingConfigBuilder;
+use Content;
+use Elastica\Document;
+use ParserOutput;
+use Title;
+use UnexpectedValueException;
+use Wikibase\EntityContent;
+use Wikibase\Repo\Search\Elastic\Fields\WikibaseFieldDefinitions;
+
+/**
+ * @since 0.5
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class CirrusSearchHookHandlers {
+
+ /**
+ * @var WikibaseFieldDefinitions
+ */
+ private $fieldDefinitions;
+
+ /**
+ * @param Document $document
+ * @param Title $title
+ * @param Content $content
+ * @param ParserOutput $parserOutput
+ * @param Connection $connection
+ *
+ * @return bool
+ */
+ public static function onCirrusSearchBuildDocumentParse(
+ Document $document,
+ Title $title,
+ Content $content,
+ ParserOutput $parserOutput,
+ Connection $connection
+ ) {
+ $hookHandler = self::newFromGlobalState();
+ $hookHandler->indexExtraFields( $document, $content );
+
+ return true;
+ }
+
+ /**
+ * @param array &$config
+ * @param MappingConfigBuilder $mappingConfigBuilder
+ *
+ * @return bool
+ */
+ public static function onCirrusSearchMappingConfig(
+ array &$config,
+ MappingConfigBuilder $mappingConfigBuilder
+ ) {
+ $handler = self::newFromGlobalState();
+ $handler->addExtraFieldsToMappingConfig( $config );
+
+ return true;
+ }
+
+ /**
+ * @return BuildDocumentParserHookHandler
+ */
+ public static function newFromGlobalState() {
+ return new self(
+ new WikibaseFieldDefinitions()
+ );
+ }
+
+ /**
+ * @param WikibaseFieldDefinitions $fieldDefinitions
+ */
+ public function __construct(
+ WikibaseFieldDefinitions $fieldDefinitions
+ ) {
+ $this->fieldDefinitions = $fieldDefinitions;
+ }
+
+ /**
+ * @param Document $document
+ * @param Content $content
+ */
+ public function indexExtraFields( Document $document, Content $content
) {
+ if ( !$content instanceof EntityContent ||
$content->isRedirect() === true ) {
+ return;
+ }
+
+ $fields = $this->fieldDefinitions->getFields();
+ $entity = $content->getEntity();
+
+ foreach ( $fields as $fieldName => $field ) {
+ $data = $field->getFieldData( $entity );
+ $document->set( $fieldName, $data );
+ }
+ }
+
+ /**
+ * @param array &$config
+ *
+ * @throws UnexpectedValueException
+ */
+ public function addExtraFieldsToMappingConfig( array &$config ) {
+ $fields = $this->fieldDefinitions->getFields();
+
+ foreach ( $fields as $fieldName => $field ) {
+ if ( array_key_exists( $fieldName,
$config['page']['properties'] ) ) {
+ throw new UnexpectedValueException( "$fieldName
is already set in the mapping." );
+ }
+
+ $config['page']['properties'][$fieldName] =
$field->getMapping();
+ }
+ }
+
+}
diff --git a/repo/includes/Search/Elastic/Fields/SearchIndexField.php
b/repo/includes/Search/Elastic/Fields/SearchIndexField.php
new file mode 100644
index 0000000..4365c59
--- /dev/null
+++ b/repo/includes/Search/Elastic/Fields/SearchIndexField.php
@@ -0,0 +1,41 @@
+<?php
+
+namespace Wikibase\Repo\Search\Elastic\Fields;
+
+use Wikibase\DataModel\Entity\EntityDocument;
+
+/**
+ * Each field is intended to be by CirrusSearch as an
+ * additional property of a page.
+ *
+ * The data returned by the field must match the field
+ * type defined in the mapping. (e.g. nested must be array,
+ * integer field must get an int, etc)
+ *
+ * @since 0.5
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+interface SearchIndexField {
+
+ /**
+ * @return array The field mapping defines attributes of the field,
+ * such as the field type (e.g. "string", "long",
"nested")
+ * and other attributes like "not_analyzed".
+ *
+ * For detailed documentation about mapping of fields,
see:
+ *
https://www.elastic.co/guide/en/elasticsearch/guide/current/mapping-intro.html
+ */
+ public function getMapping();
+
+ /**
+ * @param EntityDocument $entity
+ *
+ * @return mixed Get the value of the field to be indexed when a
page/document
+ * is indexed. This might be an array with nested data,
if the field
+ * is defined with nested type or an int or string for
simple field types.
+ */
+ public function getFieldData( EntityDocument $entity );
+
+}
diff --git a/repo/includes/Search/Elastic/Fields/SiteLinkCountField.php
b/repo/includes/Search/Elastic/Fields/SiteLinkCountField.php
new file mode 100644
index 0000000..718befa
--- /dev/null
+++ b/repo/includes/Search/Elastic/Fields/SiteLinkCountField.php
@@ -0,0 +1,42 @@
+<?php
+
+namespace Wikibase\Repo\Search\Elastic\Fields;
+
+use Wikibase\DataModel\Entity\EntityDocument;
+use Wikibase\DataModel\Entity\Item;
+
+/**
+ * @since 0.5
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class SiteLinkCountField implements SearchIndexField {
+
+ /**
+ * @see SearchIndexField::getMapping
+ *
+ * @return array
+ */
+ public function getMapping() {
+ return array(
+ 'type' => 'integer'
+ );
+ }
+
+ /**
+ * @see SearchIndexField::getFieldData
+ *
+ * @param EntityDocument $entity
+ *
+ * @return int
+ */
+ public function getFieldData( EntityDocument $entity ) {
+ if ( $entity instanceof Item ) {
+ return $entity->getSiteLinkList()->count();
+ }
+
+ return 0;
+ }
+
+}
diff --git a/repo/includes/Search/Elastic/Fields/StatementCountField.php
b/repo/includes/Search/Elastic/Fields/StatementCountField.php
new file mode 100644
index 0000000..ccde4f3
--- /dev/null
+++ b/repo/includes/Search/Elastic/Fields/StatementCountField.php
@@ -0,0 +1,42 @@
+<?php
+
+namespace Wikibase\Repo\Search\Elastic\Fields;
+
+use Wikibase\DataModel\Entity\EntityDocument;
+use Wikibase\DataModel\Statement\StatementListHolder;
+
+/**
+ * @since 0.5
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class StatementCountField implements SearchIndexField {
+
+ /**
+ * @see SearchIndexField::getMapping
+ *
+ * @return array
+ */
+ public function getMapping() {
+ return array(
+ 'type' => 'integer'
+ );
+ }
+
+ /**
+ * @see SearchIndexField::getFieldData
+ *
+ * @param EntityDocument $entity
+ *
+ * @return int
+ */
+ public function getFieldData( EntityDocument $entity ) {
+ if ( $entity instanceof StatementListHolder ) {
+ return $entity->getStatements()->count();
+ }
+
+ return 0;
+ }
+
+}
diff --git a/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php
b/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php
new file mode 100644
index 0000000..43091df
--- /dev/null
+++ b/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php
@@ -0,0 +1,19 @@
+<?php
+
+namespace Wikibase\Repo\Search\Elastic\Fields;
+
+class WikibaseFieldDefinitions {
+
+ /**
+ * @return SearchIndexField[] Array key is field name.
+ */
+ public function getFields() {
+ $fields = array(
+ 'sitelink_count' => new SiteLinkCountField(),
+ 'statement_count' => new StatementCountField()
+ );
+
+ return $fields;
+ }
+
+}
diff --git a/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php
b/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php
new file mode 100644
index 0000000..af2acc2
--- /dev/null
+++ b/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php
@@ -0,0 +1,156 @@
+<?php
+
+namespace Wikibase\Repo\Tests\Hooks;
+
+use Elastica\Document;
+use ParserOutput;
+use PHPUnit_Framework_TestCase;
+use Title;
+use Wikibase\DataModel\Entity\Item;
+use Wikibase\DataModel\Entity\PropertyId;
+use Wikibase\DataModel\Snak\PropertyNoValueSnak;
+use Wikibase\Repo\Search\Elastic\Fields\WikibaseFieldDefinitions;
+use Wikibase\Repo\Hooks\CirrusSearchHookHandlers;
+use Wikibase\Repo\WikibaseRepo;
+
+/**
+ * @covers Wikibase\Repo\Hooks\CirrusSearchHookHandlers
+ *
+ * @since 0.5
+ *
+ * @group WikibaseElastic
+ * @group WikibaseRepo
+ * @group Wikibase
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class CirrusSearchHookHandlersTest extends PHPUnit_Framework_TestCase {
+
+ protected function setUp() {
+ parent::setUp();
+
+ if ( !class_exists( 'CirrusSearch' ) ) {
+ $this->markTestSkipped( 'CirrusSearch is not available'
);
+ }
+ }
+
+ public function testOnCirrusSearchBuildDocumentParse() {
+ $connection = $this->getMockBuilder( 'CirrusSearch\Connection' )
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $document = new Document();
+
+ CirrusSearchHookHandlers::onCirrusSearchBuildDocumentParse(
+ $document,
+ Title::newFromText( 'Q1' ),
+ $this->getContent(),
+ new ParserOutput(),
+ $connection
+ );
+
+ $this->assertSame( 1, $document->get( 'sitelink_count' ),
'sitelink_count' );
+ $this->assertSame( 1, $document->get( 'statement_count' ),
'statement_count' );
+ }
+
+ public function testOnCirrusSearchMappingConfig() {
+ $mappingConfigBuilder = $this->getMockBuilder(
+ 'CirrusSearch\Maintenance\MappingConfigBuilder'
+ )
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $config = array(
+ 'page' => array(
+ 'properties' => array()
+ )
+ );
+
+ CirrusSearchHookHandlers::onCirrusSearchMappingConfig( $config,
$mappingConfigBuilder );
+
+ $this->assertSame(
+ array( 'sitelink_count', 'statement_count' ),
+ array_keys( $config['page']['properties'] )
+ );
+ }
+
+ public function testIndexExtraFields() {
+ $fieldDefinitions = $this->newFieldDefinitions();
+
+ $document = new Document();
+ $content = $this->getContent();
+
+ $hookHandlers = new CirrusSearchHookHandlers( $fieldDefinitions
);
+ $hookHandlers->indexExtraFields( $document, $content );
+
+ $this->assertSame( 1, $document->get( 'sitelink_count' ),
'sitelink_count' );
+ $this->assertSame( 1, $document->get( 'statement_count' ),
'statement_count' );
+ }
+
+ public function testAddExtraFieldsToMappingConfig() {
+ $fieldDefinitions = $this->newFieldDefinitions();
+
+ $config = array(
+ 'page' => array(
+ 'properties' => array()
+ )
+ );
+
+ $hookHandlers = new CirrusSearchHookHandlers( $fieldDefinitions
);
+ $hookHandlers->addExtraFieldsToMappingConfig( $config );
+
+ $expected = array(
+ 'page' => array(
+ 'properties' => array(
+ 'sitelink_count' => array(
+ 'type' => 'integer'
+ ),
+ 'statement_count' => array(
+ 'type' => 'integer'
+ )
+ )
+ )
+ );
+
+ $this->assertSame( $expected, $config );
+ }
+
+ public function
testAddExtraFields_throwsExceptionIfFieldNameAlreadySet() {
+ $fieldDefinitions = $this->newFieldDefinitions();
+
+ $config = array(
+ 'page' => array(
+ 'properties' => array(
+ 'sitelink_count' => array(
+ 'type' => 'long'
+ )
+ )
+ )
+ );
+
+ $this->setExpectedException( 'UnexpectedValueException' );
+
+ $hookHandlers = new CirrusSearchHookHandlers( $fieldDefinitions
);
+ $hookHandlers->addExtraFieldsToMappingConfig( $config );
+ }
+
+ private function newFieldDefinitions() {
+ // when we add multilingual fields, then
WikibaseFieldDefinitions
+ // will take WikibaseContentLanguages as an argument.
+ return new WikibaseFieldDefinitions();
+ }
+
+ private function getContent() {
+ $item = new Item();
+ $item->getSiteLinkList()->addNewSiteLink( 'enwiki', 'Kitten' );
+ $item->getStatements()->addNewStatement(
+ new PropertyNoValueSnak( new PropertyId( 'P1' ) )
+ );
+
+ $entityContentFactory =
WikibaseRepo::getDefaultInstance()->getEntityContentFactory();
+
+ return $entityContentFactory->newFromEntity( $item );
+ }
+
+}
diff --git
a/repo/tests/phpunit/includes/Search/Elastic/Fields/SiteLinkCountFieldTest.php
b/repo/tests/phpunit/includes/Search/Elastic/Fields/SiteLinkCountFieldTest.php
new file mode 100644
index 0000000..958711c
--- /dev/null
+++
b/repo/tests/phpunit/includes/Search/Elastic/Fields/SiteLinkCountFieldTest.php
@@ -0,0 +1,52 @@
+<?php
+
+namespace Wikibase\Test;
+
+use Wikibase\DataModel\Entity\Item;
+use Wikibase\DataModel\Entity\Property;
+use Wikibase\Repo\Search\Elastic\Fields\SiteLinkCountField;
+
+/**
+ * @covers Wikibase\Repo\Elastic\Search\Fields\SiteLinkCountField
+ *
+ * @group WikibaseElastic
+ * @group WikibaseRepo
+ * @group Wikibase
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class SiteLinkCountFieldTest extends \PHPUnit_Framework_TestCase {
+
+ public function testGetMapping() {
+ $siteLinkCountField = new SiteLinkCountField();
+
+ $expected = array(
+ 'type' => 'integer'
+ );
+
+ $this->assertSame( $expected, $siteLinkCountField->getMapping()
);
+ }
+
+ /**
+ * @dataProvider getFieldDataProvider
+ */
+ public function testGetFieldData( $expected, $entity ) {
+ $siteLinkCountField = new SiteLinkCountField();
+
+ $this->assertSame( $expected,
$siteLinkCountField->getFieldData( $entity ) );
+ }
+
+ public function getFieldDataProvider() {
+ $item = new Item();
+
+ $item->getSiteLinkList()->addNewSiteLink( 'enwiki', 'Kitten' );
+ $item->getSiteLinkList()->addNewSiteLink( 'eswiki', 'Gato' );
+
+ return array(
+ array( 2, $item ),
+ array( 0, Property::newFromType( 'string' ) )
+ );
+ }
+
+}
diff --git
a/repo/tests/phpunit/includes/Search/Elastic/Fields/StatementCountFieldTest.php
b/repo/tests/phpunit/includes/Search/Elastic/Fields/StatementCountFieldTest.php
new file mode 100644
index 0000000..454a7ff
--- /dev/null
+++
b/repo/tests/phpunit/includes/Search/Elastic/Fields/StatementCountFieldTest.php
@@ -0,0 +1,48 @@
+<?php
+
+namespace Wikibase\Test;
+
+use DataValues\StringValue;
+use Wikibase\DataModel\Entity\Item;
+use Wikibase\DataModel\Entity\PropertyId;
+use Wikibase\DataModel\Snak\PropertyValueSnak;
+use Wikibase\DataModel\Statement\StatementList;
+use Wikibase\Repo\Search\Elastic\Fields\StatementCountField;
+
+/**
+ * @covers Wikibase\Repo\Search\Elastic\Fields\StatementCountField
+ *
+ * @group WikibaseElastic
+ * @group WikibaseRepo
+ * @group Wikibase
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class StatementCountFieldTest extends \PHPUnit_Framework_TestCase {
+
+ public function testGetMapping() {
+ $statementCountField = new StatementCountField();
+
+ $expected = array(
+ 'type' => 'integer'
+ );
+
+ $this->assertSame( $expected,
$statementCountField->getMapping() );
+ }
+
+ public function testGetFieldData() {
+ $statementCountField = new StatementCountField();
+
+ $statements = new StatementList();
+ $statements->addNewStatement(
+ new PropertyValueSnak( new PropertyId( 'P1' ), new
StringValue( 'o_O' ) )
+ );
+
+ $item = new Item();
+ $item->setStatements( $statements );
+
+ $this->assertSame( 1, $statementCountField->getFieldData( $item
) );
+ }
+
+}
diff --git
a/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php
b/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php
new file mode 100644
index 0000000..d7830f5
--- /dev/null
+++
b/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php
@@ -0,0 +1,40 @@
+<?php
+
+namespace Wikibase\Test;
+
+use Wikibase\Repo\Search\Elastic\Fields\WikibaseFieldDefinitions;
+
+/**
+ * @covers Wikibase\Repo\Search\Elastic\Fields\WikibaseFieldDefinitions
+ *
+ * @group WikibaseElastic
+ * @group WikibaseRepo
+ * @group Wikibase
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class WikibaseFieldDefinitionsTest extends \PHPUnit_Framework_TestCase {
+
+ public function testGetFields() {
+ $wikibaseFieldDefinitions = new WikibaseFieldDefinitions();
+ $fields = $wikibaseFieldDefinitions->getFields();
+
+ $expectedFieldNames = array( 'sitelink_count',
'statement_count' );
+
+ $this->assertSame( $expectedFieldNames, array_keys( $fields ) );
+ }
+
+ public function testGetFields_instanceOfSearchIndexField() {
+ $wikibaseFieldDefinitions = new WikibaseFieldDefinitions();
+
+ foreach ( $wikibaseFieldDefinitions->getFields() as $fieldName
=> $field ) {
+ $this->assertInstanceOf(
+
'Wikibase\Repo\Search\Elastic\Fields\SearchIndexField',
+ $field,
+ "$fieldName must be instance of
SearchIndexField"
+ );
+ }
+ }
+
+}
--
To view, visit https://gerrit.wikimedia.org/r/256023
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I34cce7281d10cc1c4176ebed6b9a3b90e10fe1da
Gerrit-PatchSet: 12
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Aude <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: EBernhardson <[email protected]>
Gerrit-Reviewer: Thiemo Mättig (WMDE) <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits