jenkins-bot has submitted this change and it was merged.

Change subject: Introduce hook handlers for CirrusSearch
......................................................................


Introduce hook handlers for CirrusSearch

We use the CirrusSearchMappingConfig and the
CirrusSearchBuildDocumentParse hooks to add
fields that contain a count of site links and
count of statements.

These fields can potentially be considered when
ranking search results and boost some items with
a high number of site links and/or statements.

Bug: T119066
Change-Id: I34cce7281d10cc1c4176ebed6b9a3b90e10fe1da
---
M repo/Wikibase.php
A repo/includes/Hooks/CirrusSearchHookHandlers.php
A repo/includes/Search/Elastic/Fields/SearchIndexField.php
A repo/includes/Search/Elastic/Fields/SiteLinkCountField.php
A repo/includes/Search/Elastic/Fields/StatementCountField.php
A repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php
A repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php
A repo/tests/phpunit/includes/Search/Elastic/Fields/SiteLinkCountFieldTest.php
A repo/tests/phpunit/includes/Search/Elastic/Fields/StatementCountFieldTest.php
A 
repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php
10 files changed, 563 insertions(+), 0 deletions(-)

Approvals:
  Daniel Kinzler: Looks good to me, but someone else must approve
  Thiemo Mättig (WMDE): Looks good to me, approved
  DCausse: Looks good to me, but someone else must approve
  jenkins-bot: Verified



diff --git a/repo/Wikibase.php b/repo/Wikibase.php
index a9202ca..8464e21 100644
--- a/repo/Wikibase.php
+++ b/repo/Wikibase.php
@@ -243,6 +243,10 @@
        $wgHooks['SkinMinervaDefaultModules'][] = 
'Wikibase\RepoHooks::onSkinMinervaDefaultModules';
        $wgHooks['ResourceLoaderRegisterModules'][] = 
'Wikibase\RepoHooks::onResourceLoaderRegisterModules';
 
+       // CirrusSearch hooks
+       $wgHooks['CirrusSearchMappingConfig'][] = 
'Wikibase\Repo\Hooks\CirrusSearchHookHandlers::onCirrusSearchMappingConfig';
+       $wgHooks['CirrusSearchBuildDocumentParse'][] = 
'Wikibase\Repo\Hooks\CirrusSearchHookHandlers::onCirrusSearchBuildDocumentParse';
+
        // update hooks
        $wgHooks['LoadExtensionSchemaUpdates'][] = 
'\Wikibase\Repo\Store\Sql\ChangesSubscriptionSchemaUpdater::onSchemaUpdate';
 
diff --git a/repo/includes/Hooks/CirrusSearchHookHandlers.php 
b/repo/includes/Hooks/CirrusSearchHookHandlers.php
new file mode 100644
index 0000000..e9ee6e2
--- /dev/null
+++ b/repo/includes/Hooks/CirrusSearchHookHandlers.php
@@ -0,0 +1,119 @@
+<?php
+
+namespace Wikibase\Repo\Hooks;
+
+use CirrusSearch\Connection;
+use CirrusSearch\Maintenance\MappingConfigBuilder;
+use Content;
+use Elastica\Document;
+use ParserOutput;
+use Title;
+use UnexpectedValueException;
+use Wikibase\EntityContent;
+use Wikibase\Repo\Search\Elastic\Fields\WikibaseFieldDefinitions;
+
+/**
+ * @since 0.5
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class CirrusSearchHookHandlers {
+
+       /**
+        * @var WikibaseFieldDefinitions
+        */
+       private $fieldDefinitions;
+
+       /**
+        * @param Document $document
+        * @param Title $title
+        * @param Content $content
+        * @param ParserOutput $parserOutput
+        * @param Connection $connection
+        *
+        * @return bool
+        */
+       public static function onCirrusSearchBuildDocumentParse(
+               Document $document,
+               Title $title,
+               Content $content,
+               ParserOutput $parserOutput,
+               Connection $connection
+       ) {
+               $hookHandler = self::newFromGlobalState();
+               $hookHandler->indexExtraFields( $document, $content );
+
+               return true;
+       }
+
+       /**
+        * @param array &$config
+        * @param MappingConfigBuilder $mappingConfigBuilder
+        *
+        * @return bool
+        */
+       public static function onCirrusSearchMappingConfig(
+               array &$config,
+               MappingConfigBuilder $mappingConfigBuilder
+       ) {
+               $handler = self::newFromGlobalState();
+               $handler->addExtraFieldsToMappingConfig( $config );
+
+               return true;
+       }
+
+       /**
+        * @return BuildDocumentParserHookHandler
+        */
+       public static function newFromGlobalState() {
+               return new self(
+                       new WikibaseFieldDefinitions()
+               );
+       }
+
+       /**
+        * @param WikibaseFieldDefinitions $fieldDefinitions
+        */
+       public function __construct(
+               WikibaseFieldDefinitions $fieldDefinitions
+       ) {
+               $this->fieldDefinitions = $fieldDefinitions;
+       }
+
+       /**
+        * @param Document $document
+        * @param Content $content
+        */
+       public function indexExtraFields( Document $document, Content $content 
) {
+               if ( !$content instanceof EntityContent || 
$content->isRedirect() === true ) {
+                       return;
+               }
+
+               $fields = $this->fieldDefinitions->getFields();
+               $entity = $content->getEntity();
+
+               foreach ( $fields as $fieldName => $field ) {
+                       $data = $field->getFieldData( $entity );
+                       $document->set( $fieldName, $data );
+               }
+       }
+
+       /**
+        * @param array &$config
+        *
+        * @throws UnexpectedValueException
+        */
+       public function addExtraFieldsToMappingConfig( array &$config ) {
+               $fields = $this->fieldDefinitions->getFields();
+
+               foreach ( $fields as $fieldName => $field ) {
+                       if ( array_key_exists( $fieldName, 
$config['page']['properties'] ) ) {
+                               throw new UnexpectedValueException( "$fieldName 
is already set in the mapping." );
+                       }
+
+                       $config['page']['properties'][$fieldName] = 
$field->getMapping();
+               }
+       }
+
+}
diff --git a/repo/includes/Search/Elastic/Fields/SearchIndexField.php 
b/repo/includes/Search/Elastic/Fields/SearchIndexField.php
new file mode 100644
index 0000000..4365c59
--- /dev/null
+++ b/repo/includes/Search/Elastic/Fields/SearchIndexField.php
@@ -0,0 +1,41 @@
+<?php
+
+namespace Wikibase\Repo\Search\Elastic\Fields;
+
+use Wikibase\DataModel\Entity\EntityDocument;
+
+/**
+ * Each field is intended to be by CirrusSearch as an
+ * additional property of a page.
+ *
+ * The data returned by the field must match the field
+ * type defined in the mapping. (e.g. nested must be array,
+ * integer field must get an int, etc)
+ *
+ * @since 0.5
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+interface SearchIndexField {
+
+       /**
+        * @return array The field mapping defines attributes of the field,
+        *               such as the field type (e.g. "string", "long", 
"nested")
+        *               and other attributes like "not_analyzed".
+        *
+        *               For detailed documentation about mapping of fields, 
see:
+        *               
https://www.elastic.co/guide/en/elasticsearch/guide/current/mapping-intro.html
+        */
+       public function getMapping();
+
+       /**
+        * @param EntityDocument $entity
+        *
+        * @return mixed Get the value of the field to be indexed when a 
page/document
+        *               is indexed. This might be an array with nested data, 
if the field
+        *               is defined with nested type or an int or string for 
simple field types.
+        */
+       public function getFieldData( EntityDocument $entity );
+
+}
diff --git a/repo/includes/Search/Elastic/Fields/SiteLinkCountField.php 
b/repo/includes/Search/Elastic/Fields/SiteLinkCountField.php
new file mode 100644
index 0000000..718befa
--- /dev/null
+++ b/repo/includes/Search/Elastic/Fields/SiteLinkCountField.php
@@ -0,0 +1,42 @@
+<?php
+
+namespace Wikibase\Repo\Search\Elastic\Fields;
+
+use Wikibase\DataModel\Entity\EntityDocument;
+use Wikibase\DataModel\Entity\Item;
+
+/**
+ * @since 0.5
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class SiteLinkCountField implements SearchIndexField {
+
+       /**
+        * @see SearchIndexField::getMapping
+        *
+        * @return array
+        */
+       public function getMapping() {
+               return array(
+                       'type' => 'integer'
+               );
+       }
+
+       /**
+        * @see SearchIndexField::getFieldData
+        *
+        * @param EntityDocument $entity
+        *
+        * @return int
+        */
+       public function getFieldData( EntityDocument $entity ) {
+               if ( $entity instanceof Item ) {
+                       return $entity->getSiteLinkList()->count();
+               }
+
+               return 0;
+       }
+
+}
diff --git a/repo/includes/Search/Elastic/Fields/StatementCountField.php 
b/repo/includes/Search/Elastic/Fields/StatementCountField.php
new file mode 100644
index 0000000..ccde4f3
--- /dev/null
+++ b/repo/includes/Search/Elastic/Fields/StatementCountField.php
@@ -0,0 +1,42 @@
+<?php
+
+namespace Wikibase\Repo\Search\Elastic\Fields;
+
+use Wikibase\DataModel\Entity\EntityDocument;
+use Wikibase\DataModel\Statement\StatementListHolder;
+
+/**
+ * @since 0.5
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class StatementCountField implements SearchIndexField {
+
+       /**
+        * @see SearchIndexField::getMapping
+        *
+        * @return array
+        */
+       public function getMapping() {
+               return array(
+                       'type' => 'integer'
+               );
+       }
+
+       /**
+        * @see SearchIndexField::getFieldData
+        *
+        * @param EntityDocument $entity
+        *
+        * @return int
+        */
+       public function getFieldData( EntityDocument $entity ) {
+               if ( $entity instanceof StatementListHolder ) {
+                       return $entity->getStatements()->count();
+               }
+
+               return 0;
+       }
+
+}
diff --git a/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php 
b/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php
new file mode 100644
index 0000000..43091df
--- /dev/null
+++ b/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php
@@ -0,0 +1,19 @@
+<?php
+
+namespace Wikibase\Repo\Search\Elastic\Fields;
+
+class WikibaseFieldDefinitions {
+
+       /**
+        * @return SearchIndexField[] Array key is field name.
+        */
+       public function getFields() {
+               $fields = array(
+                       'sitelink_count' => new SiteLinkCountField(),
+                       'statement_count' => new StatementCountField()
+               );
+
+               return $fields;
+       }
+
+}
diff --git a/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php 
b/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php
new file mode 100644
index 0000000..af2acc2
--- /dev/null
+++ b/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php
@@ -0,0 +1,156 @@
+<?php
+
+namespace Wikibase\Repo\Tests\Hooks;
+
+use Elastica\Document;
+use ParserOutput;
+use PHPUnit_Framework_TestCase;
+use Title;
+use Wikibase\DataModel\Entity\Item;
+use Wikibase\DataModel\Entity\PropertyId;
+use Wikibase\DataModel\Snak\PropertyNoValueSnak;
+use Wikibase\Repo\Search\Elastic\Fields\WikibaseFieldDefinitions;
+use Wikibase\Repo\Hooks\CirrusSearchHookHandlers;
+use Wikibase\Repo\WikibaseRepo;
+
+/**
+ * @covers Wikibase\Repo\Hooks\CirrusSearchHookHandlers
+ *
+ * @since 0.5
+ *
+ * @group WikibaseElastic
+ * @group WikibaseRepo
+ * @group Wikibase
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class CirrusSearchHookHandlersTest extends PHPUnit_Framework_TestCase {
+
+       protected function setUp() {
+               parent::setUp();
+
+               if ( !class_exists( 'CirrusSearch' ) ) {
+                       $this->markTestSkipped( 'CirrusSearch is not available' 
);
+               }
+       }
+
+       public function testOnCirrusSearchBuildDocumentParse() {
+               $connection = $this->getMockBuilder( 'CirrusSearch\Connection' )
+                       ->disableOriginalConstructor()
+                       ->getMock();
+
+               $document = new Document();
+
+               CirrusSearchHookHandlers::onCirrusSearchBuildDocumentParse(
+                       $document,
+                       Title::newFromText( 'Q1' ),
+                       $this->getContent(),
+                       new ParserOutput(),
+                       $connection
+               );
+
+               $this->assertSame( 1, $document->get( 'sitelink_count' ), 
'sitelink_count' );
+               $this->assertSame( 1, $document->get( 'statement_count' ), 
'statement_count' );
+       }
+
+       public function testOnCirrusSearchMappingConfig() {
+               $mappingConfigBuilder = $this->getMockBuilder(
+                               'CirrusSearch\Maintenance\MappingConfigBuilder'
+                       )
+                       ->disableOriginalConstructor()
+                       ->getMock();
+
+               $config = array(
+                       'page' => array(
+                               'properties' => array()
+                       )
+               );
+
+               CirrusSearchHookHandlers::onCirrusSearchMappingConfig( $config, 
$mappingConfigBuilder );
+
+               $this->assertSame(
+                       array( 'sitelink_count', 'statement_count' ),
+                       array_keys( $config['page']['properties'] )
+               );
+       }
+
+       public function testIndexExtraFields() {
+               $fieldDefinitions = $this->newFieldDefinitions();
+
+               $document = new Document();
+               $content = $this->getContent();
+
+               $hookHandlers = new CirrusSearchHookHandlers( $fieldDefinitions 
);
+               $hookHandlers->indexExtraFields( $document, $content );
+
+               $this->assertSame( 1, $document->get( 'sitelink_count' ), 
'sitelink_count' );
+               $this->assertSame( 1, $document->get( 'statement_count' ), 
'statement_count' );
+       }
+
+       public function testAddExtraFieldsToMappingConfig() {
+               $fieldDefinitions = $this->newFieldDefinitions();
+
+               $config = array(
+                       'page' => array(
+                               'properties' => array()
+                       )
+               );
+
+               $hookHandlers = new CirrusSearchHookHandlers( $fieldDefinitions 
);
+               $hookHandlers->addExtraFieldsToMappingConfig( $config );
+
+               $expected = array(
+                       'page' => array(
+                               'properties' => array(
+                                       'sitelink_count' => array(
+                                               'type' => 'integer'
+                                       ),
+                                       'statement_count' => array(
+                                               'type' => 'integer'
+                                       )
+                               )
+                       )
+               );
+
+               $this->assertSame( $expected, $config );
+       }
+
+       public function 
testAddExtraFields_throwsExceptionIfFieldNameAlreadySet() {
+               $fieldDefinitions = $this->newFieldDefinitions();
+
+               $config = array(
+                       'page' => array(
+                               'properties' => array(
+                                       'sitelink_count' => array(
+                                               'type' => 'long'
+                                       )
+                               )
+                       )
+               );
+
+               $this->setExpectedException( 'UnexpectedValueException' );
+
+               $hookHandlers = new CirrusSearchHookHandlers( $fieldDefinitions 
);
+               $hookHandlers->addExtraFieldsToMappingConfig( $config );
+       }
+
+       private function newFieldDefinitions() {
+               // when we add multilingual fields, then 
WikibaseFieldDefinitions
+               // will take WikibaseContentLanguages as an argument.
+               return new WikibaseFieldDefinitions();
+       }
+
+       private function getContent() {
+               $item = new Item();
+               $item->getSiteLinkList()->addNewSiteLink( 'enwiki', 'Kitten' );
+               $item->getStatements()->addNewStatement(
+                       new PropertyNoValueSnak( new PropertyId( 'P1' ) )
+               );
+
+               $entityContentFactory = 
WikibaseRepo::getDefaultInstance()->getEntityContentFactory();
+
+               return $entityContentFactory->newFromEntity( $item );
+       }
+
+}
diff --git 
a/repo/tests/phpunit/includes/Search/Elastic/Fields/SiteLinkCountFieldTest.php 
b/repo/tests/phpunit/includes/Search/Elastic/Fields/SiteLinkCountFieldTest.php
new file mode 100644
index 0000000..958711c
--- /dev/null
+++ 
b/repo/tests/phpunit/includes/Search/Elastic/Fields/SiteLinkCountFieldTest.php
@@ -0,0 +1,52 @@
+<?php
+
+namespace Wikibase\Test;
+
+use Wikibase\DataModel\Entity\Item;
+use Wikibase\DataModel\Entity\Property;
+use Wikibase\Repo\Search\Elastic\Fields\SiteLinkCountField;
+
+/**
+ * @covers Wikibase\Repo\Elastic\Search\Fields\SiteLinkCountField
+ *
+ * @group WikibaseElastic
+ * @group WikibaseRepo
+ * @group Wikibase
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class SiteLinkCountFieldTest extends \PHPUnit_Framework_TestCase {
+
+       public function testGetMapping() {
+               $siteLinkCountField = new SiteLinkCountField();
+
+               $expected = array(
+                       'type' => 'integer'
+               );
+
+               $this->assertSame( $expected, $siteLinkCountField->getMapping() 
);
+       }
+
+       /**
+        * @dataProvider getFieldDataProvider
+        */
+       public function testGetFieldData( $expected, $entity ) {
+               $siteLinkCountField = new SiteLinkCountField();
+
+               $this->assertSame( $expected, 
$siteLinkCountField->getFieldData( $entity ) );
+       }
+
+       public function getFieldDataProvider() {
+               $item = new Item();
+
+               $item->getSiteLinkList()->addNewSiteLink( 'enwiki', 'Kitten' );
+               $item->getSiteLinkList()->addNewSiteLink( 'eswiki', 'Gato' );
+
+               return array(
+                       array( 2, $item ),
+                       array( 0, Property::newFromType( 'string' ) )
+               );
+       }
+
+}
diff --git 
a/repo/tests/phpunit/includes/Search/Elastic/Fields/StatementCountFieldTest.php 
b/repo/tests/phpunit/includes/Search/Elastic/Fields/StatementCountFieldTest.php
new file mode 100644
index 0000000..454a7ff
--- /dev/null
+++ 
b/repo/tests/phpunit/includes/Search/Elastic/Fields/StatementCountFieldTest.php
@@ -0,0 +1,48 @@
+<?php
+
+namespace Wikibase\Test;
+
+use DataValues\StringValue;
+use Wikibase\DataModel\Entity\Item;
+use Wikibase\DataModel\Entity\PropertyId;
+use Wikibase\DataModel\Snak\PropertyValueSnak;
+use Wikibase\DataModel\Statement\StatementList;
+use Wikibase\Repo\Search\Elastic\Fields\StatementCountField;
+
+/**
+ * @covers Wikibase\Repo\Search\Elastic\Fields\StatementCountField
+ *
+ * @group WikibaseElastic
+ * @group WikibaseRepo
+ * @group Wikibase
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class StatementCountFieldTest extends \PHPUnit_Framework_TestCase {
+
+       public function testGetMapping() {
+               $statementCountField = new StatementCountField();
+
+               $expected = array(
+                       'type' => 'integer'
+               );
+
+               $this->assertSame( $expected, 
$statementCountField->getMapping() );
+       }
+
+       public function testGetFieldData() {
+               $statementCountField = new StatementCountField();
+
+               $statements = new StatementList();
+               $statements->addNewStatement(
+                       new PropertyValueSnak( new PropertyId( 'P1' ), new 
StringValue( 'o_O' ) )
+               );
+
+               $item = new Item();
+               $item->setStatements( $statements );
+
+               $this->assertSame( 1, $statementCountField->getFieldData( $item 
) );
+       }
+
+}
diff --git 
a/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php
 
b/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php
new file mode 100644
index 0000000..d7830f5
--- /dev/null
+++ 
b/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php
@@ -0,0 +1,40 @@
+<?php
+
+namespace Wikibase\Test;
+
+use Wikibase\Repo\Search\Elastic\Fields\WikibaseFieldDefinitions;
+
+/**
+ * @covers Wikibase\Repo\Search\Elastic\Fields\WikibaseFieldDefinitions
+ *
+ * @group WikibaseElastic
+ * @group WikibaseRepo
+ * @group Wikibase
+ *
+ * @licence GNU GPL v2+
+ * @author Katie Filbert < [email protected] >
+ */
+class WikibaseFieldDefinitionsTest extends \PHPUnit_Framework_TestCase {
+
+       public function testGetFields() {
+               $wikibaseFieldDefinitions = new WikibaseFieldDefinitions();
+               $fields = $wikibaseFieldDefinitions->getFields();
+
+               $expectedFieldNames = array( 'sitelink_count', 
'statement_count' );
+
+               $this->assertSame( $expectedFieldNames, array_keys( $fields ) );
+       }
+
+       public function testGetFields_instanceOfSearchIndexField() {
+               $wikibaseFieldDefinitions = new WikibaseFieldDefinitions();
+
+               foreach ( $wikibaseFieldDefinitions->getFields() as $fieldName 
=> $field ) {
+                       $this->assertInstanceOf(
+                               
'Wikibase\Repo\Search\Elastic\Fields\SearchIndexField',
+                               $field,
+                               "$fieldName must be instance of 
SearchIndexField"
+                       );
+               }
+       }
+
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/256023
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I34cce7281d10cc1c4176ebed6b9a3b90e10fe1da
Gerrit-PatchSet: 12
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Aude <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
Gerrit-Reviewer: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: EBernhardson <[email protected]>
Gerrit-Reviewer: Thiemo Mättig (WMDE) <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to