jenkins-bot has submitted this change and it was merged. Change subject: Add label count field to search index ......................................................................
Add label count field to search index For items like Q6581097 (male) and Q6581072 (female), label count is probably also an important consideration, although these have a lot of incoming links, so maybe label count won't be essential. But I think we should include it and have it as an option when trying various ways of rescoring search results. The implementation is consistent with the other fields we are introducing (sitelink and statement count). Change-Id: I9b9d5eb69883412627257c9e90688af50af967b5 --- A repo/includes/Search/Elastic/Fields/LabelCountField.php M repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php M repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php A repo/tests/phpunit/includes/Search/Elastic/Fields/LabelCountFieldTest.php M repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php 5 files changed, 90 insertions(+), 2 deletions(-) Approvals: Daniel Kinzler: Looks good to me, approved Thiemo Mättig (WMDE): Looks good to me, but someone else must approve Addshore: Looks good to me, but someone else must approve DCausse: Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/repo/includes/Search/Elastic/Fields/LabelCountField.php b/repo/includes/Search/Elastic/Fields/LabelCountField.php new file mode 100644 index 0000000..c473071 --- /dev/null +++ b/repo/includes/Search/Elastic/Fields/LabelCountField.php @@ -0,0 +1,42 @@ +<?php + +namespace Wikibase\Repo\Search\Elastic\Fields; + +use Wikibase\DataModel\Entity\EntityDocument; +use Wikibase\DataModel\Term\FingerprintProvider; + +/** + * @since 0.5 + * + * @licence GNU GPL v2+ + * @author Katie Filbert < [email protected] > + */ +class LabelCountField implements SearchIndexField { + + /** + * @see SearchIndexField::getMapping + * + * @return array + */ + public function getMapping() { + return array( + 'type' => 'integer' + ); + } + + /** + * @see SearchIndexField::getFieldData + * + * @param EntityDocument $entity + 
* + * @return int + */ + public function getFieldData( EntityDocument $entity ) { + if ( $entity instanceof FingerprintProvider ) { + return $entity->getFingerprint()->getLabels()->count(); + } + + return 0; + } + +} diff --git a/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php b/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php index 43091df..89b42c2 100644 --- a/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php +++ b/repo/includes/Search/Elastic/Fields/WikibaseFieldDefinitions.php @@ -9,6 +9,7 @@ */ public function getFields() { $fields = array( + 'label_count' => new LabelCountField(), 'sitelink_count' => new SiteLinkCountField(), 'statement_count' => new StatementCountField() ); diff --git a/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php b/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php index af2acc2..66dfab4 100644 --- a/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php +++ b/repo/tests/phpunit/includes/Hooks/CirrusSearchHookHandlersTest.php @@ -50,6 +50,7 @@ $connection ); + $this->assertSame( 1, $document->get( 'label_count' ), 'label_count' ); $this->assertSame( 1, $document->get( 'sitelink_count' ), 'sitelink_count' ); $this->assertSame( 1, $document->get( 'statement_count' ), 'statement_count' ); } @@ -70,7 +71,7 @@ CirrusSearchHookHandlers::onCirrusSearchMappingConfig( $config, $mappingConfigBuilder ); $this->assertSame( - array( 'sitelink_count', 'statement_count' ), + array( 'label_count', 'sitelink_count', 'statement_count' ), array_keys( $config['page']['properties'] ) ); } @@ -84,6 +85,7 @@ $hookHandlers = new CirrusSearchHookHandlers( $fieldDefinitions ); $hookHandlers->indexExtraFields( $document, $content ); + $this->assertSame( 1, $document->get( 'label_count' ), 'label_count' ); $this->assertSame( 1, $document->get( 'sitelink_count' ), 'sitelink_count' ); $this->assertSame( 1, $document->get( 'statement_count' ), 'statement_count' ); } @@ 
-103,6 +105,9 @@ $expected = array( 'page' => array( 'properties' => array( + 'label_count' => array( + 'type' => 'integer' + ), 'sitelink_count' => array( 'type' => 'integer' ), @@ -143,6 +148,7 @@ private function getContent() { $item = new Item(); + $item->getFingerprint()->setLabel( 'en', 'Kitten' ); $item->getSiteLinkList()->addNewSiteLink( 'enwiki', 'Kitten' ); $item->getStatements()->addNewStatement( new PropertyNoValueSnak( new PropertyId( 'P1' ) ) diff --git a/repo/tests/phpunit/includes/Search/Elastic/Fields/LabelCountFieldTest.php b/repo/tests/phpunit/includes/Search/Elastic/Fields/LabelCountFieldTest.php new file mode 100644 index 0000000..1192cc9 --- /dev/null +++ b/repo/tests/phpunit/includes/Search/Elastic/Fields/LabelCountFieldTest.php @@ -0,0 +1,39 @@ +<?php + +namespace Wikibase\Test; + +use Wikibase\DataModel\Entity\Item; +use Wikibase\Repo\Search\Elastic\Fields\LabelCountField; + +/** + * @covers Wikibase\Repo\Search\Elastic\Fields\LabelCountField + * + * @group WikibaseElastic + * @group WikibaseRepo + * @group Wikibase + * + * @licence GNU GPL v2+ + * @author Katie Filbert < [email protected] > + */ +class LabelCountFieldTest extends \PHPUnit_Framework_TestCase { + + public function testGetMapping() { + $labelCountField = new LabelCountField(); + + $expected = array( + 'type' => 'integer' + ); + + $this->assertSame( $expected, $labelCountField->getMapping() ); + } + + public function testGetFieldData() { + $labelCountField = new LabelCountField(); + + $item = new Item(); + $item->getFingerprint()->setLabel( 'es', 'Gato' ); + + $this->assertSame( 1, $labelCountField->getFieldData( $item ) ); + } + +} diff --git a/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php b/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php index d7830f5..7b8a3ae 100644 --- a/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php +++ 
b/repo/tests/phpunit/includes/Search/Elastic/Fields/WikibaseFieldDefinitionsTest.php @@ -20,7 +20,7 @@ $wikibaseFieldDefinitions = new WikibaseFieldDefinitions(); $fields = $wikibaseFieldDefinitions->getFields(); - $expectedFieldNames = array( 'sitelink_count', 'statement_count' ); + $expectedFieldNames = array( 'label_count', 'sitelink_count', 'statement_count' ); $this->assertSame( $expectedFieldNames, array_keys( $fields ) ); } -- To view, visit https://gerrit.wikimedia.org/r/257291 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I9b9d5eb69883412627257c9e90688af50af967b5 Gerrit-PatchSet: 2 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Aude <[email protected]> Gerrit-Reviewer: Addshore <[email protected]> Gerrit-Reviewer: DCausse <[email protected]> Gerrit-Reviewer: Daniel Kinzler <[email protected]> Gerrit-Reviewer: EBernhardson <[email protected]> Gerrit-Reviewer: Thiemo Mättig (WMDE) <[email protected]> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
