Addshore has uploaded a new change for review. https://gerrit.wikimedia.org/r/219168
Change subject: Use SearchInteractor in SearchEntities ...................................................................... Use SearchInteractor in SearchEntities Bug: T90692 Change-Id: Iedef04773521e295946121f134db074d26676c2e --- M repo/includes/api/SearchEntities.php M repo/tests/phpunit/includes/api/SearchEntitiesTest.php 2 files changed, 403 insertions(+), 459 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/68/219168/1 diff --git a/repo/includes/api/SearchEntities.php b/repo/includes/api/SearchEntities.php index 3571902..403396c 100644 --- a/repo/includes/api/SearchEntities.php +++ b/repo/includes/api/SearchEntities.php @@ -4,26 +4,23 @@ use ApiBase; use ApiMain; -use ApiResult; +use OutOfBoundsException; use Wikibase\DataModel\Entity\EntityId; use Wikibase\DataModel\Entity\EntityIdParser; use Wikibase\DataModel\Entity\EntityIdParsingException; +use Wikibase\DataModel\Term\Term; use Wikibase\Lib\ContentLanguages; -use Wikibase\LanguageFallbackChainFactory; use Wikibase\Lib\Store\EntityTitleLookup; +use Wikibase\Lib\Store\LabelDescriptionLookup; +use Wikibase\Lib\Store\LanguageFallbackLabelDescriptionLookup; +use Wikibase\Lib\Store\TermLookup; +use Wikibase\Repo\Interactors\TermIndexSearchInteractor; use Wikibase\Repo\WikibaseRepo; -use Wikibase\TermIndexEntry; use Wikibase\TermIndex; +use Wikibase\TermIndexEntry; /** * API module to search for Wikibase entities. - * - * FIXME: this module is doing to much work. Ranking terms is not its job and should be delegated - * FIXME: the continuation currently relies on the search order returned by the TermStore - * - * Note: Continuation only works for a rather small number of entities. It is assumed that a large - * number of entities will not be searched through by human editors, and that bots cannot search - * through them anyway. * * @since 0.2 * @@ -32,6 +29,7 @@ * @author Jens Ohlig < jens.oh...@wikimedia.de > * @author Tobias Gritschacher < tobias.gritschac...@wikimedia.de > * @author Thiemo Mättig + * @author Adam Shorland */ class SearchEntities extends ApiBase { @@ -46,14 +44,19 @@ private $idParser; /** + * @var TermIndexSearchInteractor + */ + private $termIndexSearchInteractor; + + /** * @var TermIndex */ private $termIndex; /** - * @var LanguageFallbackChainFactory + * @var LabelDescriptionLookup */ - private $languageFallbackChainFactory; + private $labelDescriptionLookup; /** * @var ContentLanguages @@ -77,79 +80,73 @@ $repo = WikibaseRepo::getDefaultInstance(); $this->setServices( - $repo->getStore()->getTermIndex(), $repo->getEntityTitleLookup(), $repo->getEntityIdParser(), $repo->getEntityFactory()->getEntityTypes(), $repo->getTermsLanguages(), - $repo->getLanguageFallbackChainFactory() + $repo->newTermSearchInteractor( $this->getLanguage()->getCode() ), + $repo->getStore()->getTermIndex(), + new LanguageFallbackLabelDescriptionLookup( + $repo->getTermLookup(), + $repo->getLanguageFallbackChainFactory() + ->newFromLanguageCode( $this->getLanguage()->getCode() ) + ) ); } /** * Override services, for use for testing. * - * @param TermIndex $termIndex * @param EntityTitleLookup $titleLookup * @param EntityIdParser $idParser * @param array $entityTypes * @param ContentLanguages $termLanguages - * @param LanguageFallbackChainFactory $languageFallbackChainFactory + * @param TermIndexSearchInteractor $termIndexSearchInteractor + * @param TermIndex $termIndex + * @param LabelDescriptionLookup $labelDescriptionLookup */ public function setServices( - TermIndex $termIndex, EntityTitleLookup $titleLookup, EntityIdParser $idParser, array $entityTypes, ContentLanguages $termLanguages, - LanguageFallbackChainFactory $languageFallbackChainFactory + TermIndexSearchInteractor $termIndexSearchInteractor, + TermIndex $termIndex, + LabelDescriptionLookup $labelDescriptionLookup ) { - $this->termIndex = $termIndex; $this->titleLookup = $titleLookup; $this->idParser = $idParser; $this->entityTypes = $entityTypes; $this->termsLanguages = $termLanguages; - $this->languageFallbackChainFactory = $languageFallbackChainFactory; + $this->termIndexSearchInteractor = $termIndexSearchInteractor; + $this->termIndex = $termIndex; + $this->labelDescriptionLookup = $labelDescriptionLookup; } /** - * Get the entities corresponding to the provided languages and term. - * Term means it is either a label or an alias. + * Wrapper around TermSearchInteractor::searchForTerms * - * @param string $term - * @param string|null $entityType - * @param string[] $languages + * @see TermSearchInteractor::searchForTerms + * + * @param string $text + * @param string $entityType + * @param string $languageCode * @param int $limit * @param bool $prefixSearch + * @param bool $strictlanguage * - * @return TermIndexEntry[] + * @return array[] */ - private function searchEntities( $term, $entityType, array $languages, $limit, $prefixSearch ) { - $termTemplates = array(); - - foreach ( $languages as $language ) { - $termTemplates[] = new TermIndexEntry( array( - 'termType' => TermIndexEntry::TYPE_LABEL, - 'termLanguage' => $language, - 'termText' => $term - ) ); - - $termTemplates[] = new TermIndexEntry( array( - 'termType' => TermIndexEntry::TYPE_ALIAS, - 'termLanguage' => $language, - 'termText' => $term - ) ); - } - - //TODO: use getMatchingTerms instead - return $this->termIndex->getMatchingIDs( - $termTemplates, + private function searchEntities( $text, $entityType, $languageCode, $limit, $prefixSearch, $strictlanguage ) { + $this->termIndexSearchInteractor->setLimit( $limit ); + $this->termIndexSearchInteractor->setIsPrefixSearch( $prefixSearch ); + $this->termIndexSearchInteractor->setIsCaseSensitive( false ); + $this->termIndexSearchInteractor->setUseLanguageFallback( !$strictlanguage ); + return $this->termIndexSearchInteractor->searchForTerms( + $text, + array( $languageCode ), $entityType, - array( - 'caseSensitive' => false, - 'prefixSearch' => $prefixSearch, - 'LIMIT' => $limit, - ) + array( TermIndexEntry::TYPE_LABEL, TermIndexEntry::TYPE_DESCRIPTION ) ); } @@ -164,36 +161,47 @@ * @return array[] */ private function getSearchEntries( array $params ) { - $ids = array(); - $required = $params['continue'] + $params['limit'] + 1; - - $languages = $this->getLanguages( $params ); - - $ids = array_merge( - $ids, - $this->getRankedMatches( $params['search'], $params['type'], $languages, $required ) + $matches = $this->getRankedMatches( + $params['search'], + $params['type'], + $params['language'], + $params['continue'] + $params['limit'] + 1, + $params['strictlanguage'] ); - $ids = array_unique( $ids ); - return $this->getEntries( $ids, $params['search'], $languages ); + $entries = array(); + foreach ( $matches as $match ) { + //TODO: use EntityInfoBuilder, EntityInfoTermLookup + $title = $this->titleLookup->getTitleForId( + $this->idParser->parse( $match['entityId'] ) + ); + $entry = array();; + $entry['id'] = $match['entityId']; + $entry['url'] = $title->getFullUrl(); + $entry = array_merge( $entry, $this->termsToArray( $match['displayTerms'] ) ); + $entry['matched'] = array_unique( $this->termsToArray( $match['matchedTerms'] ) ); + $entries[] = $entry; + } + return $entries; } - private function getLanguages( array $params ) { - $lang = $params['language']; - - if ( !$params['strictlanguage'] ) { - $fallbackMode = ( - LanguageFallbackChainFactory::FALLBACK_VARIANTS - | LanguageFallbackChainFactory::FALLBACK_OTHERS - | LanguageFallbackChainFactory::FALLBACK_SELF ); - - $fallbackChain = $this->languageFallbackChainFactory - ->newFromLanguageCode( $lang, $fallbackMode ); - - return $languages = $fallbackChain->getFetchLanguageCodes(); - } else { - return array( $lang ); + /** + * @param Term[]|array[]|string[] $terms + * + * @return array[] + */ + private function termsToArray( array $terms ) { + $termArray = array(); + foreach( $terms as $key => $term ) { + if( $term instanceof Term ) { + $termArray[$key] = $term->getText(); + } elseif ( is_array( $term ) ) { + $termArray[$key] = $this->termsToArray( $term ); + } else { + $termArray[$key] = $term; + } } + return $termArray; } /** @@ -222,146 +230,103 @@ /** * Gets exact matches. If there are not enough exact matches, it gets prefixed matches. * - * @param string $term - * @param string|null $entityType - * @param string[] $languages + * @param string $text + * @param string $entityType + * @param string $languageCode * @param int $limit + * @param bool $strictlanguage * - * @return EntityId[] + * @return array[] Key: string Serialized EntityId + * Value: array( displayTerms => Term[], matchedTerms => Term[] ) + * Note: displayTerms has possible keys Wikibase\TermIndexEntry::TYPE_* + * matchedTerms has no keys / integer keys only */ - private function getRankedMatches( $term, $entityType, array $languages, $limit ) { - if ( empty( $languages ) ) { - return array(); - } + private function getRankedMatches( $text, $entityType, $languageCode, $limit, $strictlanguage ) { + $allSearchResults = array(); + $entityIdMap = array(); + $nextSearchKey = 0; - /** - * @var EntityId[] $ids - */ - $ids = array(); - - // If $term is the ID of an existing item, include it in the result. - $entityId = $this->getExactMatchForEntityId( $term, $entityType ); + // If $text is the ID of an existing item, include it in the result. + $entityId = $this->getExactMatchForEntityId( $text, $entityType ); if ( $entityId !== null ) { - $ids[] = $entityId; + $allSearchResults[$nextSearchKey] = array( + 'entityId' => $entityId->getSerialization(), + 'matchedTerms' => array( $entityId->getSerialization() ), + 'displayTerms' => $this->termsToArray( $this->getDisplayTerms( $entityId ) ), + ); + $entityIdMap[$entityId->getSerialization()] = $nextSearchKey; + $nextSearchKey++; } - // If not enough matches yet, search for full term matches (for all languages at once). - // No preference is applied to any language. - $missing = $limit - count( $ids ); - if ( $missing > 0 ) { - $matchedIds = $this->searchEntities( $term, $entityType, $languages, $missing, false ); - $ids = array_unique( array_merge( $ids, $matchedIds ) ); - } - - // If not enough matches yet, search for prefix matches, one language at a time. - // This causes multiple queries for cases with few or no matches, but only one - // with a single language if there are many results (e.g. for a short prefix, - // as is common for type-ahead suggestions). This way, languages are preferred - // according to the language fallback chain, and database load is hopefully - // reduced. - foreach ( $languages as $lang ) { - $missing = $limit - count( $ids ); + // If not matched enough then search for full term matches + // If still not enough matched then search for prefix matches + foreach( array( false, true ) as $prefixSearch ) { + $missing = $limit - count( $allSearchResults ); if ( $missing <= 0 ) { - break; + continue; } - - $matchedIds = $this->searchEntities( $term, $entityType, array( $lang ), $missing, true ); - $ids = array_unique( array_merge( $ids, $matchedIds ) ); + $searchResults = $this->searchEntities( $text, $entityType, $languageCode, $missing, $prefixSearch, $strictlanguage ); + foreach( $searchResults as $searchResult ) { + /** @var EntityId $entityId */ + $entityId = $searchResult['entityId']; + $entityIdString = $entityId->getSerialization(); + if( !array_key_exists( $entityIdString, $entityIdMap ) ) { + $allSearchResults[$nextSearchKey] = array( + 'entityId' => $entityIdString, + 'matchedTerms' => array( $searchResult['matchedTerm'] ), + 'displayTerms' => $searchResult['displayTerms'], + ); + $entityIdMap[$entityIdString] = $nextSearchKey; + $nextSearchKey++; + } else { + $allSearchResults[$entityIdMap[$entityIdString]]['matchedTerms'][] = $searchResult['matchedTerm']; + } + } } - - // Clip overflow, if any - $ids = array_slice( $ids, 0, $limit ); - - return $ids; + return $allSearchResults; } /** - * @param EntityId[] $entityIds - * @param string $search - * @param string[] $languages + * @param EntityId $entityId * - * @return array[] + * @return Term[] array with possible keys TermIndexEntry::TYPE_* */ - private function getEntries( array $entityIds, $search, $languages ) { - /** - * @var array[] $entries - */ - $entries = array(); - - //TODO: do not re-implement language fallback here! - //TODO: use EntityInfoBuilder, EntityInfoTermLookup, and LanguageFallbackLabelDescriptionLookup - foreach ( $entityIds as $id ) { - $key = $id->getSerialization(); - $title = $this->titleLookup->getTitleForId( $id ); - $entries[ $key ] = array( - 'id' => $id->getSerialization(), - 'url' => $title->getFullUrl() - ); + private function getDisplayTerms( EntityId $entityId ) { + $displayTerms = array(); + try{ + $displayTerms[TermIndexEntry::TYPE_LABEL] = $this->labelDescriptionLookup->getLabel( $entityId ); + } catch( OutOfBoundsException $e ) { + // Ignore + }; + try{ + $displayTerms[TermIndexEntry::TYPE_DESCRIPTION] = $this->labelDescriptionLookup->getDescription( $entityId ); + } catch( OutOfBoundsException $e ) { + // Ignore + }; + $aliasTerms = $this->getTermsFromTermIndexEntries( + $this->termIndex->getTermsOfEntity( + $entityId, + array( TermIndexEntry::TYPE_ALIAS ), + array( $this->getLanguage()->getCode() ) + ) + ); + if( !empty( $aliasTerms ) ) { + $displayTerms[TermIndexEntry::TYPE_ALIAS] = $aliasTerms; } + return $displayTerms; + } - $termTypes = array( TermIndexEntry::TYPE_LABEL, TermIndexEntry::TYPE_DESCRIPTION, TermIndexEntry::TYPE_ALIAS ); - - // Find all the remaining terms for the given entities - $terms = $this->termIndex->getTermsOfEntities( - $entityIds, $termTypes, $languages ); - // TODO: This needs to be rethought when a different search engine is used - $termPattern = '/^' . preg_quote( $search, '/' ) . '/i'; - - // ranks for fallback - $languageRanks = array_flip( $languages ); - $languageRanks[''] = PHP_INT_MAX; // no language is worst - - // track "best" language seen for each entity and term type - $bestLangPerSlot = array(); - - foreach ( $terms as $term ) { - $key = $term->getEntityId()->getSerialization(); - if ( !isset( $entries[$key] ) ) { - continue; - } - - $type = $term->getType(); - $bestLang = isset( $bestLangPerSlot[$key][$type] ) ? $bestLangPerSlot[$key][$type] : ''; - $currentLang = $term->getLanguage(); - - // we already have a "better" language for this slot - if ( $languageRanks[$bestLang] < $languageRanks[$currentLang] ) { - continue; - } - - $entry = $entries[$key]; - - switch ( $type ) { - case TermIndexEntry::TYPE_LABEL: - $entry['label'] = $term->getText(); - $bestLangPerSlot[$key][$type] = $currentLang; - break; - case TermIndexEntry::TYPE_DESCRIPTION: - $entry['description'] = $term->getText(); - $bestLangPerSlot[$key][$type] = $currentLang; - break; - case TermIndexEntry::TYPE_ALIAS: - // Only include matching aliases - if ( preg_match( $termPattern, $term->getText() ) ) { - if ( !isset( $entry['aliases'] ) ) { - $entry['aliases'] = array(); - ApiResult::setIndexedTagName( $entry['aliases'], 'alias' ); - } - $entry['aliases'][] = $term->getText(); - $bestLangPerSlot[$key][$type] = $currentLang; - } - break; - } - - $entries[$key] = $entry; + /** + * @param TermIndexEntry[] $termIndexEntries + * + * @return Term[] + */ + private function getTermsFromTermIndexEntries( array $termIndexEntries ) { + $terms = array(); + foreach( $termIndexEntries as $indexEntry ) { + $terms[] = $indexEntry->getTerm(); } - - //TODO: If we show a non-matching label for lang1 but the match was for the label in lang2, - // treat the lang2 label like an alias, so there is an indication what term matched. - - $entries = array_values( $entries ); - - return $entries; + return $terms; } /** @@ -370,7 +335,6 @@ public function execute() { $params = $this->extractRequestParams(); - //TODO: factor search logic out into a new class (TermSearchInteractor), re-use in SpecialTermDisambiguation. $entries = $this->getSearchEntries( $params ); $this->getResult()->addValue( @@ -426,7 +390,6 @@ * @see ApiBase::getAllowedParams */ protected function getAllowedParams() { - return array( 'search' => array( ApiBase::PARAM_TYPE => 'string', diff --git a/repo/tests/phpunit/includes/api/SearchEntitiesTest.php b/repo/tests/phpunit/includes/api/SearchEntitiesTest.php index 593950e..db554e6 100644 --- a/repo/tests/phpunit/includes/api/SearchEntitiesTest.php +++ b/repo/tests/phpunit/includes/api/SearchEntitiesTest.php @@ -4,15 +4,20 @@ use ApiMain; use FauxRequest; +use Language; use PHPUnit_Framework_TestCase; use RequestContext; use Title; use Wikibase\DataModel\Entity\EntityId; +use Wikibase\DataModel\Entity\ItemId; +use Wikibase\DataModel\Entity\PropertyId; +use Wikibase\DataModel\Term\Term; use Wikibase\Lib\ContentLanguages; use Wikibase\Lib\Store\EntityTitleLookup; -use Wikibase\Repo\WikibaseRepo; +use Wikibase\Lib\Store\LanguageLabelDescriptionLookup; +use Wikibase\Repo\Interactors\TermIndexSearchInteractor; +use Wikibase\Repo\Interactors\TermSearchInteractor; use Wikibase\TermIndexEntry; -use Wikibase\TermIndex; use Wikibase\Api\SearchEntities; use Wikibase\DataModel\Entity\BasicEntityIdParser; use Wikibase\Test\MockTermIndex; @@ -33,100 +38,6 @@ */ class SearchEntitiesTest extends PHPUnit_Framework_TestCase { - private static $terms = array( - 'Berlin' => array( - 'id' => 'Q64', - 'labels' => array( - array( 'language' => 'en', 'value' => 'Berlin' ), - array( 'language' => 'de', 'value' => 'Berlin' ), - ), - 'aliases' => array( - array( array( 'language' => 'de', 'value' => 'Dickes B' ) ), - ), - 'descriptions' => array( - array( 'language' => 'en', 'value' => 'Capital city and a federated state of the Federal Republic of Germany.' ), - array( 'language' => 'de', 'value' => 'Bundeshauptstadt und Regierungssitz der Bundesrepublik Deutschland.' ), - ), - ), - 'Bern' => array( - 'id' => 'Q45', - 'labels' => array( - array( 'language' => 'en', 'value' => 'Bern' ), - array( 'language' => 'de', 'value' => 'Bern' ), - array( 'language' => 'fr', 'value' => 'Berne' ), - array( 'language' => 'it', 'value' => 'Berna' ), - ), - 'aliases' => array( - ), - 'descriptions' => array( - array( 'language' => 'en', 'value' => 'City in Switzerland.' ), - array( 'language' => 'de', 'value' => 'Stadt in der Schweiz.' ), - ), - ), - 'Guangzhou' => array( - 'id' => 'Q231', - 'labels' => array( - array( 'language' => 'en', 'value' => 'Guangzhou' ), - array( 'language' => 'yue', 'value' => '廣州' ), - array( 'language' => 'zh-cn', 'value' => '广州市' ), - ), - 'aliases' => array( - ), - 'descriptions' => array( - array( 'language' => 'en', 'value' => 'Capital of Guangdong.' ), - array( 'language' => 'zh-hk', 'value' => '廣東的省會。' ), - ), - ), - 'X1' => array( - 'id' => 'Q1001', - 'labels' => array( - array( 'language' => 'en', 'value' => 'label:x1:en' ), - ), - 'aliases' => array( - array( array( 'language' => 'en', 'value' => 'alias1:x1:en' ) ), - ), - 'descriptions' => array( - array( 'language' => 'en', 'value' => 'description:x1:en' ), - ), - ), - 'X2' => array( - 'id' => 'Q1002', - 'labels' => array( - array( 'language' => 'en', 'value' => 'label:x2:en' ), - array( 'language' => 'de', 'value' => 'label:x2:de' ), - ), - 'aliases' => array( - array( array( 'language' => 'en', 'value' => 'alias1:x2:en' ) ), - ), - 'descriptions' => array( - array( 'language' => 'en', 'value' => 'description:x2:en' ), - ), - ), - 'X3' => array( - 'id' => 'Q1003', - 'labels' => array( - array( 'language' => 'en', 'value' => 'label:x3:en' ), - array( 'language' => 'de', 'value' => 'label:x3:de' ), - array( 'language' => 'de-ch', 'value' => 'label:x3:de-ch' ), - ), - 'aliases' => array( - array( array( 'language' => 'en', 'value' => 'alias1:x3:en' ) ), - array( array( 'language' => 'en', 'value' => 'description:x3:en' ) ), - array( array( 'language' => 'de', 'value' => 'description:x3:de' ) ), - array( array( 'language' => 'de-ch', 'value' => 'description:x3:de-ch' ) ), - ), - 'descriptions' => array( - array( 'language' => 'en', 'value' => 'description:x3:en' ), - array( 'language' => 'de', 'value' => 'description:x3:de' ), - array( 'language' => 'de-ch', 'value' => 'description:x3:de-ch' ), - ), - ), - ); - - private function getEntityId( $handle ) { - return self::$terms[$handle]['id']; - } - /** * @param array $params * @@ -134,95 +45,148 @@ */ private function getApiMain( array $params ) { $context = new RequestContext(); + $context->setLanguage( 'en-ca' ); $context->setRequest( new FauxRequest( $params, true ) ); - $main = new ApiMain( $context ); return $main; } /** - * @return EntityTitleLookup + * @return EntityTitleLookup|\PHPUnit_Framework_MockObject_MockObject */ - private function getTitleLookup() { + private function getMockTitleLookup() { $titleLookup = $this->getMock( 'Wikibase\Lib\Store\EntityTitleLookup' ); + $testCase = $this; $titleLookup->expects( $this->any() )->method( 'getTitleForId' ) - ->will( $this->returnCallback( function( EntityId $id ) { - $title = Title::makeTitle( NS_MAIN, $id->getEntityType() . ':' . $id->getSerialization() ); - $title->resetArticleID( $id->getNumericId() ); - return $title; - } ) ); - - return $titleLookup; - } - - /** - * @return ContentLanguages - */ - private function getContentLanguages() { - $titleLookup = $this->getMock( 'Wikibase\Lib\ContentLanguages' ); - $titleLookup->expects( $this->any() )->method( 'getLanguages' ) - ->will( $this->returnValue( array( 'de', 'de-ch', 'en', 'ii', 'nn', 'ru', 'zh-cn' ) ) ); - - return $titleLookup; - } - - /** - * @return TermIndex - */ - private function getTermIndex() { - $idParser = new BasicEntityIdParser(); - $termObjects = array(); - foreach ( self::$terms as $entity ) { - $id = $idParser->parse( $entity['id'] ); - - foreach ( $entity['labels'] as $row ) { - $termObjects[] = $this->newTermFromDataRow( $id, TermIndexEntry::TYPE_LABEL, $row ); - } - - foreach ( $entity['descriptions'] as $row ) { - $termObjects[] = $this->newTermFromDataRow( $id, TermIndexEntry::TYPE_DESCRIPTION, $row ); - } - - foreach ( $entity['aliases'] as $rows ) { - foreach ( $rows as $row ) { - $termObjects[] = $this->newTermFromDataRow( $id, TermIndexEntry::TYPE_ALIAS, $row ); + ->will( $this->returnCallback( function( EntityId $id ) use ( $testCase ) { + if( $id->getSerialization() === 'Q111' ) { + return $testCase->getMockTitle( true ); + } else { + return $testCase->getMockTitle( false ); } - } - } - - $termIndex = new MockTermIndex( $termObjects ); - - return $termIndex; + } ) ); + return $titleLookup; } - private function newTermFromDataRow( EntityId $entityId, $type, $row ) { - return new TermIndexEntry( array( - 'termType' => $type, - 'termLanguage' => $row['language'], - 'termText' => $row['value'], - 'entityType' => $entityId->getEntityType(), - 'entityId' => $entityId->getNumericId() - ) ); + /** + * @param bool $exists + * + * @return Title|\PHPUnit_Framework_MockObject_MockObject + */ + private function getMockTitle( $exists ) { + $mock = $this->getMockBuilder( '\Title' ) + ->disableOriginalConstructor() + ->getMock(); + $mock->expects( $this->any() ) + ->method( 'exists' ) + ->will( $this->returnValue( $exists ) ); + $mock->expects( $this->any() ) + ->method( 'getFullUrl' ) + ->will( $this->returnValue( 'http://fullTitleUrl' ) ); + return $mock; + } + + /** + * @return ContentLanguages|\PHPUnit_Framework_MockObject_MockObject + */ + private function getMockContentLanguages() { + $contentLanguages = $this->getMock( 'Wikibase\Lib\ContentLanguages' ); + $contentLanguages->expects( $this->any() )->method( 'getLanguages' ) + ->will( $this->returnValue( array( 'de', 'de-ch', 'en', 'ii', 'nn', 'ru', 'zh-cn' ) ) ); + return $contentLanguages; } /** * @param array $params + * @param array $returnResults + * + * @return TermIndexSearchInteractor|\PHPUnit_Framework_MockObject_MockObject + */ + private function getMockSearchInteractor( $params, $returnResults = array() ) { + $mock = $this->getMockBuilder( 'Wikibase\Repo\Interactors\TermIndexSearchInteractor' ) + ->disableOriginalConstructor() + ->getMock(); + $mock->expects( $this->atLeastOnce() ) + ->method( 'searchForTerms' ) + ->with( + $this->equalTo( $params['search'] ), + $this->equalTo( array( $params['language'] ) ), + $this->equalTo( $params['type'] ), + $this->equalTo( array( TermIndexEntry::TYPE_LABEL, TermIndexEntry::TYPE_DESCRIPTION ) ) + ) + ->will( $this->returnValue( $returnResults ) ); + return $mock; + } + + /** + * Get a lookup that always returns a pt label and description suffixed by the entity ID + * + * @return LanguageLabelDescriptionLookup + */ + private function getMockLabelDescriptionLookup() { + $mock = $this->getMockBuilder( 'Wikibase\Lib\Store\LabelDescriptionLookup' ) + ->disableOriginalConstructor() + ->getMock(); + $mock->expects( $this->any() ) + ->method( 'getLabel' ) + ->will( $this->returnValue( new Term( 'pt', 'ptLabel' ) ) ); + $mock->expects( $this->any() ) + ->method( 'getDescription' ) + ->will( $this->returnValue( new Term( 'pt', 'ptDescription' ) ) ); + return $mock; + } + + /** + * @param string $text + * @param string $languageCode + * @param string $termType + * @param EntityId|ItemId|PropertyId $entityId + * + * @returns TermIndexEntry + */ + private function getTermIndexEntry( $text, $languageCode, $termType, EntityId $entityId ) { + return new TermIndexEntry( array( + 'termText' => $text, + 'termLanguage' => $languageCode, + 'termType' => $termType, + 'entityId' => $entityId->getNumericId(), + 'entityType' => $entityId->getEntityType(), + ) ); + } + + private function getMockTermIndex() { + return new MockTermIndex( + array( + // Only have a single alias + $this->getTermIndexEntry( 'Foooooo', 'en-ca', TermIndexEntry::TYPE_ALIAS, new ItemId( 'Q111' ) ), + ) + ); + } + + /** + * @param array $params + * @param TermSearchInteractor|null $searchInteractor * * @return array[] */ - private function callApiModule( array $params ) { + private function callApiModule( array $params, $searchInteractor = null ) { $module = new SearchEntities( $this->getApiMain( $params ), 'wbsearchentities' ); + if( $searchInteractor == null ) { + $searchInteractor = $this->getMockSearchInteractor( $params ); + } + $module->setServices( - $this->getTermIndex(), - $this->getTitleLookup(), + $this->getMockTitleLookup(), new BasicEntityIdParser(), array( 'item', 'property' ), - $this->getContentLanguages(), - WikibaseRepo::getDefaultInstance()->getLanguageFallbackChainFactory() + $this->getMockContentLanguages(), + $searchInteractor, + $this->getMockTermIndex(), + $this->getMockLabelDescriptionLookup() ); $module->execute(); @@ -235,96 +199,137 @@ ) ); } - public function provideData() { + public function provideBooleanValues() { return array( - //Search via full Labels - 'en:Berlin' => array( array( 'search' => 'Berlin', 'language' => 'en' ), array( array( 'label' => 'Berlin' ) ) ), - 'en:bERliN' => array( array( 'search' => 'bERliN', 'language' => 'en' ), array( array( 'label' => 'Berlin' ) ) ), - 'zh-cn:广州市' => array( array( 'search' => '广州市', 'language' => 'zh-cn' ), array( array( 'label' => '广州市' ) ) ), - - //Search via partial Labels - 'de:Guang' => array( array( 'search' => 'Guang', 'language' => 'de' ), array( array( 'label' => 'Guangzhou' ) ) ), - 'zh-cn:广' => array( array( 'search' => '广', 'language' => 'zh-cn' ), array( array( 'label' => '广州市' ) ) ), - - //Match alias - 'de:Dickes' => array( array( 'search' => 'Dickes', 'language' => 'de' ), array( array( 'label' => 'Berlin', 'aliases' => array( 'Dickes B' ) ) ) ), - - //Multi-match language fallback - 'de:x' => array( array( 'search' => 'alias1:x', 'language' => 'de-ch' ), array( - array( 'label' => 'label:x1:en' ), - array( 'label' => 'label:x2:de' ), - array( 'label' => 'label:x3:de-ch' ), - ) ), + array( true ), + array( false ), ); } /** - * @dataProvider provideData + * @dataProvider provideBooleanValues */ - public function testSearchEntities( $params, $expected ) { - $params['action'] = 'wbsearchentities'; - - $result = $this->callApiModule( $params ); - - $this->assertResultLooksGood( $result ); - $this->assertResultSet( $expected, $result['search'] ); - } - - public function testSearchExactMatch() { + public function testSearchStrictLanguage_passedToSearchInteractor( $boolean ) { $params = array( 'action' => 'wbsearchentities', - 'search' => $this->getEntityId( 'Berlin' ), + 'search' => 'Foo', + 'type' => 'item', + 'language' => 'de-ch', + ); + if( $boolean ) { + $params['strictlanguage'] = true; + } + + $searchInteractor = $this->getMockSearchInteractor( $params ); + $searchInteractor->expects( $this->atLeastOnce() ) + ->method( 'setUseLanguageFallback' ) + ->with( $this->equalTo( !$boolean ) ); + + $this->callApiModule( $params, $searchInteractor ); + } + + public function provideTestSearchEntities() { + $multipleInteractorReturnValues = array( + array( + 'entityId' => new ItemId( 'Q222' ), + 'matchedTerm' => new Term( 'en-gb', 'Fooooo' ), + 'displayTerms' => array( + TermIndexEntry::TYPE_LABEL => new Term( 'en-gb', 'FooHeHe' ), + TermIndexEntry::TYPE_DESCRIPTION => new Term( 'en', 'FooHeHe en description' ), + ), + ), + array( + 'entityId' => new ItemId( 'Q222' ), + 'matchedTerm' => new Term( 'en-gb', 'FoooooSecondMatch' ), + 'displayTerms' => array( + TermIndexEntry::TYPE_LABEL => new Term( 'en-gb', 'FooHeHe' ), + TermIndexEntry::TYPE_DESCRIPTION => new Term( 'en', 'FooHeHe en description' ), + ), + ), + array( + 'entityId' => new ItemId( 'Q333' ), + 'matchedTerm' => new Term( 'de', 'AMatchedTerm' ), + 'displayTerms' => array( + TermIndexEntry::TYPE_LABEL => new Term( 'fr', 'ADisplayLabel' ), + TermIndexEntry::TYPE_ALIAS => array( + new Term( 'de', 'Alias1' ), + new Term( 'de', 'Alias2' ), + ), + ), + ), + ); + $q222Result = array( + 'id' => 'Q222', + 'url' => 'http://fullTitleUrl', + TermIndexEntry::TYPE_LABEL => 'FooHeHe', + TermIndexEntry::TYPE_DESCRIPTION => 'FooHeHe en description', + 'matched' => array( 'Fooooo', 'FoooooSecondMatch' ), + ); + $q333Result = array( + 'id' => 'Q333', + 'url' => 'http://fullTitleUrl', + TermIndexEntry::TYPE_LABEL => 'ADisplayLabel', + TermIndexEntry::TYPE_ALIAS => array( 'Alias1', 'Alias2' ), + 'matched' => array( 'AMatchedTerm' ), + ); + return array( + 'No exact match' => array( + array( 'search' => 'Q999' ), + array(), + array(), + ), + 'Exact EntityId match' => array( + array( 'search' => 'Q111' ), + array(), + array( + array( + 'id' => 'Q111', + 'url' => 'http://fullTitleUrl', + 'label' => 'ptLabel', + 'description' => 'ptDescription', + 'alias' => array( 'Foooooo' ), + 'matched' => array( 'Q111' ), + ), + ), + ), + 'Multiple Results' => array( + array(), + $multipleInteractorReturnValues, + array( $q222Result, $q333Result ), + ), + 'Multiple Results (limited)' => array( + array( 'limit' => 1 ), + $multipleInteractorReturnValues, + array( $q222Result ), + ), + 'Multiple Results (limited-continue)' => array( + array( 'limit' => 1, 'continue' => 1 ), + $multipleInteractorReturnValues, + array( $q333Result ), + ), + ); + } + + /** + * @dataProvider provideTestSearchEntities + */ + public function testSearchEntities( array $overrideParams, array $interactorReturn, array $expected ) { + $params = array( + 'action' => 'wbsearchentities', + 'search' => 'Foo', + 'type' => 'item', 'language' => 'en' ); + foreach( $overrideParams as $key => $param ) { + $params[$key] = $param; + } - $expected = array( array( - 'label' => 'Berlin', - 'description' => 'Capital city and a federated state of the Federal Republic of Germany.', - ) ); + $searchInteractor = $this->getMockSearchInteractor( $params, $interactorReturn ); - $result = $this->callApiModule( $params ); - $this->assertResultSet( $expected, $result['search'] ); - } + $result = $this->callApiModule( $params, $searchInteractor ); - - public function testSearchFallback() { - $params = array( - 'action' => 'wbsearchentities', - 'search' => 'BERN', - 'language' => 'de-ch', - ); - - $result = $this->callApiModule( $params ); - $this->assertCount( 1, $result['search'] ); - - $resultEntry = reset( $result['search'] ); - $this->assertEquals( 'Bern', $resultEntry['label'] ); - $this->assertEquals( 'Stadt in der Schweiz.', $resultEntry['description'] ); - } - - public function testSearchStrictLanguage() { - $params = array( - 'action' => 'wbsearchentities', - 'search' => 'Berlin', - 'language' => 'de-ch', - 'strictlanguage' => true - ); - - $result = $this->callApiModule( $params ); - $this->assertEmpty( $result['search'] ); - } - - public function testSearchContinue() { - $params = array( - 'action' => 'wbsearchentities', - 'search' => 'B', - 'language' => 'de', - 'limit' => 1 - ); - - $result = $this->callApiModule( $params ); - - $this->assertArrayHasKey( 'search-continue', $result ); + $this->assertResultLooksGood( $result ); + $this->assertEquals( $expected, $result['search'] ); } private function assertResultLooksGood( $result ) { @@ -337,30 +342,6 @@ $this->assertArrayHasKey( 'id', $searchresult ); $this->assertArrayHasKey( 'url', $searchresult ); } - - } - - private function assertResultSet( $expected, $actual ) { - reset( $actual ); - foreach ( $expected as $expectedEntry ) { - $actualEntry = current( $actual ); - next( $actual ); - - $this->assertTrue( $actualEntry !== false, 'missing result entry ' . var_export( $expectedEntry, true ) ); - $this->assertResultEntry( $expectedEntry, $actualEntry ); - } - - $actualEntry = next( $actual ); - $this->assertFalse( $actualEntry, 'extra result entry ' . var_export( $actualEntry, true ) ); - } - - private function assertResultEntry( $expected, $actual ) { - $actual = array_intersect_key( $actual, $expected ); - - ksort( $expected ); - ksort( $actual ); - - $this->assertEquals( $expected, $actual ); } } -- To view, visit https://gerrit.wikimedia.org/r/219168 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Iedef04773521e295946121f134db074d26676c2e Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Addshore <addshorew...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits