Daniel Kinzler has uploaded a new change for review. https://gerrit.wikimedia.org/r/179905
Change subject: Trim redundant code from TermIndex ...................................................................... Trim redundant code from TermIndex DEPLOYMENT: heads up to ops: keep an eye on wb_terms, some quries are now constructed differently. This affects only queries via Special:ItemDisambiguation, not wbentitysearch, so it should not have much impact. Change-Id: I47098dd3163a2a7962f2d14d6181181793b9e501 --- M lib/includes/store/TermIndex.php M lib/includes/store/sql/TermSqlIndex.php M lib/tests/phpunit/store/TermIndexTest.php M repo/includes/specials/SpecialItemDisambiguation.php 4 files changed, 95 insertions(+), 259 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/05/179905/1 diff --git a/lib/includes/store/TermIndex.php b/lib/includes/store/TermIndex.php index 4550337..7c6cc9b 100644 --- a/lib/includes/store/TermIndex.php +++ b/lib/includes/store/TermIndex.php @@ -17,22 +17,6 @@ interface TermIndex extends LabelConflictFinder { /** - * Returns the type, id tuples for the entities with the provided label in the specified language. - * - * @since 0.1 - * - * @param string $label - * @param string|null $languageCode - * @param string|null $entityType - * @param bool $fuzzySearch if false, only exact matches are returned, otherwise more relaxed search . Defaults to false. - * - * @return EntityId[] - * - * TODO: update to use Term interface - */ - public function getEntityIdsForLabel( $label, $languageCode = null, $entityType = null, $fuzzySearch = false ); - - /** * Saves the terms of the provided entity in the term cache. * * @since 0.1 @@ -77,24 +61,12 @@ * @param string $entityType * @param string[]|null $termTypes The types of terms to return, e.g. "label", "description", * or "alias". Compare the Term::TYPE_XXX constants. If null, all types are returned. + * @param string[]|null $languageCodes The desired languages, given as language codes. + * If null, all languages are returned. * * @return Term[] */ public function getTermsOfEntities( array $ids, $entityType, array $termTypes = null, array $languageCodes = null ); - - /** - * Returns if a term with the specified parameters exists. - * - * @since 0.1 - * - * @param string $termValue - * @param string|null $termType - * @param string|null $termLanguage Language code - * @param string|null $entityType - * - * @return boolean - */ - public function termExists( $termValue, $termType = null, $termLanguage = null, $entityType = null ); /** * Returns the terms that match the provided conditions. diff --git a/lib/includes/store/sql/TermSqlIndex.php b/lib/includes/store/sql/TermSqlIndex.php index 38248d2..21ae5b2 100644 --- a/lib/includes/store/sql/TermSqlIndex.php +++ b/lib/includes/store/sql/TermSqlIndex.php @@ -383,51 +383,13 @@ * @return Term[] */ public function getTermsOfEntity( EntityId $entityId, array $termTypes = null, array $languageCodes = null ) { - if ( $termTypes !== null && empty( $termTypes ) ) { - return array(); - } - - if ( $languageCodes !== null && empty( $languageCodes ) ) { - return array(); - } - - wfProfileIn( __METHOD__ ); - - $conditions = array( - 'term_entity_id' => $entityId->getNumericId(), - 'term_entity_type' => $entityId->getEntityType() - ); - - if ( $termTypes !== null ) { - $conditions['term_type'] = $termTypes; - } - - if ( $languageCodes !== null ) { - $conditions['term_language'] = $languageCodes; - } - $fields = array( 'term_language', 'term_type', 'term_text', ); - $dbr = $this->getReadDb(); - - $res = $dbr->select( - $this->tableName, - $fields, - $conditions, - __METHOD__ - ); - - $terms = $this->buildTermResult( $res ); - - $this->releaseConnection( $dbr ); - - wfProfileOut( __METHOD__ ); - - return $terms; + return $this->fetchTerms( array( $entityId ), $entityId->getEntityType(), $fields, $termTypes, $languageCodes ); } /** @@ -444,6 +406,28 @@ * @return Term[] */ public function getTermsOfEntities( array $entityIds, $entityType, array $termTypes = null, array $languageCodes = null ) { + $fields = array( + 'term_entity_id', + 'term_entity_type', + 'term_language', + 'term_type', + 'term_text', + ); + + return $this->fetchTerms( $entityIds, $entityType, $fields, $termTypes, $languageCodes ); + } + + /** + * @param EntityId[] $entityIds + * @param string|null $entityType + * @param string[] $fields + * @param string[]|null $termTypes + * @param string[]|null $languageCodes + * + * @throws \MWException + * @return array + */ + private function fetchTerms( array $entityIds, $entityType, array $fields, array $termTypes = null, array $languageCodes = null ) { if ( $entityIds !== null && empty( $entityIds ) ) { return array(); } @@ -472,7 +456,9 @@ $numericIds = array(); foreach ( $entityIds as $entityId ) { - if ( $entityId->getEntityType() !== $entityType ) { + if( $entityType === null ) { + $entityType = $entityId->getEntityType(); + } elseif ( $entityId->getEntityType() !== $entityType ) { throw new MWException( 'ID ' . $entityId->getSerialization() . " does not refer to an entity of type $entityType." ); } @@ -481,14 +467,6 @@ } $entityIdentifiers['term_entity_id'] = $numericIds; - - $fields = array( - 'term_entity_id', - 'term_entity_type', - 'term_language', - 'term_type', - 'term_text', - ); $dbr = $this->getReadDb(); @@ -528,116 +506,6 @@ */ public function getWriteDb() { return $this->getConnection( DB_MASTER ); - } - - /** - * @see TermIndex::termExists - * - * @since 0.1 - * - * @param string $termValue - * @param string|null $termType - * @param string|null $termLanguage Language code - * @param string|null $entityType - * - * @return boolean - */ - public function termExists( $termValue, $termType = null, $termLanguage = null, $entityType = null ) { - wfProfileIn( __METHOD__ ); - - $conditions = array( - 'term_text' => $termValue, - ); - - if ( $termType !== null ) { - $conditions['term_type'] = $termType; - } - - if ( $termLanguage !== null ) { - $conditions['term_language'] = $termLanguage; - } - - if ( $entityType !== null ) { - $conditions['term_entity_type'] = $entityType; - } - - $dbr = $this->getReadDb(); - - $result = $dbr->selectRow( - $this->tableName, - array( - 'term_entity_id', - ), - $conditions, - __METHOD__ - ); - - $this->releaseConnection( $dbr ); - - wfProfileOut( __METHOD__ ); - - return $result !== false; - } - - /** - * @see TermIndex::getEntityIdsForLabel - * - * @since 0.1 - * - * @param string $label - * @param string|null $languageCode - * @param string|null $entityType - * @param bool $fuzzySearch if false, only exact matches are returned, otherwise more relaxed search . Defaults to false. - * - * @return EntityId[] - */ - public function getEntityIdsForLabel( $label, $languageCode = null, $entityType = null, $fuzzySearch = false ) { - wfProfileIn( __METHOD__ ); - - $fuzzySearch = false; // TODO switched off for now until we have a solution for limiting the results - $db = $this->getReadDb(); - - $tables = array( 'terms0' => $this->tableName ); - - $conds = array( 'terms0.term_type' => Term::TYPE_LABEL ); - if ( $fuzzySearch ) { - $conds[] = 'terms0.term_text' . $db->buildLike( $label, $db->anyString() ); - } else { - $conds['terms0.term_text'] = $label; - } - - $joinConds = array(); - - if ( !is_null( $languageCode ) ) { - $conds['terms0.term_language'] = $languageCode; - } - - if ( !is_null( $entityType ) ) { - $conds['terms0.term_entity_type'] = $entityType; - } - - $entities = $db->select( - $tables, - array( 'terms0.term_entity_id', 'terms0.term_entity_type' ), - $conds, - __METHOD__, - array( 'DISTINCT' ), - $joinConds - ); - - $this->releaseConnection( $db ); - - $entityIds = array_map( - function( $entity ) { - // FIXME: this only works for items and properties - return LegacyIdInterpreter::newIdFromTypeAndNumber( $entity->term_entity_type, $entity->term_entity_id ); - }, - iterator_to_array( $entities ) - ); - - wfProfileOut( __METHOD__ ); - - return $entityIds; } /** diff --git a/lib/tests/phpunit/store/TermIndexTest.php b/lib/tests/phpunit/store/TermIndexTest.php index 455a1fa..b206372 100644 --- a/lib/tests/phpunit/store/TermIndexTest.php +++ b/lib/tests/phpunit/store/TermIndexTest.php @@ -27,7 +27,7 @@ */ public abstract function getTermIndex(); - public function testGetEntityIdsForLabel() { + public function testGetMatchingIDs() { $lookup = $this->getTermIndex(); $item0 = Item::newEmpty(); @@ -47,65 +47,24 @@ $item1->setDescription( 'en', 'foo bar baz' ); $lookup->saveTermsOfEntity( $item1 ); - $ids = $lookup->getEntityIdsForLabel( 'foobar' ); + $foobar = new Term( array( 'termType' => Term::TYPE_LABEL, 'termText' => 'foobar' ) ); + $bazNl= new Term( array( 'termType' => Term::TYPE_LABEL, 'termText' => 'baz', 'termLanguage' => 'nl' ) ); + $froggerNl = new Term( array( 'termType' => Term::TYPE_LABEL, 'termText' => 'o_O', 'termLanguage' => 'nl' ) ); + + $ids = $lookup->getMatchingIDs( array( $foobar ), Item::ENTITY_TYPE ); $this->assertInternalType( 'array', $ids ); $this->assertContainsOnlyInstancesOf( '\Wikibase\DataModel\Entity\ItemId', $ids ); $this->assertArrayEquals( array( $id0, $id1 ), $ids ); - $ids = $lookup->getEntityIdsForLabel( 'baz', 'nl' ); + $ids = $lookup->getMatchingIDs( array( $bazNl ), Item::ENTITY_TYPE ); $this->assertInternalType( 'array', $ids ); $this->assertContainsOnlyInstancesOf( '\Wikibase\DataModel\Entity\ItemId', $ids ); $this->assertArrayEquals( array( $id0 ), $ids ); - $ids = $lookup->getEntityIdsForLabel( 'o_O', 'nl' ); + $ids = $lookup->getMatchingIDs( array( $froggerNl ), Item::ENTITY_TYPE ); $this->assertInternalType( 'array', $ids ); $this->assertContainsOnlyInstancesOf( '\Wikibase\DataModel\Entity\ItemId', $ids ); $this->assertArrayEquals( array( $id1 ), $ids ); - } - - public function testTermExists() { - $lookup = $this->getTermIndex(); - - $item = Item::newEmpty(); - $item->setId( new ItemId( 'Q1234' ) ); - - $item->setLabel( 'en', 'foobarz' ); - $item->setLabel( 'de', 'foobarz' ); - $item->setLabel( 'nl', 'bazz' ); - $item->setDescription( 'en', 'foobarz' ); - $item->setDescription( 'fr', 'fooz barz bazz' ); - $item->setAliases( 'nl', array( 'a42', 'b42', 'c42' ) ); - - $lookup->saveTermsOfEntity( $item ); - - $this->assertFalse( $lookup->termExists( 'foobarz', 'does-not-exist' ) ); - $this->assertFalse( $lookup->termExists( 'foobarz', null, 'does-not-exist' ) ); - $this->assertFalse( $lookup->termExists( 'foobarz', null, null, 'does-not-exist' ) ); - - $this->assertTrue( $lookup->termExists( 'foobarz' ) ); - $this->assertTrue( $lookup->termExists( 'foobarz', Term::TYPE_LABEL ) ); - $this->assertTrue( $lookup->termExists( 'foobarz', Term::TYPE_LABEL, 'en' ) ); - $this->assertTrue( $lookup->termExists( 'foobarz', Term::TYPE_LABEL, 'de' ) ); - $this->assertTrue( $lookup->termExists( 'foobarz', Term::TYPE_LABEL, 'de', $item::ENTITY_TYPE ) ); - - $this->assertFalse( $lookup->termExists( 'foobarz', Term::TYPE_LABEL, 'de', Property::ENTITY_TYPE ) ); - $this->assertFalse( $lookup->termExists( 'foobarz', Term::TYPE_LABEL, 'nl' ) ); - $this->assertFalse( $lookup->termExists( 'foobarz', Term::TYPE_DESCRIPTION, 'de' ) ); - $this->assertFalse( $lookup->termExists( 'foobarz', Term::TYPE_DESCRIPTION, null, Property::ENTITY_TYPE ) ); - $this->assertFalse( $lookup->termExists( 'dzxfzdtrgfdrtgryfth', Term::TYPE_LABEL ) ); - - $this->assertTrue( $lookup->termExists( 'foobarz', Term::TYPE_DESCRIPTION ) ); - $this->assertTrue( $lookup->termExists( 'foobarz', Term::TYPE_DESCRIPTION, 'en' ) ); - $this->assertFalse( $lookup->termExists( 'foobarz', Term::TYPE_DESCRIPTION, 'fr' ) ); - - $this->assertFalse( $lookup->termExists( 'a42', Term::TYPE_DESCRIPTION ) ); - $this->assertFalse( $lookup->termExists( 'b42', Term::TYPE_LABEL ) ); - $this->assertTrue( $lookup->termExists( 'a42' ) ); - $this->assertTrue( $lookup->termExists( 'b42' ) ); - $this->assertTrue( $lookup->termExists( 'a42', Term::TYPE_ALIAS ) ); - $this->assertTrue( $lookup->termExists( 'b42', Term::TYPE_ALIAS ) ); - $this->assertFalse( $lookup->termExists( 'b42', Term::TYPE_ALIAS, 'de' ) ); - $this->assertTrue( $lookup->termExists( 'b42', null, 'nl' ) ); } public function testGetMatchingTerms() { @@ -251,13 +210,14 @@ $item->setId( $id ); $lookup->saveTermsOfEntity( $item ); - $this->assertTrue( $lookup->termExists( 'testDeleteTermsForEntity' ) ); + $this->assertTermExists( $lookup, 'testDeleteTermsForEntity' ); $this->assertTrue( $lookup->deleteTermsOfEntity( $item->getId() ) !== false ); - $this->assertFalse( $lookup->termExists( 'testDeleteTermsForEntity' ) ); + $this->assertNotTermExists( $lookup, 'testDeleteTermsForEntity' ); - $ids = $lookup->getEntityIdsForLabel( 'abc' ); + $abc = new Term( array( 'termType' => Term::TYPE_LABEL, 'termText' => 'abc' ) ); + $ids = $lookup->getMatchingIDs( array( $abc ), Item::ENTITY_TYPE ); $this->assertNotContains( $id, $ids ); } @@ -276,77 +236,77 @@ $this->assertTrue( $lookup->saveTermsOfEntity( $item ) ); - $this->assertTrue( $lookup->termExists( + $this->assertTermExists( $lookup, 'testDeleteTermsForEntity', Term::TYPE_DESCRIPTION, 'en', Item::ENTITY_TYPE - ) ); + ); - $this->assertTrue( $lookup->termExists( + $this->assertTermExists( $lookup, 'ghi', Term::TYPE_LABEL, 'nl', Item::ENTITY_TYPE - ) ); + ); - $this->assertTrue( $lookup->termExists( + $this->assertTermExists( $lookup, 'o', Term::TYPE_ALIAS, 'fr', Item::ENTITY_TYPE - ) ); + ); // save again - this should hit an optimized code path // that avoids re-saving the terms if they are the same as before. $this->assertTrue( $lookup->saveTermsOfEntity( $item ) ); - $this->assertTrue( $lookup->termExists( + $this->assertTermExists( $lookup, 'testDeleteTermsForEntity', Term::TYPE_DESCRIPTION, 'en', Item::ENTITY_TYPE - ) ); + ); - $this->assertTrue( $lookup->termExists( + $this->assertTermExists( $lookup, 'ghi', Term::TYPE_LABEL, 'nl', Item::ENTITY_TYPE - ) ); + ); - $this->assertTrue( $lookup->termExists( + $this->assertTermExists( $lookup, 'o', Term::TYPE_ALIAS, 'fr', Item::ENTITY_TYPE - ) ); + ); // modify and save again - this should NOT skip saving, // and make sure the modified term is in the database. $item->setLabel( 'nl', 'xyz' ); $this->assertTrue( $lookup->saveTermsOfEntity( $item ) ); - $this->assertTrue( $lookup->termExists( + $this->assertTermExists( $lookup, 'testDeleteTermsForEntity', Term::TYPE_DESCRIPTION, 'en', Item::ENTITY_TYPE - ) ); + ); - $this->assertTrue( $lookup->termExists( + $this->assertTermExists( $lookup, 'xyz', Term::TYPE_LABEL, 'nl', Item::ENTITY_TYPE - ) ); + ); - $this->assertTrue( $lookup->termExists( + $this->assertTermExists( $lookup, 'o', Term::TYPE_ALIAS, 'fr', Item::ENTITY_TYPE - ) ); + ); } public function testUpdateTermsOfEntity() { @@ -654,4 +614,23 @@ "expected to find $k in terms for item" ); } + protected function assertTermExists( TermIndex $termIndex, $text, $termType = null, $language = null, $entityType = null ) { + $this->assertTrue( $this->termExists( $termIndex, $text, $termType, $language, $entityType ) ); + } + + protected function assertNotTermExists( TermIndex $termIndex, $text, $termType = null, $language = null, $entityType = null ) { + $this->assertFalse( $this->termExists( $termIndex, $text, $termType, $language, $entityType ) ); + } + + private function termExists( TermIndex $termIndex, $text, $termType = null, $language = null, $entityType = null ) { + $termFields = array(); + $termFields['termText'] = $text; + + if ( $language !== null ) { + $termFields['termLanguage'] = $language; + } + + $matches = $termIndex->getMatchingTerms( array( new Term( $termFields ) ), $termType, $entityType ); + return !empty( $matches ); + } } diff --git a/repo/includes/specials/SpecialItemDisambiguation.php b/repo/includes/specials/SpecialItemDisambiguation.php index 3f8c9ef..b34cd6c 100644 --- a/repo/includes/specials/SpecialItemDisambiguation.php +++ b/repo/includes/specials/SpecialItemDisambiguation.php @@ -14,6 +14,7 @@ use Wikibase\Lib\Store\EntityTitleLookup; use Wikibase\Lib\Store\LanguageLabelLookup; use Wikibase\Repo\WikibaseRepo; +use Wikibase\Term; use Wikibase\TermIndex; /** @@ -273,7 +274,23 @@ * @return Item[] */ private function findLabelUsage( $languageCode, $label ) { - $entityIds = $this->termIndex->getEntityIdsForLabel( $label, $languageCode, Item::ENTITY_TYPE, true ); + //@todo: optionally + $protoTerms = array( + new Term( array( + 'termType' => Term::TYPE_LABEL, + 'termLanguage' => $languageCode, + 'termText' => $label + ) ), + ); + + //@todo: expose options + $options = array( + 'caseSensitive' => true, + 'prefixSearch' => false, + 'LIMIT' => $this->limit + ); + + $entityIds = $this->termIndex->getMatchingIDs( $protoTerms, Item::ENTITY_TYPE, $options ); $entities = array(); $count = 0; -- To view, visit https://gerrit.wikimedia.org/r/179905 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I47098dd3163a2a7962f2d14d6181181793b9e501 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Daniel Kinzler <daniel.kinz...@wikimedia.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits