Daniel Kinzler has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/179905

Change subject: Trim redundant code from TermIndex
......................................................................

Trim redundant code from TermIndex

DEPLOYMENT: heads up to ops: keep an eye on wb_terms,
some quries are now constructed differently. This
affects only queries via Special:ItemDisambiguation,
not wbentitysearch, so it should not have much impact.

Change-Id: I47098dd3163a2a7962f2d14d6181181793b9e501
---
M lib/includes/store/TermIndex.php
M lib/includes/store/sql/TermSqlIndex.php
M lib/tests/phpunit/store/TermIndexTest.php
M repo/includes/specials/SpecialItemDisambiguation.php
4 files changed, 95 insertions(+), 259 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase 
refs/changes/05/179905/1

diff --git a/lib/includes/store/TermIndex.php b/lib/includes/store/TermIndex.php
index 4550337..7c6cc9b 100644
--- a/lib/includes/store/TermIndex.php
+++ b/lib/includes/store/TermIndex.php
@@ -17,22 +17,6 @@
 interface TermIndex extends LabelConflictFinder {
 
        /**
-        * Returns the type, id tuples for the entities with the provided label 
in the specified language.
-        *
-        * @since 0.1
-        *
-        * @param string $label
-        * @param string|null $languageCode
-        * @param string|null $entityType
-        * @param bool $fuzzySearch if false, only exact matches are returned, 
otherwise more relaxed search . Defaults to false.
-        *
-        * @return EntityId[]
-        *
-        * TODO: update to use Term interface
-        */
-       public function getEntityIdsForLabel( $label, $languageCode = null, 
$entityType = null, $fuzzySearch = false );
-
-       /**
         * Saves the terms of the provided entity in the term cache.
         *
         * @since 0.1
@@ -77,24 +61,12 @@
         * @param string $entityType
         * @param string[]|null $termTypes The types of terms to return, e.g. 
"label", "description",
         *        or "alias". Compare the Term::TYPE_XXX constants. If null, 
all types are returned.
+        * @param string[]|null $languageCodes The desired languages, given as 
language codes.
+        *        If null, all languages are returned.
         *
         * @return Term[]
         */
        public function getTermsOfEntities( array $ids, $entityType, array 
$termTypes = null, array $languageCodes = null );
-
-       /**
-        * Returns if a term with the specified parameters exists.
-        *
-        * @since 0.1
-        *
-        * @param string $termValue
-        * @param string|null $termType
-        * @param string|null $termLanguage Language code
-        * @param string|null $entityType
-        *
-        * @return boolean
-        */
-       public function termExists( $termValue, $termType = null, $termLanguage 
= null, $entityType = null );
 
        /**
         * Returns the terms that match the provided conditions.
diff --git a/lib/includes/store/sql/TermSqlIndex.php 
b/lib/includes/store/sql/TermSqlIndex.php
index 38248d2..21ae5b2 100644
--- a/lib/includes/store/sql/TermSqlIndex.php
+++ b/lib/includes/store/sql/TermSqlIndex.php
@@ -383,51 +383,13 @@
         * @return Term[]
         */
        public function getTermsOfEntity( EntityId $entityId, array $termTypes 
= null, array $languageCodes = null ) {
-               if ( $termTypes !== null && empty( $termTypes ) ) {
-                       return array();
-               }
-
-               if ( $languageCodes !== null && empty( $languageCodes ) ) {
-                       return array();
-               }
-
-               wfProfileIn( __METHOD__ );
-
-               $conditions = array(
-                       'term_entity_id' => $entityId->getNumericId(),
-                       'term_entity_type' => $entityId->getEntityType()
-               );
-
-               if ( $termTypes !== null ) {
-                       $conditions['term_type'] = $termTypes;
-               }
-
-               if ( $languageCodes !== null ) {
-                       $conditions['term_language'] = $languageCodes;
-               }
-
                $fields = array(
                        'term_language',
                        'term_type',
                        'term_text',
                );
 
-               $dbr = $this->getReadDb();
-
-               $res = $dbr->select(
-                       $this->tableName,
-                       $fields,
-                       $conditions,
-                       __METHOD__
-               );
-
-               $terms = $this->buildTermResult( $res );
-
-               $this->releaseConnection( $dbr );
-
-               wfProfileOut( __METHOD__ );
-
-               return $terms;
+               return $this->fetchTerms( array( $entityId ), 
$entityId->getEntityType(), $fields, $termTypes, $languageCodes );
        }
 
        /**
@@ -444,6 +406,28 @@
         * @return Term[]
         */
        public function getTermsOfEntities( array $entityIds, $entityType, 
array $termTypes = null, array $languageCodes = null ) {
+               $fields = array(
+                       'term_entity_id',
+                       'term_entity_type',
+                       'term_language',
+                       'term_type',
+                       'term_text',
+               );
+
+               return $this->fetchTerms( $entityIds, $entityType, $fields, 
$termTypes, $languageCodes );
+       }
+
+       /**
+        * @param EntityId[] $entityIds
+        * @param string|null $entityType
+        * @param string[] $fields
+        * @param string[]|null $termTypes
+        * @param string[]|null $languageCodes
+        *
+        * @throws \MWException
+        * @return array
+        */
+       private function fetchTerms( array $entityIds, $entityType, array 
$fields, array $termTypes = null, array $languageCodes = null ) {
                if ( $entityIds !== null && empty( $entityIds ) ) {
                        return array();
                }
@@ -472,7 +456,9 @@
 
                $numericIds = array();
                foreach ( $entityIds as $entityId ) {
-                       if ( $entityId->getEntityType() !== $entityType ) {
+                       if( $entityType === null ) {
+                               $entityType = $entityId->getEntityType();
+                       } elseif ( $entityId->getEntityType() !== $entityType ) 
{
                                throw new MWException( 'ID ' . 
$entityId->getSerialization()
                                        . " does not refer to an entity of type 
$entityType." );
                        }
@@ -481,14 +467,6 @@
                }
 
                $entityIdentifiers['term_entity_id'] = $numericIds;
-
-               $fields = array(
-                       'term_entity_id',
-                       'term_entity_type',
-                       'term_language',
-                       'term_type',
-                       'term_text',
-               );
 
                $dbr = $this->getReadDb();
 
@@ -528,116 +506,6 @@
         */
        public function getWriteDb() {
                return $this->getConnection( DB_MASTER );
-       }
-
-       /**
-        * @see TermIndex::termExists
-        *
-        * @since 0.1
-        *
-        * @param string $termValue
-        * @param string|null $termType
-        * @param string|null $termLanguage Language code
-        * @param string|null $entityType
-        *
-        * @return boolean
-        */
-       public function termExists( $termValue, $termType = null, $termLanguage 
= null, $entityType = null ) {
-               wfProfileIn( __METHOD__ );
-
-               $conditions = array(
-                       'term_text' => $termValue,
-               );
-
-               if ( $termType !== null ) {
-                       $conditions['term_type'] = $termType;
-               }
-
-               if ( $termLanguage !== null ) {
-                       $conditions['term_language'] = $termLanguage;
-               }
-
-               if ( $entityType !== null ) {
-                       $conditions['term_entity_type'] = $entityType;
-               }
-
-               $dbr = $this->getReadDb();
-
-               $result = $dbr->selectRow(
-                       $this->tableName,
-                       array(
-                               'term_entity_id',
-                       ),
-                       $conditions,
-                       __METHOD__
-               );
-
-               $this->releaseConnection( $dbr );
-
-               wfProfileOut( __METHOD__ );
-
-               return $result !== false;
-       }
-
-       /**
-        * @see TermIndex::getEntityIdsForLabel
-        *
-        * @since 0.1
-        *
-        * @param string $label
-        * @param string|null $languageCode
-        * @param string|null $entityType
-        * @param bool $fuzzySearch if false, only exact matches are returned, 
otherwise more relaxed search . Defaults to false.
-        *
-        * @return EntityId[]
-        */
-       public function getEntityIdsForLabel( $label, $languageCode = null, 
$entityType = null, $fuzzySearch = false ) {
-               wfProfileIn( __METHOD__ );
-
-               $fuzzySearch = false; // TODO switched off for now until we 
have a solution for limiting the results
-               $db = $this->getReadDb();
-
-               $tables = array( 'terms0' => $this->tableName );
-
-               $conds = array( 'terms0.term_type' => Term::TYPE_LABEL );
-               if ( $fuzzySearch ) {
-                       $conds[] = 'terms0.term_text' . $db->buildLike( $label, 
$db->anyString() );
-               } else {
-                       $conds['terms0.term_text'] = $label;
-               }
-
-               $joinConds = array();
-
-               if ( !is_null( $languageCode ) ) {
-                       $conds['terms0.term_language'] = $languageCode;
-               }
-
-               if ( !is_null( $entityType ) ) {
-                       $conds['terms0.term_entity_type'] = $entityType;
-               }
-
-               $entities = $db->select(
-                       $tables,
-                       array( 'terms0.term_entity_id', 
'terms0.term_entity_type' ),
-                       $conds,
-                       __METHOD__,
-                       array( 'DISTINCT' ),
-                       $joinConds
-               );
-
-               $this->releaseConnection( $db );
-
-               $entityIds = array_map(
-                       function( $entity ) {
-                               // FIXME: this only works for items and 
properties
-                               return 
LegacyIdInterpreter::newIdFromTypeAndNumber( $entity->term_entity_type, 
$entity->term_entity_id );
-                       },
-                       iterator_to_array( $entities )
-               );
-
-               wfProfileOut( __METHOD__ );
-
-               return $entityIds;
        }
 
        /**
diff --git a/lib/tests/phpunit/store/TermIndexTest.php 
b/lib/tests/phpunit/store/TermIndexTest.php
index 455a1fa..b206372 100644
--- a/lib/tests/phpunit/store/TermIndexTest.php
+++ b/lib/tests/phpunit/store/TermIndexTest.php
@@ -27,7 +27,7 @@
         */
        public abstract function getTermIndex();
 
-       public function testGetEntityIdsForLabel() {
+       public function testGetMatchingIDs() {
                $lookup = $this->getTermIndex();
 
                $item0 = Item::newEmpty();
@@ -47,65 +47,24 @@
                $item1->setDescription( 'en', 'foo bar baz' );
                $lookup->saveTermsOfEntity( $item1 );
 
-               $ids = $lookup->getEntityIdsForLabel( 'foobar' );
+               $foobar = new Term( array( 'termType' => Term::TYPE_LABEL, 
'termText' => 'foobar' ) );
+               $bazNl= new Term( array( 'termType' => Term::TYPE_LABEL, 
'termText' => 'baz', 'termLanguage' => 'nl' ) );
+               $froggerNl = new Term( array( 'termType' => Term::TYPE_LABEL, 
'termText' => 'o_O', 'termLanguage' => 'nl' ) );
+
+               $ids = $lookup->getMatchingIDs( array( $foobar ), 
Item::ENTITY_TYPE );
                $this->assertInternalType( 'array', $ids );
                $this->assertContainsOnlyInstancesOf( 
'\Wikibase\DataModel\Entity\ItemId', $ids );
                $this->assertArrayEquals( array( $id0, $id1 ), $ids );
 
-               $ids = $lookup->getEntityIdsForLabel( 'baz', 'nl' );
+               $ids = $lookup->getMatchingIDs( array( $bazNl ), 
Item::ENTITY_TYPE );
                $this->assertInternalType( 'array', $ids );
                $this->assertContainsOnlyInstancesOf( 
'\Wikibase\DataModel\Entity\ItemId', $ids );
                $this->assertArrayEquals( array( $id0 ), $ids );
 
-               $ids = $lookup->getEntityIdsForLabel( 'o_O', 'nl' );
+               $ids = $lookup->getMatchingIDs( array( $froggerNl ), 
Item::ENTITY_TYPE );
                $this->assertInternalType( 'array', $ids );
                $this->assertContainsOnlyInstancesOf( 
'\Wikibase\DataModel\Entity\ItemId', $ids );
                $this->assertArrayEquals( array( $id1 ), $ids );
-       }
-
-       public function testTermExists() {
-               $lookup = $this->getTermIndex();
-
-               $item = Item::newEmpty();
-               $item->setId( new ItemId( 'Q1234' )  );
-
-               $item->setLabel( 'en', 'foobarz' );
-               $item->setLabel( 'de', 'foobarz' );
-               $item->setLabel( 'nl', 'bazz' );
-               $item->setDescription( 'en', 'foobarz' );
-               $item->setDescription( 'fr', 'fooz barz bazz' );
-               $item->setAliases( 'nl', array( 'a42', 'b42', 'c42' ) );
-
-               $lookup->saveTermsOfEntity( $item );
-
-               $this->assertFalse( $lookup->termExists( 'foobarz', 
'does-not-exist' ) );
-               $this->assertFalse( $lookup->termExists( 'foobarz', null, 
'does-not-exist' ) );
-               $this->assertFalse( $lookup->termExists( 'foobarz', null, null, 
'does-not-exist' ) );
-
-               $this->assertTrue( $lookup->termExists( 'foobarz' ) );
-               $this->assertTrue( $lookup->termExists( 'foobarz', 
Term::TYPE_LABEL ) );
-               $this->assertTrue( $lookup->termExists( 'foobarz', 
Term::TYPE_LABEL, 'en' ) );
-               $this->assertTrue( $lookup->termExists( 'foobarz', 
Term::TYPE_LABEL, 'de' ) );
-               $this->assertTrue( $lookup->termExists( 'foobarz', 
Term::TYPE_LABEL, 'de', $item::ENTITY_TYPE ) );
-
-               $this->assertFalse( $lookup->termExists( 'foobarz', 
Term::TYPE_LABEL, 'de', Property::ENTITY_TYPE ) );
-               $this->assertFalse( $lookup->termExists( 'foobarz', 
Term::TYPE_LABEL, 'nl' ) );
-               $this->assertFalse( $lookup->termExists( 'foobarz', 
Term::TYPE_DESCRIPTION, 'de' ) );
-               $this->assertFalse( $lookup->termExists( 'foobarz', 
Term::TYPE_DESCRIPTION, null, Property::ENTITY_TYPE ) );
-               $this->assertFalse( $lookup->termExists( 'dzxfzdtrgfdrtgryfth', 
Term::TYPE_LABEL ) );
-
-               $this->assertTrue( $lookup->termExists( 'foobarz', 
Term::TYPE_DESCRIPTION ) );
-               $this->assertTrue( $lookup->termExists( 'foobarz', 
Term::TYPE_DESCRIPTION, 'en' ) );
-               $this->assertFalse( $lookup->termExists( 'foobarz', 
Term::TYPE_DESCRIPTION, 'fr' ) );
-
-               $this->assertFalse( $lookup->termExists( 'a42', 
Term::TYPE_DESCRIPTION ) );
-               $this->assertFalse( $lookup->termExists( 'b42', 
Term::TYPE_LABEL ) );
-               $this->assertTrue( $lookup->termExists( 'a42' ) );
-               $this->assertTrue( $lookup->termExists( 'b42' ) );
-               $this->assertTrue( $lookup->termExists( 'a42', Term::TYPE_ALIAS 
) );
-               $this->assertTrue( $lookup->termExists( 'b42', Term::TYPE_ALIAS 
) );
-               $this->assertFalse( $lookup->termExists( 'b42', 
Term::TYPE_ALIAS, 'de' ) );
-               $this->assertTrue( $lookup->termExists( 'b42', null, 'nl' ) );
        }
 
        public function testGetMatchingTerms() {
@@ -251,13 +210,14 @@
                $item->setId( $id );
                $lookup->saveTermsOfEntity( $item );
 
-               $this->assertTrue( $lookup->termExists( 
'testDeleteTermsForEntity' ) );
+               $this->assertTermExists( $lookup, 'testDeleteTermsForEntity' );
 
                $this->assertTrue( $lookup->deleteTermsOfEntity( $item->getId() 
) !== false );
 
-               $this->assertFalse( $lookup->termExists( 
'testDeleteTermsForEntity' ) );
+               $this->assertNotTermExists( $lookup, 'testDeleteTermsForEntity' 
);
 
-               $ids = $lookup->getEntityIdsForLabel( 'abc' );
+               $abc = new Term( array( 'termType' => Term::TYPE_LABEL, 
'termText' => 'abc' ) );
+               $ids = $lookup->getMatchingIDs( array( $abc ), 
Item::ENTITY_TYPE );
 
                $this->assertNotContains( $id, $ids );
        }
@@ -276,77 +236,77 @@
 
                $this->assertTrue( $lookup->saveTermsOfEntity( $item ) );
 
-               $this->assertTrue( $lookup->termExists(
+               $this->assertTermExists( $lookup,
                        'testDeleteTermsForEntity',
                        Term::TYPE_DESCRIPTION,
                        'en',
                        Item::ENTITY_TYPE
-               ) );
+               );
 
-               $this->assertTrue( $lookup->termExists(
+               $this->assertTermExists( $lookup,
                        'ghi',
                        Term::TYPE_LABEL,
                        'nl',
                        Item::ENTITY_TYPE
-               ) );
+               );
 
-               $this->assertTrue( $lookup->termExists(
+               $this->assertTermExists( $lookup,
                        'o',
                        Term::TYPE_ALIAS,
                        'fr',
                        Item::ENTITY_TYPE
-               ) );
+               );
 
                // save again - this should hit an optimized code path
                // that avoids re-saving the terms if they are the same as 
before.
                $this->assertTrue( $lookup->saveTermsOfEntity( $item ) );
 
-               $this->assertTrue( $lookup->termExists(
+               $this->assertTermExists( $lookup,
                        'testDeleteTermsForEntity',
                        Term::TYPE_DESCRIPTION,
                        'en',
                        Item::ENTITY_TYPE
-               ) );
+               );
 
-               $this->assertTrue( $lookup->termExists(
+               $this->assertTermExists( $lookup,
                        'ghi',
                        Term::TYPE_LABEL,
                        'nl',
                        Item::ENTITY_TYPE
-               ) );
+               );
 
-               $this->assertTrue( $lookup->termExists(
+               $this->assertTermExists( $lookup,
                        'o',
                        Term::TYPE_ALIAS,
                        'fr',
                        Item::ENTITY_TYPE
-               ) );
+               );
 
                // modify and save again - this should NOT skip saving,
                // and make sure the modified term is in the database.
                $item->setLabel( 'nl', 'xyz' );
                $this->assertTrue( $lookup->saveTermsOfEntity( $item ) );
 
-               $this->assertTrue( $lookup->termExists(
+               $this->assertTermExists( $lookup,
                        'testDeleteTermsForEntity',
                        Term::TYPE_DESCRIPTION,
                        'en',
                        Item::ENTITY_TYPE
-               ) );
+               );
 
-               $this->assertTrue( $lookup->termExists(
+               $this->assertTermExists( $lookup,
                        'xyz',
                        Term::TYPE_LABEL,
                        'nl',
                        Item::ENTITY_TYPE
-               ) );
+               );
 
-               $this->assertTrue( $lookup->termExists(
+               $this->assertTermExists( $lookup,
                        'o',
                        Term::TYPE_ALIAS,
                        'fr',
                        Item::ENTITY_TYPE
-               ) );
+               );
        }
 
        public function testUpdateTermsOfEntity() {
@@ -654,4 +614,23 @@
                        "expected to find $k in terms for item" );
        }
 
+       protected function assertTermExists( TermIndex $termIndex, $text, 
$termType = null, $language = null, $entityType = null ) {
+               $this->assertTrue( $this->termExists( $termIndex, $text, 
$termType, $language, $entityType ) );
+       }
+
+       protected function assertNotTermExists( TermIndex $termIndex, $text, 
$termType = null, $language = null, $entityType = null ) {
+               $this->assertFalse( $this->termExists( $termIndex, $text, 
$termType, $language, $entityType ) );
+       }
+
+       private function termExists( TermIndex $termIndex, $text, $termType = 
null, $language = null, $entityType = null ) {
+               $termFields = array();
+               $termFields['termText'] = $text;
+
+               if ( $language !== null ) {
+                       $termFields['termLanguage'] = $language;
+               }
+
+               $matches = $termIndex->getMatchingTerms( array( new Term( 
$termFields ) ), $termType, $entityType );
+               return !empty( $matches );
+       }
 }
diff --git a/repo/includes/specials/SpecialItemDisambiguation.php 
b/repo/includes/specials/SpecialItemDisambiguation.php
index 3f8c9ef..b34cd6c 100644
--- a/repo/includes/specials/SpecialItemDisambiguation.php
+++ b/repo/includes/specials/SpecialItemDisambiguation.php
@@ -14,6 +14,7 @@
 use Wikibase\Lib\Store\EntityTitleLookup;
 use Wikibase\Lib\Store\LanguageLabelLookup;
 use Wikibase\Repo\WikibaseRepo;
+use Wikibase\Term;
 use Wikibase\TermIndex;
 
 /**
@@ -273,7 +274,23 @@
         * @return Item[]
         */
        private function findLabelUsage( $languageCode, $label ) {
-               $entityIds = $this->termIndex->getEntityIdsForLabel( $label, 
$languageCode, Item::ENTITY_TYPE, true );
+               //@todo: optionally
+               $protoTerms = array(
+                       new Term( array(
+                               'termType'              => Term::TYPE_LABEL,
+                               'termLanguage'  => $languageCode,
+                               'termText'              => $label
+                       ) ),
+               );
+
+               //@todo: expose options
+               $options = array(
+                       'caseSensitive' => true,
+                       'prefixSearch' => false,
+                       'LIMIT' => $this->limit
+               );
+
+               $entityIds = $this->termIndex->getMatchingIDs( $protoTerms, 
Item::ENTITY_TYPE, $options );
                $entities = array();
 
                $count = 0;

-- 
To view, visit https://gerrit.wikimedia.org/r/179905
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I47098dd3163a2a7962f2d14d6181181793b9e501
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <daniel.kinz...@wikimedia.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to