Daniel Kinzler has uploaded a new change for review.
https://gerrit.wikimedia.org/r/179905
Change subject: Trim redundant code from TermIndex
......................................................................
Trim redundant code from TermIndex
DEPLOYMENT: heads up to ops: keep an eye on wb_terms,
some quries are now constructed differently. This
affects only queries via Special:ItemDisambiguation,
not wbentitysearch, so it should not have much impact.
Change-Id: I47098dd3163a2a7962f2d14d6181181793b9e501
---
M lib/includes/store/TermIndex.php
M lib/includes/store/sql/TermSqlIndex.php
M lib/tests/phpunit/store/TermIndexTest.php
M repo/includes/specials/SpecialItemDisambiguation.php
4 files changed, 95 insertions(+), 259 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase
refs/changes/05/179905/1
diff --git a/lib/includes/store/TermIndex.php b/lib/includes/store/TermIndex.php
index 4550337..7c6cc9b 100644
--- a/lib/includes/store/TermIndex.php
+++ b/lib/includes/store/TermIndex.php
@@ -17,22 +17,6 @@
interface TermIndex extends LabelConflictFinder {
/**
- * Returns the type, id tuples for the entities with the provided label
in the specified language.
- *
- * @since 0.1
- *
- * @param string $label
- * @param string|null $languageCode
- * @param string|null $entityType
- * @param bool $fuzzySearch if false, only exact matches are returned,
otherwise more relaxed search . Defaults to false.
- *
- * @return EntityId[]
- *
- * TODO: update to use Term interface
- */
- public function getEntityIdsForLabel( $label, $languageCode = null,
$entityType = null, $fuzzySearch = false );
-
- /**
* Saves the terms of the provided entity in the term cache.
*
* @since 0.1
@@ -77,24 +61,12 @@
* @param string $entityType
* @param string[]|null $termTypes The types of terms to return, e.g.
"label", "description",
* or "alias". Compare the Term::TYPE_XXX constants. If null,
all types are returned.
+ * @param string[]|null $languageCodes The desired languages, given as
language codes.
+ * If null, all languages are returned.
*
* @return Term[]
*/
public function getTermsOfEntities( array $ids, $entityType, array
$termTypes = null, array $languageCodes = null );
-
- /**
- * Returns if a term with the specified parameters exists.
- *
- * @since 0.1
- *
- * @param string $termValue
- * @param string|null $termType
- * @param string|null $termLanguage Language code
- * @param string|null $entityType
- *
- * @return boolean
- */
- public function termExists( $termValue, $termType = null, $termLanguage
= null, $entityType = null );
/**
* Returns the terms that match the provided conditions.
diff --git a/lib/includes/store/sql/TermSqlIndex.php
b/lib/includes/store/sql/TermSqlIndex.php
index 38248d2..21ae5b2 100644
--- a/lib/includes/store/sql/TermSqlIndex.php
+++ b/lib/includes/store/sql/TermSqlIndex.php
@@ -383,51 +383,13 @@
* @return Term[]
*/
public function getTermsOfEntity( EntityId $entityId, array $termTypes
= null, array $languageCodes = null ) {
- if ( $termTypes !== null && empty( $termTypes ) ) {
- return array();
- }
-
- if ( $languageCodes !== null && empty( $languageCodes ) ) {
- return array();
- }
-
- wfProfileIn( __METHOD__ );
-
- $conditions = array(
- 'term_entity_id' => $entityId->getNumericId(),
- 'term_entity_type' => $entityId->getEntityType()
- );
-
- if ( $termTypes !== null ) {
- $conditions['term_type'] = $termTypes;
- }
-
- if ( $languageCodes !== null ) {
- $conditions['term_language'] = $languageCodes;
- }
-
$fields = array(
'term_language',
'term_type',
'term_text',
);
- $dbr = $this->getReadDb();
-
- $res = $dbr->select(
- $this->tableName,
- $fields,
- $conditions,
- __METHOD__
- );
-
- $terms = $this->buildTermResult( $res );
-
- $this->releaseConnection( $dbr );
-
- wfProfileOut( __METHOD__ );
-
- return $terms;
+ return $this->fetchTerms( array( $entityId ),
$entityId->getEntityType(), $fields, $termTypes, $languageCodes );
}
/**
@@ -444,6 +406,28 @@
* @return Term[]
*/
public function getTermsOfEntities( array $entityIds, $entityType,
array $termTypes = null, array $languageCodes = null ) {
+ $fields = array(
+ 'term_entity_id',
+ 'term_entity_type',
+ 'term_language',
+ 'term_type',
+ 'term_text',
+ );
+
+ return $this->fetchTerms( $entityIds, $entityType, $fields,
$termTypes, $languageCodes );
+ }
+
+ /**
+ * @param EntityId[] $entityIds
+ * @param string|null $entityType
+ * @param string[] $fields
+ * @param string[]|null $termTypes
+ * @param string[]|null $languageCodes
+ *
+ * @throws \MWException
+ * @return array
+ */
+ private function fetchTerms( array $entityIds, $entityType, array
$fields, array $termTypes = null, array $languageCodes = null ) {
if ( $entityIds !== null && empty( $entityIds ) ) {
return array();
}
@@ -472,7 +456,9 @@
$numericIds = array();
foreach ( $entityIds as $entityId ) {
- if ( $entityId->getEntityType() !== $entityType ) {
+ if( $entityType === null ) {
+ $entityType = $entityId->getEntityType();
+ } elseif ( $entityId->getEntityType() !== $entityType )
{
throw new MWException( 'ID ' .
$entityId->getSerialization()
. " does not refer to an entity of type
$entityType." );
}
@@ -481,14 +467,6 @@
}
$entityIdentifiers['term_entity_id'] = $numericIds;
-
- $fields = array(
- 'term_entity_id',
- 'term_entity_type',
- 'term_language',
- 'term_type',
- 'term_text',
- );
$dbr = $this->getReadDb();
@@ -528,116 +506,6 @@
*/
public function getWriteDb() {
return $this->getConnection( DB_MASTER );
- }
-
- /**
- * @see TermIndex::termExists
- *
- * @since 0.1
- *
- * @param string $termValue
- * @param string|null $termType
- * @param string|null $termLanguage Language code
- * @param string|null $entityType
- *
- * @return boolean
- */
- public function termExists( $termValue, $termType = null, $termLanguage
= null, $entityType = null ) {
- wfProfileIn( __METHOD__ );
-
- $conditions = array(
- 'term_text' => $termValue,
- );
-
- if ( $termType !== null ) {
- $conditions['term_type'] = $termType;
- }
-
- if ( $termLanguage !== null ) {
- $conditions['term_language'] = $termLanguage;
- }
-
- if ( $entityType !== null ) {
- $conditions['term_entity_type'] = $entityType;
- }
-
- $dbr = $this->getReadDb();
-
- $result = $dbr->selectRow(
- $this->tableName,
- array(
- 'term_entity_id',
- ),
- $conditions,
- __METHOD__
- );
-
- $this->releaseConnection( $dbr );
-
- wfProfileOut( __METHOD__ );
-
- return $result !== false;
- }
-
- /**
- * @see TermIndex::getEntityIdsForLabel
- *
- * @since 0.1
- *
- * @param string $label
- * @param string|null $languageCode
- * @param string|null $entityType
- * @param bool $fuzzySearch if false, only exact matches are returned,
otherwise more relaxed search . Defaults to false.
- *
- * @return EntityId[]
- */
- public function getEntityIdsForLabel( $label, $languageCode = null,
$entityType = null, $fuzzySearch = false ) {
- wfProfileIn( __METHOD__ );
-
- $fuzzySearch = false; // TODO switched off for now until we
have a solution for limiting the results
- $db = $this->getReadDb();
-
- $tables = array( 'terms0' => $this->tableName );
-
- $conds = array( 'terms0.term_type' => Term::TYPE_LABEL );
- if ( $fuzzySearch ) {
- $conds[] = 'terms0.term_text' . $db->buildLike( $label,
$db->anyString() );
- } else {
- $conds['terms0.term_text'] = $label;
- }
-
- $joinConds = array();
-
- if ( !is_null( $languageCode ) ) {
- $conds['terms0.term_language'] = $languageCode;
- }
-
- if ( !is_null( $entityType ) ) {
- $conds['terms0.term_entity_type'] = $entityType;
- }
-
- $entities = $db->select(
- $tables,
- array( 'terms0.term_entity_id',
'terms0.term_entity_type' ),
- $conds,
- __METHOD__,
- array( 'DISTINCT' ),
- $joinConds
- );
-
- $this->releaseConnection( $db );
-
- $entityIds = array_map(
- function( $entity ) {
- // FIXME: this only works for items and
properties
- return
LegacyIdInterpreter::newIdFromTypeAndNumber( $entity->term_entity_type,
$entity->term_entity_id );
- },
- iterator_to_array( $entities )
- );
-
- wfProfileOut( __METHOD__ );
-
- return $entityIds;
}
/**
diff --git a/lib/tests/phpunit/store/TermIndexTest.php
b/lib/tests/phpunit/store/TermIndexTest.php
index 455a1fa..b206372 100644
--- a/lib/tests/phpunit/store/TermIndexTest.php
+++ b/lib/tests/phpunit/store/TermIndexTest.php
@@ -27,7 +27,7 @@
*/
public abstract function getTermIndex();
- public function testGetEntityIdsForLabel() {
+ public function testGetMatchingIDs() {
$lookup = $this->getTermIndex();
$item0 = Item::newEmpty();
@@ -47,65 +47,24 @@
$item1->setDescription( 'en', 'foo bar baz' );
$lookup->saveTermsOfEntity( $item1 );
- $ids = $lookup->getEntityIdsForLabel( 'foobar' );
+ $foobar = new Term( array( 'termType' => Term::TYPE_LABEL,
'termText' => 'foobar' ) );
+ $bazNl= new Term( array( 'termType' => Term::TYPE_LABEL,
'termText' => 'baz', 'termLanguage' => 'nl' ) );
+ $froggerNl = new Term( array( 'termType' => Term::TYPE_LABEL,
'termText' => 'o_O', 'termLanguage' => 'nl' ) );
+
+ $ids = $lookup->getMatchingIDs( array( $foobar ),
Item::ENTITY_TYPE );
$this->assertInternalType( 'array', $ids );
$this->assertContainsOnlyInstancesOf(
'\Wikibase\DataModel\Entity\ItemId', $ids );
$this->assertArrayEquals( array( $id0, $id1 ), $ids );
- $ids = $lookup->getEntityIdsForLabel( 'baz', 'nl' );
+ $ids = $lookup->getMatchingIDs( array( $bazNl ),
Item::ENTITY_TYPE );
$this->assertInternalType( 'array', $ids );
$this->assertContainsOnlyInstancesOf(
'\Wikibase\DataModel\Entity\ItemId', $ids );
$this->assertArrayEquals( array( $id0 ), $ids );
- $ids = $lookup->getEntityIdsForLabel( 'o_O', 'nl' );
+ $ids = $lookup->getMatchingIDs( array( $froggerNl ),
Item::ENTITY_TYPE );
$this->assertInternalType( 'array', $ids );
$this->assertContainsOnlyInstancesOf(
'\Wikibase\DataModel\Entity\ItemId', $ids );
$this->assertArrayEquals( array( $id1 ), $ids );
- }
-
- public function testTermExists() {
- $lookup = $this->getTermIndex();
-
- $item = Item::newEmpty();
- $item->setId( new ItemId( 'Q1234' ) );
-
- $item->setLabel( 'en', 'foobarz' );
- $item->setLabel( 'de', 'foobarz' );
- $item->setLabel( 'nl', 'bazz' );
- $item->setDescription( 'en', 'foobarz' );
- $item->setDescription( 'fr', 'fooz barz bazz' );
- $item->setAliases( 'nl', array( 'a42', 'b42', 'c42' ) );
-
- $lookup->saveTermsOfEntity( $item );
-
- $this->assertFalse( $lookup->termExists( 'foobarz',
'does-not-exist' ) );
- $this->assertFalse( $lookup->termExists( 'foobarz', null,
'does-not-exist' ) );
- $this->assertFalse( $lookup->termExists( 'foobarz', null, null,
'does-not-exist' ) );
-
- $this->assertTrue( $lookup->termExists( 'foobarz' ) );
- $this->assertTrue( $lookup->termExists( 'foobarz',
Term::TYPE_LABEL ) );
- $this->assertTrue( $lookup->termExists( 'foobarz',
Term::TYPE_LABEL, 'en' ) );
- $this->assertTrue( $lookup->termExists( 'foobarz',
Term::TYPE_LABEL, 'de' ) );
- $this->assertTrue( $lookup->termExists( 'foobarz',
Term::TYPE_LABEL, 'de', $item::ENTITY_TYPE ) );
-
- $this->assertFalse( $lookup->termExists( 'foobarz',
Term::TYPE_LABEL, 'de', Property::ENTITY_TYPE ) );
- $this->assertFalse( $lookup->termExists( 'foobarz',
Term::TYPE_LABEL, 'nl' ) );
- $this->assertFalse( $lookup->termExists( 'foobarz',
Term::TYPE_DESCRIPTION, 'de' ) );
- $this->assertFalse( $lookup->termExists( 'foobarz',
Term::TYPE_DESCRIPTION, null, Property::ENTITY_TYPE ) );
- $this->assertFalse( $lookup->termExists( 'dzxfzdtrgfdrtgryfth',
Term::TYPE_LABEL ) );
-
- $this->assertTrue( $lookup->termExists( 'foobarz',
Term::TYPE_DESCRIPTION ) );
- $this->assertTrue( $lookup->termExists( 'foobarz',
Term::TYPE_DESCRIPTION, 'en' ) );
- $this->assertFalse( $lookup->termExists( 'foobarz',
Term::TYPE_DESCRIPTION, 'fr' ) );
-
- $this->assertFalse( $lookup->termExists( 'a42',
Term::TYPE_DESCRIPTION ) );
- $this->assertFalse( $lookup->termExists( 'b42',
Term::TYPE_LABEL ) );
- $this->assertTrue( $lookup->termExists( 'a42' ) );
- $this->assertTrue( $lookup->termExists( 'b42' ) );
- $this->assertTrue( $lookup->termExists( 'a42', Term::TYPE_ALIAS
) );
- $this->assertTrue( $lookup->termExists( 'b42', Term::TYPE_ALIAS
) );
- $this->assertFalse( $lookup->termExists( 'b42',
Term::TYPE_ALIAS, 'de' ) );
- $this->assertTrue( $lookup->termExists( 'b42', null, 'nl' ) );
}
public function testGetMatchingTerms() {
@@ -251,13 +210,14 @@
$item->setId( $id );
$lookup->saveTermsOfEntity( $item );
- $this->assertTrue( $lookup->termExists(
'testDeleteTermsForEntity' ) );
+ $this->assertTermExists( $lookup, 'testDeleteTermsForEntity' );
$this->assertTrue( $lookup->deleteTermsOfEntity( $item->getId()
) !== false );
- $this->assertFalse( $lookup->termExists(
'testDeleteTermsForEntity' ) );
+ $this->assertNotTermExists( $lookup, 'testDeleteTermsForEntity'
);
- $ids = $lookup->getEntityIdsForLabel( 'abc' );
+ $abc = new Term( array( 'termType' => Term::TYPE_LABEL,
'termText' => 'abc' ) );
+ $ids = $lookup->getMatchingIDs( array( $abc ),
Item::ENTITY_TYPE );
$this->assertNotContains( $id, $ids );
}
@@ -276,77 +236,77 @@
$this->assertTrue( $lookup->saveTermsOfEntity( $item ) );
- $this->assertTrue( $lookup->termExists(
+ $this->assertTermExists( $lookup,
'testDeleteTermsForEntity',
Term::TYPE_DESCRIPTION,
'en',
Item::ENTITY_TYPE
- ) );
+ );
- $this->assertTrue( $lookup->termExists(
+ $this->assertTermExists( $lookup,
'ghi',
Term::TYPE_LABEL,
'nl',
Item::ENTITY_TYPE
- ) );
+ );
- $this->assertTrue( $lookup->termExists(
+ $this->assertTermExists( $lookup,
'o',
Term::TYPE_ALIAS,
'fr',
Item::ENTITY_TYPE
- ) );
+ );
// save again - this should hit an optimized code path
// that avoids re-saving the terms if they are the same as
before.
$this->assertTrue( $lookup->saveTermsOfEntity( $item ) );
- $this->assertTrue( $lookup->termExists(
+ $this->assertTermExists( $lookup,
'testDeleteTermsForEntity',
Term::TYPE_DESCRIPTION,
'en',
Item::ENTITY_TYPE
- ) );
+ );
- $this->assertTrue( $lookup->termExists(
+ $this->assertTermExists( $lookup,
'ghi',
Term::TYPE_LABEL,
'nl',
Item::ENTITY_TYPE
- ) );
+ );
- $this->assertTrue( $lookup->termExists(
+ $this->assertTermExists( $lookup,
'o',
Term::TYPE_ALIAS,
'fr',
Item::ENTITY_TYPE
- ) );
+ );
// modify and save again - this should NOT skip saving,
// and make sure the modified term is in the database.
$item->setLabel( 'nl', 'xyz' );
$this->assertTrue( $lookup->saveTermsOfEntity( $item ) );
- $this->assertTrue( $lookup->termExists(
+ $this->assertTermExists( $lookup,
'testDeleteTermsForEntity',
Term::TYPE_DESCRIPTION,
'en',
Item::ENTITY_TYPE
- ) );
+ );
- $this->assertTrue( $lookup->termExists(
+ $this->assertTermExists( $lookup,
'xyz',
Term::TYPE_LABEL,
'nl',
Item::ENTITY_TYPE
- ) );
+ );
- $this->assertTrue( $lookup->termExists(
+ $this->assertTermExists( $lookup,
'o',
Term::TYPE_ALIAS,
'fr',
Item::ENTITY_TYPE
- ) );
+ );
}
public function testUpdateTermsOfEntity() {
@@ -654,4 +614,23 @@
"expected to find $k in terms for item" );
}
+ protected function assertTermExists( TermIndex $termIndex, $text,
$termType = null, $language = null, $entityType = null ) {
+ $this->assertTrue( $this->termExists( $termIndex, $text,
$termType, $language, $entityType ) );
+ }
+
+ protected function assertNotTermExists( TermIndex $termIndex, $text,
$termType = null, $language = null, $entityType = null ) {
+ $this->assertFalse( $this->termExists( $termIndex, $text,
$termType, $language, $entityType ) );
+ }
+
+ private function termExists( TermIndex $termIndex, $text, $termType =
null, $language = null, $entityType = null ) {
+ $termFields = array();
+ $termFields['termText'] = $text;
+
+ if ( $language !== null ) {
+ $termFields['termLanguage'] = $language;
+ }
+
+ $matches = $termIndex->getMatchingTerms( array( new Term(
$termFields ) ), $termType, $entityType );
+ return !empty( $matches );
+ }
}
diff --git a/repo/includes/specials/SpecialItemDisambiguation.php
b/repo/includes/specials/SpecialItemDisambiguation.php
index 3f8c9ef..b34cd6c 100644
--- a/repo/includes/specials/SpecialItemDisambiguation.php
+++ b/repo/includes/specials/SpecialItemDisambiguation.php
@@ -14,6 +14,7 @@
use Wikibase\Lib\Store\EntityTitleLookup;
use Wikibase\Lib\Store\LanguageLabelLookup;
use Wikibase\Repo\WikibaseRepo;
+use Wikibase\Term;
use Wikibase\TermIndex;
/**
@@ -273,7 +274,23 @@
* @return Item[]
*/
private function findLabelUsage( $languageCode, $label ) {
- $entityIds = $this->termIndex->getEntityIdsForLabel( $label,
$languageCode, Item::ENTITY_TYPE, true );
+ //@todo: optionally
+ $protoTerms = array(
+ new Term( array(
+ 'termType' => Term::TYPE_LABEL,
+ 'termLanguage' => $languageCode,
+ 'termText' => $label
+ ) ),
+ );
+
+ //@todo: expose options
+ $options = array(
+ 'caseSensitive' => true,
+ 'prefixSearch' => false,
+ 'LIMIT' => $this->limit
+ );
+
+ $entityIds = $this->termIndex->getMatchingIDs( $protoTerms,
Item::ENTITY_TYPE, $options );
$entities = array();
$count = 0;
--
To view, visit https://gerrit.wikimedia.org/r/179905
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I47098dd3163a2a7962f2d14d6181181793b9e501
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits