jenkins-bot has submitted this change and it was merged.
Change subject: Use max( |sitelinks|, |labels| ) for term weight.
......................................................................
Use max( |sitelinks|, |labels| ) for term weight.
Considering the number of labels for the term weight boosts
the ranking of items that have no/few sitelinks, but many labels.
This may be the case for "structural" items like Q6581097, which
have no corresponding Wikipedia pages.
Bug: T94404
Change-Id: I17ad13eb0496e5c90cf6c47749aa5523c1954728
---
M lib/includes/store/sql/TermSqlIndex.php
M repo/tests/phpunit/includes/store/sql/TermSqlIndexTest.php
2 files changed, 13 insertions(+), 7 deletions(-)
Approvals:
Hoo man: Looks good to me, but someone else must approve
Aude: Looks good to me, approved
Thiemo Mättig (WMDE): Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/lib/includes/store/sql/TermSqlIndex.php
b/lib/includes/store/sql/TermSqlIndex.php
index 535d2ec..8c3cae1 100644
--- a/lib/includes/store/sql/TermSqlIndex.php
+++ b/lib/includes/store/sql/TermSqlIndex.php
@@ -288,8 +288,7 @@
/**
* Calculate a weight the given entity to be used for ranking. Should
be normalized
* between 0 and 1, but that's not a strong constraint.
- * This implementation relies on sitelinks, and simply takes the number
of sitelinks
- * as the weight.
+ * This implementation uses the max of the number of labels and the
number of sitelinks.
*
* TODO Should be moved to its own object and be added via dependency
injection
*
@@ -300,11 +299,17 @@
private function getWeight( EntityDocument $entity ) {
// FIXME: OCP violation. No support for new types of entities
can be registered
- if ( $entity instanceof Item ) {
- return $entity->getSiteLinkList()->count() / 1000.0;
+ $weight = 0.0;
+
+ if ( $entity instanceof FingerprintProvider ) {
+ $weight = max( $weight,
$entity->getFingerprint()->getLabels()->count() / 1000.0 );
}
- return 0.0;
+ if ( $entity instanceof Item ) {
+ $weight = max( $weight,
$entity->getSiteLinkList()->count() / 1000.0 );
+ }
+
+ return $weight;
}
/**
diff --git a/repo/tests/phpunit/includes/store/sql/TermSqlIndexTest.php
b/repo/tests/phpunit/includes/store/sql/TermSqlIndexTest.php
index b6c437a..ad7752e 100644
--- a/repo/tests/phpunit/includes/store/sql/TermSqlIndexTest.php
+++ b/repo/tests/phpunit/includes/store/sql/TermSqlIndexTest.php
@@ -200,10 +200,11 @@
$termIndex->saveTermsOfEntity( $item2 );
+ // The number of labels counts too
$item3 = new Item( new ItemId( 'Q108' ) );
$item3->setLabel( $languageCode, $termText );
- $item3->getSiteLinkList()->addNewSiteLink( 'hrwiki', 'C' );
- $item3->getSiteLinkList()->addNewSiteLink( 'uzwiki', 'C' );
+ $item3->setLabel( 'qxy', $termText );
+ $item3->setLabel( 'qxz', $termText );
$termIndex->saveTermsOfEntity( $item3 );
--
To view, visit https://gerrit.wikimedia.org/r/202456
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I17ad13eb0496e5c90cf6c47749aa5523c1954728
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <[email protected]>
Gerrit-Reviewer: Aude <[email protected]>
Gerrit-Reviewer: Hoo man <[email protected]>
Gerrit-Reviewer: Thiemo Mättig (WMDE) <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits