jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/326903 )
Change subject: Use related-articles in translation recommendation
......................................................................
Use related-articles in translation recommendation
Bug: T151793
Change-Id: Iff6b932606cebb7fc239fdb1d64703525069c782
---
M recommendation/api/external_data/fetcher.py
M recommendation/api/external_data/wikidata.py
M recommendation/api/types/translation/candidate_finders.py
M recommendation/api/types/translation/translation.py
M recommendation/data/labs_setup.sh
5 files changed, 31 insertions(+), 4 deletions(-)
Approvals:
Nschaaf: Looks good to me, approved
jenkins-bot: Verified
diff --git a/recommendation/api/external_data/fetcher.py b/recommendation/api/external_data/fetcher.py
index 76e8b6b..6d6fdf7 100644
--- a/recommendation/api/external_data/fetcher.py
+++ b/recommendation/api/external_data/fetcher.py
@@ -108,3 +108,8 @@
seed = 'morelike:' + seed
params['srsearch'] = seed
return endpoint, params
+
+
+def get_related_articles(source, seed):
+ return get('http://recommend-related-articles.wmflabs.org/types/related_articles/v1/articles',
+ dict(source=source, seed=seed, count=500))
diff --git a/recommendation/api/external_data/wikidata.py b/recommendation/api/external_data/wikidata.py
index 82f7178..7359fbc 100644
--- a/recommendation/api/external_data/wikidata.py
+++ b/recommendation/api/external_data/wikidata.py
@@ -9,11 +9,11 @@
WikidataItem = collections.namedtuple('WikidataItem', ['id', 'title', 'url'])
-def query(params):
+def query(params, expected_sitelinks=1):
"""
Query the wikidata endpoint and return a list of WikidataItem
- This only includes items that have exactly 1 sitelink
+ This only includes items that have exactly expected_sitelinks sitelink
"""
endpoint = configuration.get_config_value('endpoints', 'wikidata')
try:
@@ -28,7 +28,7 @@
for id, entity in entities.items():
sitelinks = entity.get('sitelinks', {})
- if len(sitelinks.keys()) != 1:
+ if len(sitelinks.keys()) != expected_sitelinks:
continue
sitelink = sitelinks.popitem()[1]
@@ -43,7 +43,12 @@
def get_items_in_source_missing_in_target_by_titles(source, target, titles):
params = configuration.get_config_dict('wikidata_titles_to_items_params')
params['sites'] = params['sites'].format(source=source)
+ # We want the sitefilter to include both the source and target
+ # wikis. This sets up the scenario where if there is only 1 sitelink
+ # present, that means that the article is missing in the target (since
+ # the title will have come from the source wiki)
params['sitefilter'] = params['sitefilter'].format(target=target)
+ params['sitefilter'] += '|{}wiki'.format(source)
params['titles'] = '|'.join(titles)
items = query(params)
diff --git a/recommendation/api/types/translation/candidate_finders.py b/recommendation/api/types/translation/candidate_finders.py
index 70ae772..ba73bf2 100644
--- a/recommendation/api/types/translation/candidate_finders.py
+++ b/recommendation/api/types/translation/candidate_finders.py
@@ -112,3 +112,19 @@
articles.append(a)
return articles[:n]
+
+
+class RelatedArticleFinder(CandidateFinder):
+ def get_candidates(self, s, seed, n):
+ results = fetcher.get_related_articles(s, seed)
+ if len(results) == 0:
+ return MorelikeCandidateFinder().get_candidates(s, seed, n)
+
+ articles = []
+ for item in results:
+ a = Article(item['title'])
+ a.wikidata_id = item['wikidata_id']
+ a.rank = item['score']
+ articles.append(a)
+
+ return articles[:n]
diff --git a/recommendation/api/types/translation/translation.py b/recommendation/api/types/translation/translation.py
index d271c50..f22a880 100644
--- a/recommendation/api/types/translation/translation.py
+++ b/recommendation/api/types/translation/translation.py
@@ -166,6 +166,7 @@
'morelike': candidate_finders.MorelikeCandidateFinder(),
'wiki': candidate_finders.MorelikeCandidateFinder(),
'mostpopular': candidate_finders.PageviewCandidateFinder(),
+ 'related_articles': candidate_finders.RelatedArticleFinder()
}
diff --git a/recommendation/data/labs_setup.sh b/recommendation/data/labs_setup.sh
index 8290c97..377b2d1 100755
--- a/recommendation/data/labs_setup.sh
+++ b/recommendation/data/labs_setup.sh
@@ -31,7 +31,7 @@
cp ${TMP_PATH}/recommendation-api/recommendation/data/* ${ETC_PATH}
cp ${ETC_PATH}/recommendation.nginx /etc/nginx/sites-available/recommendation
ln -s /etc/nginx/sites-available/recommendation /etc/nginx/sites-enabled/
-cp ${ETC_PATH}/recommendation.service /etc/systemd/system/multi-user.target/wants/
+cp ${ETC_PATH}/recommendation.service /etc/systemd/system/multi-user.target.wants/
systemctl enable recommendation.service
systemctl daemon-reload
--
To view, visit https://gerrit.wikimedia.org/r/326903
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Iff6b932606cebb7fc239fdb1d64703525069c782
Gerrit-PatchSet: 3
Gerrit-Project: research/recommendation-api
Gerrit-Branch: master
Gerrit-Owner: Nschaaf <[email protected]>
Gerrit-Reviewer: Nschaaf <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits