Matěj Suchánek has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/342588 )
Change subject: [IMPR] Make preloading generators work with arbitrary entity types ...................................................................... [IMPR] Make preloading generators work with arbitrary entity types Bug: T160397 Change-Id: Ie068ca3427063ff13ba4545544a1b3965ab7d88d --- M pywikibot/pagegenerators.py M pywikibot/site.py 2 files changed, 47 insertions(+), 19 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/88/342588/2 diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index 13618af..fab81fc 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -14,7 +14,7 @@ &params; """ # -# (C) Pywikibot team, 2008-2017 +# (C) Pywikibot team, 2008-2018 # # Distributed under the terms of the MIT license. # @@ -488,7 +488,7 @@ dupfiltergen, self.subpage_max_depth) if self.claimfilter_list: - dupfiltergen = PreloadingItemGenerator(dupfiltergen) + dupfiltergen = PreloadingEntityGenerator(dupfiltergen) for claim in self.claimfilter_list: dupfiltergen = ItemClaimFilterPageGenerator(dupfiltergen, claim[0], claim[1], @@ -1948,11 +1948,11 @@ @deprecated_args(step='groupsize') -def PreloadingItemGenerator(generator, groupsize=50): +def PreloadingEntityGenerator(generator, groupsize=50): """ Yield preloaded pages taken from another generator. - Function basically is copied from above, but for ItemPage's + Function basically is copied from above, but for Wikibase entities. 
@param generator: pages to iterate over @param groupsize: how many pages to preload at once @@ -1960,26 +1960,16 @@ """ sites = {} for page in generator: - if not isinstance(page, pywikibot.page.WikibasePage): - datasite = page.site.data_repository() - if page.namespace() != datasite.item_namespace: - pywikibot.output( - u'PreloadingItemGenerator skipping %s as it is not in %s' - % (page, datasite.item_namespace)) - continue - - page = pywikibot.ItemPage(datasite, page.title()) - site = page.site sites.setdefault(site, []).append(page) if len(sites[site]) >= groupsize: # if this site is at the groupsize, process it group = sites.pop(site) - for i in site.preloaditempages(group, groupsize): + for i in site.preload_entities(group, groupsize): yield i for site, pages in sites.items(): # process any leftover sites that never reached the groupsize - for i in site.preloaditempages(pages, groupsize): + for i in site.preload_entities(pages, groupsize): yield i @@ -2880,6 +2870,8 @@ yield page +PreloadingItemGenerator = redirect_func(PreloadingEntityGenerator, + old_name='PreloadingItemGenerator') # Deprecated old names available for compatibility with compat. ImageGenerator = redirect_func(PageClassGenerator, old_name='ImageGenerator') FileGenerator = redirect_func(PageClassGenerator, old_name='FileGenerator') diff --git a/pywikibot/site.py b/pywikibot/site.py index c715902..7451931 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -6,7 +6,7 @@ groups of wikis on the same topic in different languages. """ # -# (C) Pywikibot team, 2008-2017 +# (C) Pywikibot team, 2008-2018 # # Distributed under the terms of the MIT license. 
# @@ -7056,6 +7056,11 @@ class DataSite(APISite): + type_to_class = { + 'item': pywikibot.page.ItemPage, + 'property': pywikibot.page.PropertyPage, + } + """Wikibase data capable site.""" def __init__(self, *args, **kwargs): @@ -7255,9 +7260,9 @@ raise api.APIError(data['errors']) return data['entities'] - def preloaditempages(self, pagelist, groupsize=50): + def preload_entities(self, pagelist, groupsize=50): """ - Yield ItemPages with content prefilled. + Yield subclasses of WikibasePage with content prefilled. Note that pages will be iterated in a different order than in the underlying pagelist. @@ -7275,6 +7280,37 @@ for key in ident: req[key].append(ident[key]) else: + if p.site == self and p.namespace() in ( + self.item_namespace, self.property_namespace): + req['ids'].append(p.title(withNamespace=False)) + else: + assert p.site.has_data_repository, \ + 'Site must have a data repository' + req['sites'].append(p.site.dbName()) + req['titles'].append(p._link._text) + + req = self._simple_request(action='wbgetentities', **req) + data = req.submit() + for entity in data['entities']: + if 'missing' in data['entities'][entity]: + continue + cls = self.type_to_class[data['entities'][entity]['type']] + page = cls(self, entity) + page._content = data['entities'][entity] + page.get() + yield page + + @deprecated('DataSite.preload_entities') + def preloaditempages(self, pagelist, groupsize=50): + """DEPRECATED""" + for sublist in itergroup(pagelist, groupsize): + req = {'ids': [], 'titles': [], 'sites': []} + for p in sublist: + if isinstance(p, pywikibot.page.WikibasePage): + ident = p._defined_by() + for key in ident: + req[key].append(ident[key]) + else: assert p.site.has_data_repository, \ 'Site must have a data repository' if (p.site == p.site.data_repository() and -- To view, visit https://gerrit.wikimedia.org/r/342588 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: 
Ie068ca3427063ff13ba4545544a1b3965ab7d88d Gerrit-PatchSet: 2 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Matěj Suchánek <matejsuchane...@gmail.com> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits