jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/342588 )
Change subject: [IMPR] Make preloading generators work with arbitrary entity types ...................................................................... [IMPR] Make preloading generators work with arbitrary entity types Bug: T160397 Change-Id: Ie068ca3427063ff13ba4545544a1b3965ab7d88d --- M pywikibot/pagegenerators.py M pywikibot/site.py M tests/site_tests.py 3 files changed, 47 insertions(+), 32 deletions(-) Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index ab4ed51..429336a 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -14,7 +14,7 @@ &params; """ # -# (C) Pywikibot team, 2008-2017 +# (C) Pywikibot team, 2008-2018 # # Distributed under the terms of the MIT license. # @@ -2073,11 +2073,11 @@ @deprecated_args(step='groupsize') -def PreloadingItemGenerator(generator, groupsize=50): +def PreloadingEntityGenerator(generator, groupsize=50): """ Yield preloaded pages taken from another generator. - Function basically is copied from above, but for ItemPage's + Function basically is copied from above, but for Wikibase entities. 
@param generator: pages to iterate over @param groupsize: how many pages to preload at once @@ -2085,26 +2085,16 @@ """ sites = {} for page in generator: - if not isinstance(page, pywikibot.page.WikibasePage): - datasite = page.site.data_repository() - if page.namespace() != datasite.item_namespace: - pywikibot.output( - u'PreloadingItemGenerator skipping %s as it is not in %s' - % (page, datasite.item_namespace)) - continue - - page = pywikibot.ItemPage(datasite, page.title()) - site = page.site sites.setdefault(site, []).append(page) if len(sites[site]) >= groupsize: # if this site is at the groupsize, process it group = sites.pop(site) - for i in site.preloaditempages(group, groupsize): + for i in site.preload_entities(group, groupsize): yield i for site, pages in sites.items(): # process any leftover sites that never reached the groupsize - for i in site.preloaditempages(pages, groupsize): + for i in site.preload_entities(pages, groupsize): yield i @@ -2982,6 +2972,8 @@ yield page +PreloadingItemGenerator = redirect_func(PreloadingEntityGenerator, + old_name='PreloadingItemGenerator') # Deprecated old names available for compatibility with compat. ImageGenerator = redirect_func(PageClassGenerator, old_name='ImageGenerator') FileGenerator = redirect_func(PageClassGenerator, old_name='FileGenerator') diff --git a/pywikibot/site.py b/pywikibot/site.py index 1eff176..152da7c 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -6,7 +6,7 @@ groups of wikis on the same topic in different languages. """ # -# (C) Pywikibot team, 2008-2017 +# (C) Pywikibot team, 2008-2018 # # Distributed under the terms of the MIT license. 
# @@ -7351,6 +7351,10 @@ super(DataSite, self).__init__(*args, **kwargs) self._item_namespace = None self._property_namespace = None + self._type_to_class = { + 'item': pywikibot.ItemPage, + 'property': pywikibot.PropertyPage, + } def _cache_entity_namespaces(self): """Find namespaces for each known wikibase entity type.""" @@ -7587,9 +7591,9 @@ raise api.APIError(data['errors']) return data['entities'] - def preloaditempages(self, pagelist, groupsize=50): + def preload_entities(self, pagelist, groupsize=50): """ - Yield ItemPages with content prefilled. + Yield subclasses of WikibasePage's with content prefilled. Note that pages will be iterated in a different order than in the underlying pagelist. @@ -7607,23 +7611,34 @@ for key in ident: req[key].append(ident[key]) else: - assert p.site.has_data_repository, \ - 'Site must have a data repository' - if (p.site == p.site.data_repository() and - p.namespace() == p.data_repository.item_namespace): + if p.site == self and p.namespace() in ( + self.item_namespace, self.property_namespace): req['ids'].append(p.title(withNamespace=False)) else: + assert p.site.has_data_repository, \ + 'Site must have a data repository' req['sites'].append(p.site.dbName()) req['titles'].append(p._link._text) req = self._simple_request(action='wbgetentities', **req) data = req.submit() - for qid in data['entities']: - item = pywikibot.ItemPage(self, qid) - item._content = data['entities'][qid] + for entity in data['entities']: + if 'missing' in data['entities'][entity]: + continue + cls = self._type_to_class[data['entities'][entity]['type']] + page = cls(self, entity) # No api call is made because item._content is given - item.get(get_redirect=True) - yield item + page._content = data['entities'][entity] + try: + page.get() # cannot provide get_redirect=True (T145971) + except pywikibot.IsRedirectPage: + pass + yield page + + @deprecated('DataSite.preload_entities') + def preloaditempages(self, pagelist, groupsize=50): + """DEPRECATED.""" 
+ return self.preload_entities(pagelist, groupsize) def getPropertyType(self, prop): """ diff --git a/tests/site_tests.py b/tests/site_tests.py index f2dd8ef..81700ca 100644 --- a/tests/site_tests.py +++ b/tests/site_tests.py @@ -3007,7 +3007,7 @@ class TestDataSitePreloading(WikidataTestCase): - """Test DataSite.preloaditempages for repo pages.""" + """Test DataSite.preload_entities for repo pages.""" def test_item(self): """Test that ItemPage preloading works for Item objects.""" @@ -3016,7 +3016,7 @@ for num in range(1, 6)] seen = [] - for item in datasite.preloaditempages(items): + for item in datasite.preload_entities(items): self.assertIsInstance(item, pywikibot.ItemPage) self.assertTrue(hasattr(item, '_content')) self.assertNotIn(item, seen) @@ -3031,24 +3031,32 @@ for num in range(1, 6)] seen = [] - for item in datasite.preloaditempages(pages): + for item in datasite.preload_entities(pages): self.assertIsInstance(item, pywikibot.ItemPage) self.assertTrue(hasattr(item, '_content')) self.assertNotIn(item, seen) seen.append(item) self.assertEqual(len(seen), 5) + def test_property(self): + """Test that preloading works for properties.""" + datasite = self.get_repo() + page = pywikibot.Page(datasite, 'P6') + property_page = next(datasite.preload_entities([page])) + self.assertIsInstance(property_page, pywikibot.PropertyPage) + self.assertTrue(hasattr(property_page, '_content')) + class TestDataSiteClientPreloading(DefaultWikidataClientTestCase): - """Test DataSite.preloaditempages for client pages.""" + """Test DataSite.preload_entities for client pages.""" def test_non_item(self): """Test that ItemPage preloading works with Page generator.""" mainpage = self.get_mainpage() datasite = self.get_repo() - item = next(datasite.preloaditempages([mainpage])) + item = next(datasite.preload_entities([mainpage])) self.assertIsInstance(item, pywikibot.ItemPage) self.assertTrue(hasattr(item, '_content')) self.assertEqual(item.id, 'Q5296') -- To view, visit 
https://gerrit.wikimedia.org/r/342588 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-MessageType: merged Gerrit-Change-Id: Ie068ca3427063ff13ba4545544a1b3965ab7d88d Gerrit-Change-Number: 342588 Gerrit-PatchSet: 9 Gerrit-Owner: Matěj Suchánek <matejsuchane...@gmail.com> Gerrit-Reviewer: Dalba <dalba.w...@gmail.com> Gerrit-Reviewer: Mpaa <mpaa.w...@gmail.com> Gerrit-Reviewer: Multichill <maar...@mdammers.nl> Gerrit-Reviewer: Xqt <i...@gno.de> Gerrit-Reviewer: jenkins-bot <>
_______________________________________________ Pywikibot-commits mailing list Pywikibot-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits