jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/904576 )
Change subject: [IMPR] Improvements for APISite.preloadpages ...................................................................... [IMPR] Improvements for APISite.preloadpages - Don't show the "Retrieving <num> pages from site" message by default: add a "quiet" parameter to Site.preloadpages to suppress the message by default - add this parameter to PreloadingGenerator and DequePreloadingGenerator but enable the message here by default - also enable the message within interwiki.py - set default value of groupsize parameter to None; in this case APISite.maxlimit is used as default value - use this default value in archivebot.py - update tests Change-Id: Ie534a1360329463b9c8df27b41e61d78da816968 --- M scripts/interwiki.py M tests/site_generators_tests.py M pywikibot/site/_generators.py M scripts/archivebot.py M pywikibot/pagegenerators/__init__.py 5 files changed, 69 insertions(+), 29 deletions(-) Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified diff --git a/pywikibot/pagegenerators/__init__.py b/pywikibot/pagegenerators/__init__.py index 4608b6c..661dacb 100644 --- a/pywikibot/pagegenerators/__init__.py +++ b/pywikibot/pagegenerators/__init__.py @@ -13,7 +13,7 @@ &params; """ # -# (C) Pywikibot team, 2008-2022 +# (C) Pywikibot team, 2008-2023 # # Distributed under the terms of the MIT license. # @@ -613,13 +613,15 @@ def PreloadingGenerator(generator: Iterable['pywikibot.page.Page'], - groupsize: int = 50 + groupsize: int = 50, + quiet: bool = False ) -> Iterator['pywikibot.page.Page']: - """ - Yield preloaded pages taken from another generator. + """Yield preloaded pages taken from another generator. 
:param generator: pages to iterate over :param groupsize: how many pages to preload at once + :param quiet: If False (default), show the "Retrieving pages" + message """ # pages may be on more than one site, for example if an interwiki # generator is used, so use a separate preloader for each site @@ -633,17 +635,25 @@ if len(sites[site]) >= groupsize: # if this site is at the groupsize, process it group = sites.pop(site) - yield from site.preloadpages(group, groupsize=groupsize) + yield from site.preloadpages(group, groupsize=groupsize, + quiet=quiet) for site, pages in sites.items(): # process any leftover sites that never reached the groupsize - yield from site.preloadpages(pages, groupsize=groupsize) + yield from site.preloadpages(pages, groupsize=groupsize, quiet=quiet) def DequePreloadingGenerator(generator: Iterable['pywikibot.page.Page'], - groupsize: int = 50 + groupsize: int = 50, + quiet: bool = False ) -> Iterator['pywikibot.page.Page']: - """Preload generator of type DequeGenerator.""" + """Preload generator of type DequeGenerator. 
+ + :param generator: pages to iterate over + :param groupsize: how many pages to preload at once + :param quiet: If False (default), show the "Retrieving pages" + message + """ assert isinstance(generator, DequeGenerator), \ 'generator must be a DequeGenerator object' @@ -652,7 +662,7 @@ if not page_count: return - yield from PreloadingGenerator(generator, page_count) + yield from PreloadingGenerator(generator, page_count, quiet) def PreloadingEntityGenerator(generator: Iterable['pywikibot.page.Page'], diff --git a/pywikibot/site/_generators.py b/pywikibot/site/_generators.py index 966c76e..2cb414a 100644 --- a/pywikibot/site/_generators.py +++ b/pywikibot/site/_generators.py @@ -89,32 +89,43 @@ self, pagelist, *, - groupsize: int = 50, + groupsize: Optional[int] = None, templates: bool = False, langlinks: bool = False, pageprops: bool = False, categories: bool = False, - content: bool = True + content: bool = True, + quiet: bool = True ): """Return a generator to a list of preloaded pages. - Pages are iterated in the same order than in the underlying pagelist. - In case of duplicates in a groupsize batch, return the first entry. + Pages are iterated in the same order than in the underlying + pagelist. In case of duplicates in a groupsize batch, return the + first entry. .. versionchanged:: 7.6 *content* parameter was added. .. versionchanged:: 7.7 *categories* parameter was added. + .. versionchanged:: 8.1 + *groupsize* is maxlimit by default. *quiet* parameter was + added. No longer show the "Retrieving pages from site" + message by default. :param pagelist: an iterable that returns Page objects - :param groupsize: how many Pages to query at a time - :param templates: preload pages (typically templates) transcluded in - the provided pages - :param langlinks: preload all language links from the provided pages - to other languages - :param pageprops: preload various properties defined in page content + :param groupsize: how many Pages to query at a time. 
If None + (default), :attr:`maxlimit + <pywikibot.site._apisite.APISite.maxlimit>` is used. + :param templates: preload pages (typically templates) + transcluded in the provided pages + :param langlinks: preload all language links from the provided + pages to other languages + :param pageprops: preload various properties defined in page + content :param categories: preload page categories :param content: preload page content + :param quiet: If True (default), do not show the "Retrieving + pages" message """ props = 'revisions|info|categoryinfo' if templates: @@ -126,7 +137,8 @@ if categories: props += '|categories' - for sublist in itergroup(pagelist, min(groupsize, self.maxlimit)): + groupsize = min(groupsize or self.maxlimit, self.maxlimit) + for sublist in itergroup(pagelist, groupsize): # Do not use p.pageid property as it will force page loading. pageids = [str(p._pageid) for p in sublist if hasattr(p, '_pageid') and p._pageid > 0] @@ -151,8 +163,8 @@ else: rvgen.request['titles'] = list(cache.keys()) rvgen.request['rvprop'] = self._rvprops(content=content) - pywikibot.info('Retrieving {} pages from {}.' 
- .format(len(cache), self)) + if not quiet: + pywikibot.info(f'Retrieving {len(cache)} pages from {self}.') for pagedata in rvgen: pywikibot.debug(f'Preloading {pagedata}') diff --git a/scripts/archivebot.py b/scripts/archivebot.py index 05a36ef..07f4771 100755 --- a/scripts/archivebot.py +++ b/scripts/archivebot.py @@ -717,9 +717,7 @@ for c in range(counter): params = self.get_params(thread.timestamp, c + 1) self.get_archive_page(pattern % params, params) - list(self.site.preloadpages( - self.archives.values(), - groupsize=self.site.maxlimit)) + list(self.site.preloadpages(self.archives.values())) while not counter_found and counter > 1 \ and not archive.exists(): diff --git a/scripts/interwiki.py b/scripts/interwiki.py index d503d69..a8cb116 100755 --- a/scripts/interwiki.py +++ b/scripts/interwiki.py @@ -328,7 +328,7 @@ """ # -# (C) Pywikibot team, 2003-2022 +# (C) Pywikibot team, 2003-2023 # # Distributed under the terms of the MIT license. # @@ -1992,7 +1992,7 @@ # Get the content of the assembled list in one blow gen = site.preloadpages(pageGroup, templates=True, langlinks=True, - pageprops=True) + pageprops=True, quiet=False) for _ in gen: # we don't want to do anything with them now. The # page contents will be read via the Subject class. 
diff --git a/tests/site_generators_tests.py b/tests/site_generators_tests.py index 86fcb78..2f5a68a 100755 --- a/tests/site_generators_tests.py +++ b/tests/site_generators_tests.py @@ -2203,8 +2203,8 @@ links = list(mysite.pagelinks(mainpage, total=20)) with suppress_warnings(WARN_SITE_CODE, category=UserWarning): - gen = mysite.preloadpages(links, groupsize=5, langlinks=True) - pages = list(gen) + pages = list(mysite.preloadpages(links, groupsize=5, + langlinks=True, quiet=False)) self.assertLength(links, pages) for page in pages: -- To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/904576 To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Change-Id: Ie534a1360329463b9c8df27b41e61d78da816968 Gerrit-Change-Number: 904576 Gerrit-PatchSet: 7 Gerrit-Owner: Xqt <i...@gno.de> Gerrit-Reviewer: Matěj Suchánek <matejsuchane...@gmail.com> Gerrit-Reviewer: Mpaa <mpaa.w...@gmail.com> Gerrit-Reviewer: Xqt <i...@gno.de> Gerrit-Reviewer: jenkins-bot Gerrit-MessageType: merged
_______________________________________________ Pywikibot-commits mailing list -- pywikibot-commits@lists.wikimedia.org To unsubscribe send an email to pywikibot-commits-le...@lists.wikimedia.org