John Vandenberg has uploaded a new change for review. https://gerrit.wikimedia.org/r/179071
Change subject: Use cmtype to restrict page type for Cat.articles ...................................................................... Use cmtype to restrict page type for Cat.articles Bug: T67981 Change-Id: I46a53497a86c3ab3761f9166b6c268c7a3c5f6be --- M pywikibot/page.py M pywikibot/site.py 2 files changed, 50 insertions(+), 9 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/71/179071/1 diff --git a/pywikibot/page.py b/pywikibot/page.py index 192d3e1..8c42050 100644 --- a/pywikibot/page.py +++ b/pywikibot/page.py @@ -49,7 +49,8 @@ SiteDefinitionError ) from pywikibot.tools import ( - ComparableMixin, deprecated, deprecate_arg, deprecated_args, + ComparableMixin, MediaWikiVersion as LV, + deprecated, deprecate_arg, deprecated_args, remove_last_args ) from pywikibot import textlib @@ -2062,7 +2063,7 @@ @deprecate_arg("startFrom", "startsort") def articles(self, recurse=False, step=None, total=None, - content=False, namespaces=None, sortby="", + content=False, namespaces=None, sortby=None, starttime=None, endtime=None, startsort=None, endsort=None): """ @@ -2102,9 +2103,6 @@ @type endsort: str """ - if namespaces is None: - namespaces = [x for x in self.site.namespaces() - if x >= 0 and x != 14] for member in self.site.categorymembers(self, namespaces=namespaces, step=step, total=total, @@ -2112,7 +2110,8 @@ starttime=starttime, endtime=endtime, startsort=startsort, - endsort=endsort + endsort=endsort, + member_type=['page', 'file'] ): yield member if total is not None: diff --git a/pywikibot/site.py b/pywikibot/site.py index 3c2c782..82427be 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -2788,10 +2788,10 @@ step=step, total=total, g_content=content) return tlgen - def categorymembers(self, category, namespaces=None, sortby="", + def categorymembers(self, category, namespaces=None, sortby=None, reverse=False, starttime=None, endtime=None, startsort=None, endsort=None, step=None, total=None, - content=False): + 
content=False, member_type=None): """Iterate members of specified category. @param category: The Category to iterate. @@ -2822,7 +2822,9 @@ @type endsort: str @param content: if True, load the current content of each iterated page (default False) - + @type content: bool + @param member_type: only yield members of the given type(s) + @type member_type: str or iterable of str; values: page, subcat, file """ if category.namespace() != 14: raise Error( @@ -2844,6 +2846,46 @@ if startsort and endsort and startsort > endsort: raise ValueError( "categorymembers: startsort must be less than endsort") + + assert(not member_type or sortby != 'timestamp') + if isinstance(member_type, basestring): + member_type = set([member_type]) + if member_type and LV(self.site.version()) < LV("1.12"): + # Retrofit cmtype/member_type, available on MW API 1.12+, + # to use namespaces available on earlier versions. + if 'page' in member_type: + excluded_namespaces = set() + if 'file' not in member_type: + excluded_namespaces.add(6) + if 'subcat' not in member_type: + excluded_namespaces.add(14) + + if namespaces: + for exclude_ns in excluded_namespaces: + if exclude_ns in namespaces: + raise ValueError( + 'incompatible namespaces %r and member_type %r' + % (namespaces, member_type)) + else: + # if the number of namespaces is greater than permitted by + # the API, it will issue a warning and use the namespaces + # up until the limit, which will usually be sufficient. + # TODO: QueryGenerator should detect when the number of + # namespaces requested is higher than available, and split + # the request into several batches. 
+ namespaces = [x for x in self.namespaces() + if x >= 0 and x not in excluded_namespaces] + else: + namespaces = [] + if 'file' in member_type: + namespaces.append(6) + if 'subcat' in member_type: + namespaces.append(14) + + member_type = None + if member_type: + cmargs['gcmtype'] = member_type + if reverse: cmargs["gcmdir"] = "desc" # API wants start/end params in opposite order if using descending -- To view, visit https://gerrit.wikimedia.org/r/179071 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I46a53497a86c3ab3761f9166b6c268c7a3c5f6be Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: John Vandenberg <jay...@gmail.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits