vitaly-zdanevich created this task.
vitaly-zdanevich added a project: Pywikibot.
Restricted Application added subscribers: pywikibot-bugs-list, Aklapper.

TASK DESCRIPTION
  Hi, yesterday I uploaded 1916 files into 
https://commons.wikimedia.org/wiki/Category:Mogilyovskiye_gubernskiye_vedomosti_1902
  
  This Pywikibot count also returns 1019:
  
    import pywikibot
    
    # Connect to Wikimedia Commons.
    site = pywikibot.Site('commons', 'commons')

    # The category title must be a single string literal; a line break inside
    # the quotes is a syntax error.
    cat = pywikibot.Category(
        site, 'Category:Mogilyovskiye gubernskiye vedomosti 1902'
    )
    print('cat created')

    # Count the category members, printing one dot per member as progress.
    count = 0
    for page in cat.articles(namespaces=[6]):  # File namespace only
        print('.', end='')
        count += 1

    print(count)
  
  Nobody changed the categories, according to this script, which prints the
unique categories of the last 1916 uploads:
  
    #!/usr/bin/env python3
    '''Print unique categories from the latest files uploaded by a Commons 
user.'''
    
    from __future__ import annotations
    
    import argparse
    import sys
    from typing import Iterable, Optional
    
    import pywikibot
    
    # Substrings that mark a category as excluded: get_filtered_categories()
    # skips any category whose case-folded title contains one of these, so the
    # entries are written in lower case.
    EXCLUDED_CATEGORY_FRAGMENTS = (
        'machine-readable author',
        'lacking author information',
        'license',
    )
    
    
    def iter_upload_events(
        site: pywikibot.site.APISite,
        username: str,
        limit: Optional[int],
    ) -> Iterable[pywikibot.logentries.UploadEntry]:
        '''Yield upload log events of *username*, newest first.

        *limit* is passed to ``site.logevents`` as ``total``; ``None`` leaves
        the number of events uncapped here.
        '''
        upload_log = site.logevents(
            logtype='upload',
            user=username,
            total=limit,
        )
        yield from upload_log
    
    
    def get_file_page(
        site: pywikibot.site.APISite,
        event: pywikibot.logentries.UploadEntry,
    ) -> pywikibot.FilePage:
        '''Resolve an upload log event to the FilePage it refers to.

        If ``event.page()`` is not already a ``FilePage``, a new one is built
        on *site* from the page title.
        '''
        target = event.page()
        if not isinstance(target, pywikibot.FilePage):
            target = pywikibot.FilePage(site, target.title())
        return target
    
    
    def get_filtered_categories(file_page: pywikibot.FilePage) -> list[str]:
        '''Return the category titles of *file_page* that are not excluded.

        Titles are taken without the namespace prefix. A title is dropped when
        its case-folded form contains any entry of
        ``EXCLUDED_CATEGORY_FRAGMENTS``.
        '''
        # Lazy generator: each title is fetched right before it is checked.
        names = (cat.title(with_ns=False) for cat in file_page.categories())
        return [
            name
            for name in names
            if not any(
                marker in name.casefold()
                for marker in EXCLUDED_CATEGORY_FRAGMENTS
            )
        ]
    
    
    def parse_args() -> argparse.Namespace:
        '''Parse the command-line options of this script.

        The argument list that gets parsed is whatever
        ``pywikibot.handle_args()`` returns.

        Returns:
            Namespace with ``limit`` (int, default 2000),
            ``include_reuploads`` (bool, default False) and ``user``
            (str, default None).
        '''
        # Long texts are built from adjacent string literals so that no
        # literal has to span a line break (which would be a syntax error).
        parser = argparse.ArgumentParser(
            description=(
                'Print unique categories from the latest files uploaded '
                'by a Commons user.'
            ),
        )
        parser.add_argument(
            '--limit',
            type=int,
            default=2000,
            help=(
                'Number of latest upload log entries to process '
                '(default: 2000).'
            ),
        )
        parser.add_argument(
            '--include-reuploads',
            action='store_true',
            help=(
                'Include overwrite/reupload log actions '
                '(default: only new uploads).'
            ),
        )
        parser.add_argument(
            '--user',
            default=None,
            help=(
                'Commons username. Defaults to the currently logged-in '
                'Pywikibot user.'
            ),
        )
        return parser.parse_args(pywikibot.handle_args())
    
    
    def main() -> int:
        '''Collect and print the unique categories of a user's latest uploads.

        Progress dots (one per ten processed files) and error messages go to
        stderr; the sorted category titles go to stdout.

        Returns:
            0 on success, 2 when ``--limit`` is not positive or no username
            could be determined.
        '''
        options = parse_args()
        if options.limit < 1:
            print('--limit must be a positive integer', file=sys.stderr)
            return 2

        commons = pywikibot.Site('commons', 'commons')
        uploader = options.user or commons.user()
        if not uploader:
            print(
                'Could not detect the current user. Pass --user USERNAME.',
                file=sys.stderr,
            )
            return 2

        with_reuploads = options.include_reuploads
        # When reuploads are skipped, the log query itself is uncapped and the
        # loop stops once enough files have been processed.
        max_events = options.limit if with_reuploads else None
        found: set[str] = set()
        done = 0

        for entry in iter_upload_events(commons, uploader, max_events):
            if not (with_reuploads or entry.action() == 'upload'):
                continue

            try:
                target = get_file_page(commons, entry)
                found.update(get_filtered_categories(target))
                done += 1
                if done % 10 == 0:
                    print('.', end='', flush=True, file=sys.stderr)
            except Exception as exc:  # pragma: no cover - runtime resilience
                # A failing event is reported and skipped; it does not count
                # towards the limit.
                pywikibot.error(f'Failed to process log event: {exc}')

            if done >= options.limit:
                break

        # Terminate the line of progress dots, if any were printed.
        if done >= 10:
            print(file=sys.stderr)

        ordered = sorted(found, key=str.casefold)
        for name in ordered:
            print(name)

        return 0
    
    
    # When run as a script, call main() and use its return value as the
    # process exit status.
    if __name__ == '__main__':
        raise SystemExit(main())
  
  So, it looks like an issue with Commons...

TASK DETAIL
  https://phabricator.wikimedia.org/T418064

EMAIL PREFERENCES
  https://phabricator.wikimedia.org/settings/panel/emailpreferences/

_______________________________________________
pywikibot-bugs mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to