Is there a place to send new pywikibot scripts? I wrote one, "cathash", which is rather useful to me, and I think it might be useful to other people as well:
#!/usr/bin/env python3
r"""Compares the contents of a MediaWiki files category to local files.

Usage:

    pwb.py cathash category [localfiles...]

where:

    category    is the name of a MediaWiki category
    localfiles  is an optional list of local files

Hereinafter, "local files" means any file whose name is in "localfiles".
"Remote files" means any file which is a member of the named category.

We normalise each name as follows:

 - Spaces in remote names are replaced with underscores.
 - "File:" at the start of remote names is stripped.
 - The pathname of local files is ignored.

Then we print the union of all local and remote filenames to stdout,
each preceded by one of these prefixes:

    remote:   there is a remote file by this name; the name was either
              not given as a local filename, or such a local file does
              not exist.
    local:    there is a local file by this name, but no corresponding
              remote file.
    neither:  neither a remote file nor a local file exists by this name.
    same:     there are both a remote file and a local file by this
              name, and their SHA1 hashes match.
    differ:   there are both a remote file and a local file by this
              name, but their SHA1 hashes do not match.

If you need to pipe the output to another command, you can use

    cut -d: -f2

to remove the prefixes.
"""
#
# (C) Pywikibot team, 2022-2025
#
# Distributed under the terms of the MIT license.
# from __future__ import annotations import argparse import hashlib import glob import os from collections import defaultdict from contextlib import suppress from pathlib import Path import pywikibot from pywikibot import config from pywikibot.bot import SingleSiteBot class CathashBot(SingleSiteBot): @staticmethod def setup_args(ap): """Declares arguments.""" ap.add_argument('cat') ap.add_argument('localfiles', nargs='*') def __init__(self, args: argparse.Namespace) -> None: """Initializer.""" super().__init__() self.args = args cat_title = vars(args)['cat'] self.locals = {} for localfile in args.localfiles: localname = os.path.basename(localfile) if localname in self.locals: raise ValueError( f"{localname} was requested more than once" ) self.locals[localname] = localfile pywikibot.info(f'Scanning {cat_title!r}') self.cat = pywikibot.Category(self.site, cat_title) def run(self) -> None: remotes = {} results = {} for f in self.cat.members(): title = f.title().replace(' ', '_') if title.startswith('File:'): title = title[5:] remotes[title] = f.latest_file_info.sha1 for remotefile, remote_sha1 in remotes.items(): if ( remotefile in self.locals and os.path.exists(self.locals[remotefile]) ): with open(self.locals[remotefile], 'rb') as f: contents = f.read() local_sha1 = hashlib.sha1(contents).hexdigest() if remote_sha1==local_sha1: results[remotefile] = 'same' else: results[remotefile] = 'differ' else: results[remotefile] = 'remote' for localfile, fullpath in self.locals.items(): if localfile not in remotes: if os.path.exists(fullpath): results[localfile] = 'local' else: results[localfile] = 'neither' for f,r in sorted(results.items()): print(f'{r}:{f}') def main(*args: str) -> None: """Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. 
:param args: command line arguments """ ap = argparse.ArgumentParser(add_help=False) CathashBot.setup_args(ap) local_args = pywikibot.handle_args() args, rest = ap.parse_known_args(local_args) bot = CathashBot(args) bot.run() if __name__ == '__main__': main() _______________________________________________ pywikibot mailing list -- pywikibot@lists.wikimedia.org Public archives at https://lists.wikimedia.org/hyperkitty/list/pywikibot@lists.wikimedia.org/message/3YLHRCDDHMRL4TMRUQNTGONUMXDA3NPM/ To unsubscribe send an email to pywikibot-le...@lists.wikimedia.org