jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1061456?usp=email )
Change subject: [IMPR] add -category option to delinker.py ...................................................................... [IMPR] add -category option to delinker.py The -category option works as follows: - retrieve pages from "Pages with missing files" listed on wikibase with item Q4989282. Any other category can be given with this option. - for every page found in this category process their image links - skip further processing if the FilePage exists locally or in the image repository - also skip further processing if the file was not deleted. In that case there is an invalid link found on the source page. - finally delink the found image link Bug: T372206 Change-Id: I49d9260f2cbcb7e98f1916da82b191119a0bf127 --- M scripts/delinker.py 1 file changed, 119 insertions(+), 36 deletions(-) Approvals: jenkins-bot: Verified Aram: Looks good to me, but someone else must approve Xqt: Looks good to me, approved diff --git a/scripts/delinker.py b/scripts/delinker.py index 975d424..cfb83a4 100755 --- a/scripts/delinker.py +++ b/scripts/delinker.py @@ -7,6 +7,11 @@ The following parameters are supported: +-category: Retrieve pages to delink from "Pages with missing files" + category. Usually the category is found on Q4989282 wikibase + item but can be overridden by giving the category title + with that option. *-since* option is ignored. + -exclude: If the deletion log contains this pattern, the file is not delinked (default is 'no-delink'). @@ -27,6 +32,8 @@ .. versionadded:: 7.2 This script is completely rewritten from compat branch. +.. versionchanged:: 9.4 + *-category* option was added. 
""" # # (C) Pywikibot team, 2006-2024 @@ -52,14 +59,120 @@ class CommonsDelinker(SingleSiteBot, ConfigParserBot, AutomaticTWSummaryBot): - """Bot to delink deleted images.""" + """Base Delinker Bot.""" + + summary_key = 'delinker-delink' + + def skip_page(self, page) -> bool: + """Skip pages which neither exists locally nor on shared repository.""" + pywikibot.info('.', newline=False) + if page.exists() or page.file_is_shared(): + return True + return super().skip_page(page) + + def treat(self, file_page): + """Set page to current page and delink that page.""" + # use image_regex from image.py + namespace = file_page.site.namespaces[6] + escaped = case_escape(namespace.case, + file_page.title(with_ns=False), + underscore=True) + self.image_regex = re.compile( + r'\[\[ *(?:{})\s*:\s*{} *(?P<parameters>\|' + r'(?:[^\[\]]|\[\[[^\]]+\]\]|\[[^\]]+\])*|) *\]\]' + .format('|'.join(ignore_case(s) for s in namespace), escaped)) + + shown = False + for page in file_page.using_pages( + content=True, namespaces=self.site.namespaces.MAIN): + if not shown: + pywikibot.info('\n>>> Delinking <<lightgreen>>' + f'{file_page.title()}<<default>> <<<') + shown = True + super().treat(page) + + def treat_page(self): + """Delink a single page.""" + new = re.sub(self.image_regex, '', self.current_page.text) + self.put_current(new) + + +class DelinkerFromCategory(CommonsDelinker): + + """Bot to delink deleted images from pages found in category.""" + + pages_with_missing_files = 'Q4989282' + + update_options = { + 'exclude': 'no-delink', + 'localonly': False, + 'category': True, + } + + @property + def generator(self): + """Retrieve pages with missing files and yield their image links.""" + if self.opt.category is True: + cat = self.site.page_from_repository(self.pages_with_missing_files) + else: + cat = pywikibot.Category(self.site, self.opt.category) + if not cat.exists(): + cat = None + + if not cat: + pywikibot.warning('No valid category given for generator') + return + + for article in 
cat.articles(namespaces=self.site.namespaces.MAIN): + yield from article.imagelinks() + + def init_page(self, item) -> pywikibot.page.FilePage: + """Upcast logevent to FilePage and combine edit summary.""" + return pywikibot.FilePage(item, ignore_extension=True) + + def skip_page(self, page) -> pywikibot.page.FilePage: + """Skip pages which aren't deleted on any repository.""" + if super().skip_page(page): + return True + + params = { + 'logtype': 'delete', + 'reverse': True, + 'page': 'File:' + page.title(underscore=True, with_ns=False), + } + try: + entry = next(self.site.logevents(**params)) + except StopIteration: + try: + entry = next(self.site.image_repository().logevents(**params)) + except StopIteration: + pywikibot.info() + pywikibot.warning( + f'unable to delink missing {page.title(as_link=True)}') + found = list(self.site.search( + page.title(), + namespaces=self.site.namespaces.MAIN, + total=1 + )) + if found: + pywikibot.info('probably <<lightblue>>' + f'{found[0].title(as_link=True)}' + '<<default>> is meant') + return True + + self.summary_parameters = dict(entry) + return False + + +class DelinkerFromLog(CommonsDelinker): + + """Bot to delink deleted images from deletion log.""" update_options = { 'exclude': 'no-delink', 'localonly': False, 'since': '', } - summary_key = 'delinker-delink' @property def generator(self): @@ -90,38 +203,6 @@ self.summary_parameters = dict(item) return pywikibot.FilePage(item.page(), ignore_extension=True) - def skip_page(self, page) -> bool: - """Skip pages which neither exists locally nor on shared repository.""" - pywikibot.info('.', newline=False) - if page.exists() or page.file_is_shared(): - return True - return super().skip_page(page) - - def treat(self, file_page): - """Set page to current page and delink that page.""" - # use image_regex from image.py - namespace = file_page.site.namespaces[6] - escaped = case_escape(namespace.case, - file_page.title(with_ns=False), - underscore=True) - self.image_regex = 
re.compile( - r'\[\[ *(?:{})\s*:\s*{} *(?P<parameters>\|' - r'(?:[^\[\]]|\[\[[^\]]+\]\]|\[[^\]]+\])*|) *\]\]' - .format('|'.join(ignore_case(s) for s in namespace), escaped)) - - shown = False - for page in file_page.using_pages(content=True, namespaces=0): - if not shown: - pywikibot.info('\n>>> <<lightgreen>>Delinking ' - f'{file_page.title()}<<default>> <<<') - shown = True - super().treat(page) - - def treat_page(self): - """Delink a single page.""" - new = re.sub(self.image_regex, '', self.current_page.text) - self.put_current(new) - def teardown(self): """Save the last used logevent timestamp.""" if not hasattr(self, 'last_ts'): @@ -153,11 +234,13 @@ opt = removeprefix(opt, '-') if opt == 'localonly': options[opt] = True + elif opt == 'category': + options[opt] = value or True else: options[opt] = value - bot = CommonsDelinker(site=pywikibot.Site(), **options) - bot.run() + bot = DelinkerFromCategory if options.get('category') else DelinkerFromLog + bot(**options).run() if __name__ == '__main__': -- To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1061456?usp=email To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Change-Id: I49d9260f2cbcb7e98f1916da82b191119a0bf127 Gerrit-Change-Number: 1061456 Gerrit-PatchSet: 3 Gerrit-Owner: Xqt <i...@gno.de> Gerrit-Reviewer: Aram <arambakr1...@gmail.com> Gerrit-Reviewer: D3r1ck01 <dalangi-...@wikimedia.org> Gerrit-Reviewer: Xqt <i...@gno.de> Gerrit-Reviewer: jenkins-bot
_______________________________________________ Pywikibot-commits mailing list -- pywikibot-commits@lists.wikimedia.org To unsubscribe send an email to pywikibot-commits-le...@lists.wikimedia.org