jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1061456?usp=email )

Change subject: [IMPR] add -category option to delinker.py
......................................................................

[IMPR] add -category option to delinker.py

The -category option works as follows:
- retrieve pages from the "Pages with missing files" category, which is
  identified by the wikibase item Q4989282. Any other category can be
  given with this option.
- for every page found in this category, process its image links
- skip further processing if the FilePage exists locally or in the
  image repository
- also skip further processing if the file was not deleted. In that case
  there is an invalid link found on the source page.
- finally delink the found image link

Bug: T372206
Change-Id: I49d9260f2cbcb7e98f1916da82b191119a0bf127
---
M scripts/delinker.py
1 file changed, 119 insertions(+), 36 deletions(-)

Approvals:
  jenkins-bot: Verified
  Aram: Looks good to me, but someone else must approve
  Xqt: Looks good to me, approved




diff --git a/scripts/delinker.py b/scripts/delinker.py
index 975d424..cfb83a4 100755
--- a/scripts/delinker.py
+++ b/scripts/delinker.py
@@ -7,6 +7,11 @@

 The following parameters are supported:

+-category:  Retrieve pages to delink from "Pages with missing files"
+            category. Usually the category is found on Q4989282 wikibase
+            item but can be overridden by giving the category title
+            with this option. The *-since* option is ignored.
+
 -exclude:   If the deletion log contains this pattern, the file is not
             delinked (default is 'no-delink').

@@ -27,6 +32,8 @@

 .. versionadded:: 7.2
    This script is completely rewriten from compat branch.
+.. versionchanged:: 9.4
+   *-category* option was added.
 """
 #
 # (C) Pywikibot team, 2006-2024
@@ -52,14 +59,120 @@

 class CommonsDelinker(SingleSiteBot, ConfigParserBot, AutomaticTWSummaryBot):

-    """Bot to delink deleted images."""
+    """Base Delinker Bot."""
+
+    summary_key = 'delinker-delink'
+
+    def skip_page(self, page) -> bool:
+        """Skip pages which exist locally or on the shared repository."""
+        pywikibot.info('.', newline=False)
+        if page.exists() or page.file_is_shared():
+            return True
+        return super().skip_page(page)
+
+    def treat(self, file_page):
+        """Set page to current page and delink that page."""
+        # use image_regex from image.py
+        namespace = file_page.site.namespaces[6]
+        escaped = case_escape(namespace.case,
+                              file_page.title(with_ns=False),
+                              underscore=True)
+        self.image_regex = re.compile(
+            r'\[\[ *(?:{})\s*:\s*{} *(?P<parameters>\|'
+            r'(?:[^\[\]]|\[\[[^\]]+\]\]|\[[^\]]+\])*|) *\]\]'
+            .format('|'.join(ignore_case(s) for s in namespace), escaped))
+
+        shown = False
+        for page in file_page.using_pages(
+                content=True, namespaces=self.site.namespaces.MAIN):
+            if not shown:
+                pywikibot.info('\n>>> Delinking <<lightgreen>>'
+                               f'{file_page.title()}<<default>> <<<')
+                shown = True
+            super().treat(page)
+
+    def treat_page(self):
+        """Delink a single page."""
+        new = re.sub(self.image_regex, '', self.current_page.text)
+        self.put_current(new)
+
+
+class DelinkerFromCategory(CommonsDelinker):
+
+    """Bot to delink deleted images from pages found in category."""
+
+    pages_with_missing_files = 'Q4989282'
+
+    update_options = {
+        'exclude': 'no-delink',
+        'localonly': False,
+        'category': True,
+    }
+
+    @property
+    def generator(self):
+        """Retrieve pages with missing files and yield their image links."""
+        if self.opt.category is True:
+            cat = self.site.page_from_repository(self.pages_with_missing_files)
+        else:
+            cat = pywikibot.Category(self.site, self.opt.category)
+            if not cat.exists():
+                cat = None
+
+        if not cat:
+            pywikibot.warning('No valid category given for generator')
+            return
+
+        for article in cat.articles(namespaces=self.site.namespaces.MAIN):
+            yield from article.imagelinks()
+
+    def init_page(self, item) -> pywikibot.page.FilePage:
+        """Upcast the image link to a FilePage."""
+        return pywikibot.FilePage(item, ignore_extension=True)
+
+    def skip_page(self, page) -> pywikibot.page.FilePage:
+        """Skip pages which aren't deleted on any repository."""
+        if super().skip_page(page):
+            return True
+
+        params = {
+            'logtype': 'delete',
+            'reverse': True,
+            'page': 'File:' + page.title(underscore=True, with_ns=False),
+        }
+        try:
+            entry = next(self.site.logevents(**params))
+        except StopIteration:
+            try:
+                entry = next(self.site.image_repository().logevents(**params))
+            except StopIteration:
+                pywikibot.info()
+                pywikibot.warning(
+                    f'unable to delink missing {page.title(as_link=True)}')
+                found = list(self.site.search(
+                    page.title(),
+                    namespaces=self.site.namespaces.MAIN,
+                    total=1
+                ))
+                if found:
+                    pywikibot.info('probably <<lightblue>>'
+                                   f'{found[0].title(as_link=True)}'
+                                   '<<default>> is meant')
+                return True
+
+        self.summary_parameters = dict(entry)
+        return False
+
+
+class DelinkerFromLog(CommonsDelinker):
+
+    """Bot to delink deleted images from deletion log."""

     update_options = {
         'exclude': 'no-delink',
         'localonly': False,
         'since': '',
     }
-    summary_key = 'delinker-delink'

     @property
     def generator(self):
@@ -90,38 +203,6 @@
         self.summary_parameters = dict(item)
         return pywikibot.FilePage(item.page(), ignore_extension=True)

-    def skip_page(self, page) -> bool:
-        """Skip pages which neither exists locally nor on shared repository."""
-        pywikibot.info('.', newline=False)
-        if page.exists() or page.file_is_shared():
-            return True
-        return super().skip_page(page)
-
-    def treat(self, file_page):
-        """Set page to current page and delink that page."""
-        # use image_regex from image.py
-        namespace = file_page.site.namespaces[6]
-        escaped = case_escape(namespace.case,
-                              file_page.title(with_ns=False),
-                              underscore=True)
-        self.image_regex = re.compile(
-            r'\[\[ *(?:{})\s*:\s*{} *(?P<parameters>\|'
-            r'(?:[^\[\]]|\[\[[^\]]+\]\]|\[[^\]]+\])*|) *\]\]'
-            .format('|'.join(ignore_case(s) for s in namespace), escaped))
-
-        shown = False
-        for page in file_page.using_pages(content=True, namespaces=0):
-            if not shown:
-                pywikibot.info('\n>>> <<lightgreen>>Delinking '
-                               f'{file_page.title()}<<default>> <<<')
-                shown = True
-            super().treat(page)
-
-    def treat_page(self):
-        """Delink a single page."""
-        new = re.sub(self.image_regex, '', self.current_page.text)
-        self.put_current(new)
-
     def teardown(self):
         """Save the last used logevent timestamp."""
         if not hasattr(self, 'last_ts'):
@@ -153,11 +234,13 @@
         opt = removeprefix(opt, '-')
         if opt == 'localonly':
             options[opt] = True
+        elif opt == 'category':
+            options[opt] = value or True
         else:
             options[opt] = value

-    bot = CommonsDelinker(site=pywikibot.Site(), **options)
-    bot.run()
+    bot = DelinkerFromCategory if options.get('category') else DelinkerFromLog
+    bot(**options).run()


 if __name__ == '__main__':

--
To view, visit 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1061456?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I49d9260f2cbcb7e98f1916da82b191119a0bf127
Gerrit-Change-Number: 1061456
Gerrit-PatchSet: 3
Gerrit-Owner: Xqt <i...@gno.de>
Gerrit-Reviewer: Aram <arambakr1...@gmail.com>
Gerrit-Reviewer: D3r1ck01 <dalangi-...@wikimedia.org>
Gerrit-Reviewer: Xqt <i...@gno.de>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
Pywikibot-commits mailing list -- pywikibot-commits@lists.wikimedia.org
To unsubscribe send an email to pywikibot-commits-le...@lists.wikimedia.org

Reply via email to