jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1062670?usp=email )

Change subject: delinker: use difflib to find the closest image match
......................................................................

delinker: use difflib to find the closest image match

Change-Id: Ib5604e2a533e433f0434b109d5d0d0ed53031efa
---
M scripts/delinker.py
1 file changed, 25 insertions(+), 22 deletions(-)

Approvals:
  jenkins-bot: Verified
  Xqt: Looks good to me, approved




diff --git a/scripts/delinker.py b/scripts/delinker.py
index a6dc176..a2b3c90 100755
--- a/scripts/delinker.py
+++ b/scripts/delinker.py
@@ -45,6 +45,7 @@
 import configparser
 import heapq
 import re
+from difflib import get_close_matches

 import pywikibot
 from pywikibot.backports import removeprefix
@@ -64,7 +65,7 @@
     summary_key = 'delinker-delink'

     def skip_page(self, page) -> bool:
-        """Skip pages which neither exists locally nor on shared repository."""
+        """Skip pages which either exists locally or on shared repository."""
         pywikibot.info('.', newline=False)
         if page.exists() or page.file_is_shared():
             return True
@@ -135,31 +136,33 @@
         if super().skip_page(page):
             return True

+        title = page.title(with_ns=False)
         params = {
             'logtype': 'delete',
-            'page': 'File:' + page.title(underscore=True, with_ns=False),
+            'page': 'File:' + title,
+            'total': 1,
         }
-        try:
-            entry = next(self.site.logevents(**params))
-        except StopIteration:
-            try:
-                entry = next(self.site.image_repository().logevents(**params))
-            except StopIteration:
-                pywikibot.info()
-                pywikibot.warning(
-                    f'unable to delink missing {page.title(as_link=True)}')
-                found = list(self.site.search(
-                    page.title(),
-                    namespaces=self.site.namespaces.MAIN,
-                    total=1
-                ))
-                if found:
-                    pywikibot.info('probably <<lightblue>>'
-                                   f'{found[0].title(as_link=True)}'
-                                   '<<default>> is meant')
-                return True
+        entries = list(self.site.logevents(**params))
+        if not entries:
+            entries = list(self.site.image_repository().logevents(**params))

-        self.summary_parameters = dict(entry)
+        if not entries:
+            pywikibot.info()
+            pywikibot.warning(
+                f'unable to delink missing {page.title(as_link=True)}')
+            possibilities = [
+                p.title(with_ns=False)
+                for p in self.site.search(page.title(),
+                                          namespaces=self.site.namespaces.MAIN,
+                                          total=5)
+            ]
+            found = get_close_matches(title, possibilities, n=1)
+            if found:
+                pywikibot.info(
+                    f'probably <<lightblue>>{found[0]}<<default>> is meant')
+            return True
+
+        self.summary_parameters = dict(entries[0])
         return False



--
To view, visit 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1062670?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ib5604e2a533e433f0434b109d5d0d0ed53031efa
Gerrit-Change-Number: 1062670
Gerrit-PatchSet: 3
Gerrit-Owner: Xqt <i...@gno.de>
Gerrit-Reviewer: D3r1ck01 <dalangi-...@wikimedia.org>
Gerrit-Reviewer: Xqt <i...@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Aram <arambakr1...@gmail.com>
_______________________________________________
Pywikibot-commits mailing list -- pywikibot-commits@lists.wikimedia.org
To unsubscribe send an email to pywikibot-commits-le...@lists.wikimedia.org

Reply via email to