John Vandenberg has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/205837

Change subject: Remove cosmetic changes dependency on isbn script
......................................................................

Remove cosmetic changes dependency on isbn script

Move ISBN regex into textlib for re-use.
Use stdnum package as preferred provider of ISBN routines.

Bug: T89993
Change-Id: I215466febf77fa0b95997f25c89e414bb4dfffcc
---
M pywikibot/textlib.py
M scripts/cosmetic_changes.py
M scripts/isbn.py
M setup.py
4 files changed, 100 insertions(+), 20 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/37/205837/1

diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 4c2da87..f7d854f 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1229,6 +1229,21 @@
     return bool(m)
 
 
+def reformat_ISBNs(text, match_func):
+    """Apply match_func to every ISBN code that follows an 'ISBN ' prefix.
+
+    @param text: text containing ISBNs
+    @type text: str
+    @param match_func: replacement callable; receives each regex match
+    @type match_func: callable
+    @return: text with every matched ISBN code replaced
+    @rtype: str
+    """
+    # The lookbehind keeps the 'ISBN ' prefix itself out of the match.
+    isbn_pattern = r'(?<=ISBN )(?P<code>[\d\-]+[\dXx])'
+    return re.sub(isbn_pattern, match_func, text)
+
+
 # ---------------------------------------
 # Time parsing functionality (Archivebot)
 # ---------------------------------------
diff --git a/scripts/cosmetic_changes.py b/scripts/cosmetic_changes.py
index bb99a30..cdc9997 100755
--- a/scripts/cosmetic_changes.py
+++ b/scripts/cosmetic_changes.py
@@ -18,8 +18,9 @@
                   the predefined message texts with original and replacements
                   inserted.
 
--ignore:          Ignores if an error occured and either skips the page or
-                  only that method. It can be set to 'page' or 'method'.
+-ignore:          Ignores if an error occurred and skips either the page, or
+                  only that method, or only an instance of the problem in the
+                  page text. It can be set to 'page', 'method', or 'match'.
 
 &warning;
 
@@ -76,13 +77,28 @@
 #
 
 import re
-from pywikibot.tools import MediaWikiVersion
+
+from warnings import warn
+
+try:
+    import stdnum.isbn as stdnum_isbn
+    scripts_isbn = None
+except ImportError:
+    stdnum_isbn = None
+    # Old dependency
+    try:
+        import scripts.isbn as scripts_isbn
+    except ImportError:
+        scripts_isbn = None
+
 import pywikibot
-import isbn
+
 from pywikibot import config, i18n, textlib, pagegenerators
 from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
 from pywikibot.page import url2unicode
 from pywikibot.tools import deprecate_arg, first_lower, first_upper
+from pywikibot.tools import MediaWikiVersion
+
 
 warning = """
 ATTENTION: You can run this script as a stand-alone for testing purposes.
@@ -153,10 +169,60 @@
     }
 }
 
-
 CANCEL_ALL = False
 CANCEL_PAGE = 1
 CANCEL_METHOD = 2
+CANCEL_MATCH = 3
+
+
+def _format_isbn_match(match, strict=True):
+    """Validate and format one matched ISBN; re-raise if invalid and strict."""
+    isbn = match.group('code')
+    if stdnum_isbn:
+        try:
+            stdnum_isbn.validate(isbn)
+        except stdnum_isbn.ValidationError as e:
+            if strict:
+                raise
+            pywikibot.log('ISBN "%s" validation error: %s' % (isbn, e))
+            return isbn  # non-strict: leave the invalid ISBN as written
+
+        return stdnum_isbn.format(isbn)
+    else:
+        try:
+            scripts_isbn.is_valid(isbn)
+        except scripts_isbn.InvalidIsbnException as e:  # bind e for the log
+            if strict:
+                raise
+            pywikibot.log('ISBN "%s" validation error: %s' % (isbn, e))
+            return isbn  # non-strict: leave the invalid ISBN as written
+
+        isbn = scripts_isbn.getIsbn(isbn)
+        isbn.format()
+        return isbn.code
+
+
+def _format_isbn_match_loose(match):
+    """Format a matched ISBN; invalid ones are returned unchanged."""
+    return _format_isbn_match(match, strict=False)
+
+
+def _reformat_ISBNs(text, strict=True):
+    """Normalise every ISBN found in text.
+
+    @raises Exception: Invalid ISBN encountered when strict enabled
+    """
+    if not (stdnum_isbn or scripts_isbn):
+        raise NotImplementedError(
+            'ISBN functionality not available.  Install stdnum package.')
+    if not stdnum_isbn:
+        # Only the old scripts.isbn fallback could be imported.
+        warn('package stdnum.isbn not found; using scripts.isbn',
+             ImportWarning)
+
+    if strict:
+        return textlib.reformat_ISBNs(text, _format_isbn_match)
+    return textlib.reformat_ISBNs(text, _format_isbn_match_loose)
 
 
 class CosmeticChangesToolkit:
@@ -196,6 +262,7 @@
             self.fixTypo,
 
             self.fixArabicLetters,
+            self.fix_ISBN,
         )
 
     @classmethod
@@ -218,20 +285,10 @@
                 raise
         return text if result is None else result
 
-    @staticmethod
-    def isbn_execute(text):
-        """Hyphenate ISBN numbers and catch 'InvalidIsbnException'."""
-        try:
-            return isbn.hyphenateIsbnNumbers(text)
-        except isbn.InvalidIsbnException as error:
-            pywikibot.log(u"ISBN error: %s" % error)
-            return None
-
     def _change(self, text):
         """Execute all clean up methods."""
         for method in self.common_methods:
             text = self.safe_execute(method, text)
-        text = self.safe_execute(CosmeticChangesToolkit.isbn_execute, text)
         return text
 
     def change(self, text):
@@ -898,6 +955,11 @@
             r'\1== {{int:license-header}} ==', exceptions, True)
         return text
 
+    def fix_ISBN(self, text):
+        """Normalise ISBNs in text; lenient when ignore mode is 'match'."""
+        return _reformat_ISBNs(
+            text, strict=self.ignore != CANCEL_MATCH)
+
 
 class CosmeticChangesBot(ExistingPageBot, NoRedirectPageBot):
 
@@ -959,6 +1021,8 @@
                 options['ignore'] = CANCEL_METHOD
             elif ignore_mode == 'page':
                 options['ignore'] = CANCEL_PAGE
+            elif ignore_mode == 'match':
+                options['ignore'] = CANCEL_MATCH
             else:
                 raise ValueError('Unknown ignore mode "{0}"!'.format(ignore_mode))
         else:
diff --git a/scripts/isbn.py b/scripts/isbn.py
index b9b118e..fc4cf54 100755
--- a/scripts/isbn.py
+++ b/scripts/isbn.py
@@ -45,6 +45,9 @@
 #
 
 import re
+
+from functools import partial
+
 import pywikibot
 from pywikibot import i18n, pagegenerators, Bot, WikidataBot
 
@@ -1416,11 +1419,8 @@
     return i.code
 
 
-def hyphenateIsbnNumbers(text):
-    """Helper function to hyphenate an ISBN."""
-    isbnR = re.compile(r'(?<=ISBN )(?P<code>[\d\-]+[\dXx])')
-    text = isbnR.sub(_hyphenateIsbnNumber, text)
-    return text
+hyphenateIsbnNumbers = partial(pywikibot.textlib.reformat_ISBNs,
+                               match_func=_hyphenateIsbnNumber)  # legacy name
 
 
 def _isbn10toIsbn13(match):
diff --git a/setup.py b/setup.py
index a33928e..446808f 100644
--- a/setup.py
+++ b/setup.py
@@ -19,6 +19,7 @@
 
 extra_deps = {
     # Core library dependencies
+    'isbn': ['python-stdnum'],
     'daemonize': ['daemonize'],
     'Graphviz':  ['pydot'],
     'MySQL': ['oursql'],

-- 
To view, visit https://gerrit.wikimedia.org/r/205837
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I215466febf77fa0b95997f25c89e414bb4dfffcc
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to