John Vandenberg has uploaded a new change for review.
https://gerrit.wikimedia.org/r/205837
Change subject: Remove cosmetic changes dependency on isbn script
......................................................................
Remove cosmetic changes dependency on isbn script
Move ISBN regex into textlib for re-use.
Use stdnum package as preferred provider of ISBN routines.
Bug: T89993
Change-Id: I215466febf77fa0b95997f25c89e414bb4dfffcc
---
M pywikibot/textlib.py
M scripts/cosmetic_changes.py
M scripts/isbn.py
M setup.py
4 files changed, 100 insertions(+), 20 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/37/205837/1
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 4c2da87..f7d854f 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1229,6 +1229,21 @@
return bool(m)
+def reformat_ISBNs(text, match_func):
+ """Reformat ISBNs.
+
+ @param text: text containing ISBNs
+ @type text: str
+ @param match_func: function to reformat matched ISBNs
+ @type match_func: callable
+ @return: reformatted text
+ @rtype: str
+ """
+ isbnR = re.compile(r'(?<=ISBN )(?P<code>[\d\-]+[\dXx])')
+ text = isbnR.sub(match_func, text)
+ return text
+
+
# ---------------------------------------
# Time parsing functionality (Archivebot)
# ---------------------------------------
diff --git a/scripts/cosmetic_changes.py b/scripts/cosmetic_changes.py
index bb99a30..cdc9997 100755
--- a/scripts/cosmetic_changes.py
+++ b/scripts/cosmetic_changes.py
@@ -18,8 +18,9 @@
the predefined message texts with original and replacements
inserted.
--ignore: Ignores if an error occured and either skips the page or
- only that method. It can be set to 'page' or 'method'.
+-ignore: Ignores if an error occurred and skips either the page, or
+ only that method, or only an instance of the problem in the
+ page text. It can be set to 'page', 'method', or 'match'.
&warning;
@@ -76,13 +77,28 @@
#
import re
-from pywikibot.tools import MediaWikiVersion
+
+from warnings import warn
+
+try:
+ import stdnum.isbn as stdnum_isbn
+ scripts_isbn = None
+except ImportError:
+ stdnum_isbn = None
+ # Old dependency
+ try:
+ import scripts.isbn as scripts_isbn
+ except ImportError:
+ scripts_isbn = None
+
import pywikibot
-import isbn
+
from pywikibot import config, i18n, textlib, pagegenerators
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
from pywikibot.page import url2unicode
from pywikibot.tools import deprecate_arg, first_lower, first_upper
+from pywikibot.tools import MediaWikiVersion
+
warning = """
ATTENTION: You can run this script as a stand-alone for testing purposes.
@@ -153,10 +169,60 @@
}
}
-
CANCEL_ALL = False
CANCEL_PAGE = 1
CANCEL_METHOD = 2
+CANCEL_MATCH = 3
+
+
+def _format_isbn_match(match, strict=True):
+    """Validate and format one matched ISBN; re-raise errors only if strict."""
+    isbn = match.group('code')
+    if stdnum_isbn:
+        try:
+            stdnum_isbn.validate(isbn)
+        except stdnum_isbn.ValidationError as e:
+            if strict:
+                raise
+            pywikibot.log('ISBN "%s" validation error: %s' % (isbn, e))
+            return isbn
+
+        return stdnum_isbn.format(isbn)
+    else:
+        try:
+            scripts_isbn.is_valid(isbn)
+        except scripts_isbn.InvalidIsbnException as e:  # bound for the log
+            if strict:
+                raise
+            pywikibot.log('ISBN "%s" validation error: %s' % (isbn, e))
+            return isbn
+
+        isbn = scripts_isbn.getIsbn(isbn)
+        isbn.format()
+        return isbn.code
+
+
+def _format_isbn_match_loose(match):
+ """Helper function to only reformat a validated ISBN."""
+ return _format_isbn_match(match, strict=False)
+
+
+def _reformat_ISBNs(text, strict=True):
+ """Helper function to normalise ISBNs in text.
+
+ @raises Exception: Invalid ISBN encountered when strict enabled
+ """
+ if not stdnum_isbn:
+ if not scripts_isbn:
+ raise NotImplementedError(
+ 'ISBN functionality not available. Install stdnum package.')
+
+ warn('package stdnum.isbn not found; using scripts.isbn',
+ ImportWarning)
+
+ func = _format_isbn_match if strict else _format_isbn_match_loose
+
+ return textlib.reformat_ISBNs(text, func)
class CosmeticChangesToolkit:
@@ -196,6 +262,7 @@
self.fixTypo,
self.fixArabicLetters,
+ self.fix_ISBN,
)
@classmethod
@@ -218,20 +285,10 @@
raise
return text if result is None else result
- @staticmethod
- def isbn_execute(text):
- """Hyphenate ISBN numbers and catch 'InvalidIsbnException'."""
- try:
- return isbn.hyphenateIsbnNumbers(text)
- except isbn.InvalidIsbnException as error:
- pywikibot.log(u"ISBN error: %s" % error)
- return None
-
def _change(self, text):
"""Execute all clean up methods."""
for method in self.common_methods:
text = self.safe_execute(method, text)
- text = self.safe_execute(CosmeticChangesToolkit.isbn_execute, text)
return text
def change(self, text):
@@ -898,6 +955,11 @@
r'\1== {{int:license-header}} ==', exceptions, True)
return text
+ def fix_ISBN(self, text):
+ """Hyphenate ISBN numbers."""
+ return _reformat_ISBNs(
+ text, strict=False if self.ignore == CANCEL_MATCH else True)
+
class CosmeticChangesBot(ExistingPageBot, NoRedirectPageBot):
@@ -959,6 +1021,8 @@
options['ignore'] = CANCEL_METHOD
elif ignore_mode == 'page':
options['ignore'] = CANCEL_PAGE
+ elif ignore_mode == 'match':
+ options['ignore'] = CANCEL_MATCH
else:
raise ValueError('Unknown ignore mode "{0}"!'.format(ignore_mode))
else:
diff --git a/scripts/isbn.py b/scripts/isbn.py
index b9b118e..fc4cf54 100755
--- a/scripts/isbn.py
+++ b/scripts/isbn.py
@@ -45,6 +45,9 @@
#
import re
+
+from functools import partial
+
import pywikibot
from pywikibot import i18n, pagegenerators, Bot, WikidataBot
@@ -1416,11 +1419,8 @@
return i.code
-def hyphenateIsbnNumbers(text):
- """Helper function to hyphenate an ISBN."""
- isbnR = re.compile(r'(?<=ISBN )(?P<code>[\d\-]+[\dXx])')
- text = isbnR.sub(_hyphenateIsbnNumber, text)
- return text
+hyphenateIsbnNumbers = partial(textlib.reformat_ISBNs,
+ match_func=_hyphenateIsbnNumber)
def _isbn10toIsbn13(match):
diff --git a/setup.py b/setup.py
index a33928e..446808f 100644
--- a/setup.py
+++ b/setup.py
@@ -19,6 +19,7 @@
extra_deps = {
# Core library dependencies
+ 'isbn': ['python-stdnum'],
'daemonize': ['daemonize'],
'Graphviz': ['pydot'],
'MySQL': ['oursql'],
--
To view, visit https://gerrit.wikimedia.org/r/205837
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I215466febf77fa0b95997f25c89e414bb4dfffcc
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits