John Vandenberg has uploaded a new change for review.
https://gerrit.wikimedia.org/r/246793
Change subject: Fix Python 3 TypeError in fixArabicLetters
......................................................................
Fix Python 3 TypeError in fixArabicLetters
digits[digits.keys()[0]] causes a TypeError on Python 3, where
dict.keys() returns a view object that does not support indexing.
Replace the custom file-link regex exception with the standard
'file' exception.
Move deactivated segment of fixArabicLetters into new
method fixArabicDigits, and use textlib.NON_LATIN_DIGITS.
Bug: T101801
Change-Id: Ib0349fe4f55a150b16603b02c44c0100b84b8d6d
---
M pywikibot/cosmetic_changes.py
1 file changed, 35 insertions(+), 18 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/93/246793/1
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index 6b4af85..e4c9d43 100755
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -234,6 +234,9 @@
self.fixTypo,
self.fixArabicLetters,
+ # FIXME: Deactivated due to T57185
+ # self.fixArabicDigits,
+
self.fix_ISBN,
)
@@ -809,6 +812,7 @@
return
exceptions = [
'gallery',
+ 'file',
'hyperlink',
'interwiki',
# FIXME: but changes letters inside wikilinks
@@ -822,26 +826,12 @@
'startspace',
'inputbox',
]
- # FIXME: use textlib.NON_LATIN_DIGITS
# valid digits
- digits = {
- 'ckb': u'٠١٢٣٤٥٦٧٨٩',
- 'fa': u'۰۱۲۳۴۵۶۷۸۹',
- }
+ digits = textlib.NON_LATIN_DIGITS
faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits['fa']
- new = digits.pop(self.site.code)
- # This only works if there are only two items in digits dict
- old = digits[digits.keys()[0]]
- # do not change inside file links
- namespaces = list(self.site.namespace(6, all=True))
- pattern = re.compile(
- u'\\[\\[(%s):.+?\\.\\w+? *(\\|((\\[\\[.*?\\]\\])|.)*)?\\]\\]'
- % u'|'.join(namespaces),
- re.UNICODE)
# not to let bot edits in latin content
exceptions.append(re.compile(u"[^%(fa)s] *?\"*? *?, *?[^%(fa)s]"
% {'fa': faChrs}))
- exceptions.append(pattern)
text = textlib.replaceExcept(text, u',', u'،', exceptions)
if self.site.code == 'ckb':
text = textlib.replaceExcept(text,
@@ -859,9 +849,36 @@
return text
- # FIXME: split this function into two.
- # replace persian/arabic digits
- # deactivated due to bug 55185
+ def fixArabicDigits(self, text):
+ """Replace Persian/Arabic digits."""
+ if self.site.code not in ['ckb', 'fa']:
+ return
+
+ exceptions = [
+ 'gallery',
+ 'file',
+ 'hyperlink',
+ 'interwiki',
+ # FIXME: but changes letters inside wikilinks
+ # 'link',
+ 'math',
+ 'pre',
+ 'template',
+ 'timeline',
+ 'ref',
+ 'source',
+ 'startspace',
+ 'inputbox',
+ ]
+ faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits['fa']
+ # not to let bot edits in latin content
+ exceptions.append(re.compile(u"[^%(fa)s] *?\"*? *?, *?[^%(fa)s]"
+ % {'fa': faChrs}))
+
+ new = textlib.NON_LATIN_DIGITS[self.site.code]
+ old = textlib.NON_LATIN_DIGITS[
+ 'fa' if self.site.code == 'ckb' else 'ckb']
+
for i in range(0, 10):
text = textlib.replaceExcept(text, old[i], new[i], exceptions)
# do not change digits in class, style and table params
--
To view, visit https://gerrit.wikimedia.org/r/246793
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib0349fe4f55a150b16603b02c44c0100b84b8d6d
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits