jenkins-bot has submitted this change. (
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1193418?usp=email )
Change subject: [bugfix] remove U+9676 replacement
......................................................................
[bugfix] remove U+9676 replacement
The U+9676 replacement (decimal code point 9676, i.e. U+25CC '◌') in the
transliteration._trans dict is the last one of a group of replacements
made in the compat release but isn't useful for the same key. Finally
there is a direct replacement made in the transliterate method.
Therefore remove it from the dict and keep the latter.
Also
- use umlauts for 'ö' and 'ü' like in 'ä'
- remove replacement for 'C' (U+0043, decimal 67) which is an ASCII char
- split extended Latin into IPA and PUA
- use positional-only argument for char
- use keyword-only arguments for prev and succ parameters
- some tests added
Change-Id: I9448a2801d6110992d3f380f0ef6b9a501c3a515
---
M pywikibot/userinterfaces/transliteration.py
M tests/ui_tests.py
2 files changed, 53 insertions(+), 19 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/userinterfaces/transliteration.py
b/pywikibot/userinterfaces/transliteration.py
index 7e297d2..94179b4 100644
--- a/pywikibot/userinterfaces/transliteration.py
+++ b/pywikibot/userinterfaces/transliteration.py
@@ -6,7 +6,11 @@
#
from __future__ import annotations
-from pywikibot.tools import ModuleDeprecationWrapper, deprecate_arg
+from pywikibot.tools import (
+ ModuleDeprecationWrapper,
+ deprecate_arg,
+ deprecated_signature,
+)
#: Non ascii digits used by the framework
@@ -70,11 +74,11 @@
'Ṉ': 'N', 'Ṋ': 'N', 'Ɲ': 'N', 'ɲ': 'n', 'Ƞ': 'N', 'ǹ': 'n', 'ń': 'n',
'ñ': 'n', 'ņ': 'n', 'ň': 'n', 'ṅ': 'n', 'ṇ': 'n', 'ṉ': 'n', 'ṋ': 'n',
'ƞ': 'n', 'Ò': 'O', 'Ó': 'O', 'Ô': 'O', 'Õ': 'O', 'Ṍ': 'O', 'Ṏ': 'O',
- 'Ȭ': 'O', 'Ö': 'O', 'Ō': 'O', 'Ṑ': 'O', 'Ṓ': 'O', 'Ŏ': 'O', 'Ǒ': 'O',
+ 'Ȭ': 'O', 'Ö': 'Oe', 'Ō': 'O', 'Ṑ': 'O', 'Ṓ': 'O', 'Ŏ': 'O', 'Ǒ': 'O',
'Ȯ': 'O', 'Ȱ': 'O', 'Ọ': 'O', 'Ǫ': 'O', 'Ǭ': 'O', 'Ơ': 'O', 'Ờ': 'O',
'Ớ': 'O', 'Ỡ': 'O', 'Ợ': 'O', 'Ở': 'O', 'Ỏ': 'O', 'Ɵ': 'O', 'Ø': 'O',
'Ǿ': 'O', 'ò': 'o', 'ó': 'o', 'ô': 'o', 'õ': 'o', 'ṍ': 'o', 'ṏ': 'o',
- 'ȭ': 'o', 'ö': 'o', 'ō': 'o', 'ṑ': 'o', 'ṓ': 'o', 'ŏ': 'o', 'ǒ': 'o',
+ 'ȭ': 'o', 'ö': 'oe', 'ō': 'o', 'ṑ': 'o', 'ṓ': 'o', 'ŏ': 'o', 'ǒ': 'o',
'ȯ': 'o', 'ȱ': 'o', 'ọ': 'o', 'ǫ': 'o', 'ǭ': 'o', 'ơ': 'o', 'ờ': 'o',
'ớ': 'o', 'ỡ': 'o', 'ợ': 'o', 'ở': 'o', 'ỏ': 'o', 'ɵ': 'o', 'ø': 'o',
'ǿ': 'o', 'Ȍ': 'Ö', 'Ő': 'Ö', 'Ȫ': 'Ö', 'ȍ': 'ö', 'ő': 'ö', 'ȫ': 'ö',
@@ -90,10 +94,10 @@
'Ṭ': 'T', 'Ṯ': 'T', 'Ṱ': 'T', 'Ŧ': 'T', 'Ƭ': 'T', 'Ʈ': 'T', 'ţ': 't',
'ț': 't', 'ť': 't', 'ṫ': 't', 'ṭ': 't', 'ṯ': 't', 'ṱ': 't', 'ŧ': 't',
'Ⱦ': 't', 'ƭ': 't', 'ʈ': 't', 'Ù': 'U', 'Ú': 'U', 'Ũ': 'U', 'Ṹ': 'U',
- 'Ṵ': 'U', 'Ü': 'U', 'Ṳ': 'U', 'Ū': 'U', 'Ṻ': 'U', 'Ŭ': 'U', 'Ụ': 'U',
+ 'Ṵ': 'U', 'Ü': 'Ue', 'Ṳ': 'U', 'Ū': 'U', 'Ṻ': 'U', 'Ŭ': 'U', 'Ụ': 'U',
'Ů': 'U', 'Ų': 'U', 'Ǔ': 'U', 'Ṷ': 'U', 'Ủ': 'U', 'Ư': 'U', 'Ữ': 'U',
'Ự': 'U', 'Ử': 'U', 'ù': 'u', 'ú': 'u', 'ũ': 'u', 'ṹ': 'u', 'ṵ': 'u',
- 'ü': 'u', 'ṳ': 'u', 'ū': 'u', 'ṻ': 'u', 'ŭ': 'u', 'ụ': 'u', 'ů': 'u',
+ 'ü': 'ue', 'ṳ': 'u', 'ū': 'u', 'ṻ': 'u', 'ŭ': 'u', 'ụ': 'u', 'ů': 'u',
'ų': 'u', 'ǔ': 'u', 'ṷ': 'u', 'ủ': 'u', 'ư': 'u', 'ữ': 'u', 'ự': 'u',
'ử': 'u', 'Ȕ': 'Ü', 'Ű': 'Ü', 'Ǜ': 'Ü', 'Ǘ': 'Ü', 'Ǖ': 'Ü', 'Ǚ': 'Ü',
'ȕ': 'ü', 'ű': 'ü', 'ǜ': 'ü', 'ǘ': 'ü', 'ǖ': 'ü', 'ǚ': 'ü', 'Û': 'Ux',
@@ -113,12 +117,14 @@
'Ƣ': 'G', 'ᵷ': 'g', 'ɣ': 'g', 'ƣ': 'g', 'ᵹ': 'g', 'Ƅ': 'H', 'ƅ': 'h',
'Ƕ': 'Wh', 'ƕ': 'wh', 'Ɩ': 'I', 'ɩ': 'i', 'Ŋ': 'Ng', 'ŋ': 'ng', 'Œ': 'OE',
'œ': 'oe', 'Ɔ': 'O', 'ɔ': 'o', 'Ȣ': 'Ou', 'ȣ': 'ou', 'Ƽ': 'Q', 'ĸ': 'q',
- 'ƽ': 'q', 'ȹ': 'qp', '\uf20e': 'r', 'ſ': 's', 'ß': 'ss', 'Ʃ': 'Sh',
- 'ʃ': 'sh', 'ᶋ': 'sh', 'Ʉ': 'U', 'ʉ': 'u', 'Ʌ': 'V', 'ʌ': 'v', 'Ɯ': 'W',
- 'Ƿ': 'W', 'ɯ': 'w', 'ƿ': 'w', 'Ȝ': 'Y', 'ȝ': 'y', 'IJ': 'IJ', 'ij': 'ij',
- 'Ƨ': 'Z', 'ʮ': 'z', 'ƨ': 'z', 'Ʒ': 'Zh', 'ʒ': 'zh', 'Ǯ': 'Dzh', 'ǯ': 'dzh',
- 'Ƹ': "'", 'ƹ': "'", 'ʔ': "'", 'ˀ': "'", 'Ɂ': "'", 'ɂ': "'", 'Þ': 'Th',
- 'þ': 'th', 'C': '!', 'ʗ': '!', 'ǃ': '!',
+ 'ƽ': 'q', 'ȹ': 'qp', 'ſ': 's', 'ß': 'ss', 'IJ': 'IJ', 'ij': 'ij', 'Ɯ': 'W',
+ 'Ƿ': 'W', 'ƿ': 'w', 'Ȝ': 'Y', 'ȝ': 'y', 'Ƨ': 'Z', 'ƨ': 'z', 'Ʒ': 'Zh',
+ 'ʒ': 'zh', 'Ǯ': 'Dzh', 'ǯ': 'dzh', 'Þ': 'Th', 'þ': 'th',
+ # International Phonetic Alphabet
+ 'ʃ': 'sh', 'ᶋ': 'sh', 'Ʉ': 'U', 'ʉ': 'u', 'Ʌ': 'V', 'ʌ': 'v', 'ʔ': "'",
+ 'ˀ': "'", 'Ɂ': "'", 'ɂ': "'", 'ʗ': '!', 'ǃ': '!', 'Ƹ': "'", 'ƹ': "'",
+ # Private Use Area
+ '': 'r',
# Punctuation and typography
'«': '"', '»': '"', '“': '"', '”': '"', '„': '"', '¨': '"', '‘': "'",
'’': "'", '′': "'", '@': '(at)', '¤': '$', '¢': 'c', '€': 'E', '£': 'L',
@@ -193,7 +199,6 @@
'ى': 'á', 'ﻯ': 'á', 'ﻰ': 'á', 'ﯼ': 'y', 'ﯽ': 'y', 'ﯿ': 'y', 'ﯾ': 'y',
'ﻻ': 'la', 'ﻼ': 'la', 'ﷲ': 'llah', 'إ': "a'", 'أ': "a'", 'ؤ': "w'",
'ئ': "y'",
- '◌': 'iy', # indicates absence of vowels
# Perso-Arabic
'پ': 'p', 'ﭙ': 'p', 'چ': 'ch', 'ژ': 'zh', 'گ': 'g', 'ﮔ': 'g', 'ﮕ': 'g',
'ﮓ': 'g',
@@ -1117,23 +1122,29 @@
continue
while (value.encode(encoding, 'replace').decode(encoding) == '?'
and value in trans):
- value = trans[value]
+ value = trans[value] # pragma: no cover
trans[char] = value
self.trans = trans
@deprecate_arg('next', 'succ') # since 9.0
- def transliterate(self, char: str, default: str = '?',
+ @deprecated_signature(since='10.6.0')
+ def transliterate(self, char: str, /, default: str = '?', *,
prev: str = '-', succ: str = '-') -> str:
"""Transliterate the character.
.. versionchanged:: 9.0
*next* parameter was renamed to *succ*.
+ .. versionchanged:: 10.6
+ *char* argument is positional only; *prev* and *succ*
+ arguments are keyword only.
:param char: The character to transliterate.
- :param default: The character used when there is no transliteration.
+ :param default: The character used when there is no
+ transliteration.
:param prev: The previous character
:param succ: The succeeding character
- :return: The transliterated character which may be an empty string
+ :return: The transliterated character which may be an empty
+ string
"""
result = default
if char in self.trans:
diff --git a/tests/ui_tests.py b/tests/ui_tests.py
index 74da260..fabe471 100755
--- a/tests/ui_tests.py
+++ b/tests/ui_tests.py
@@ -13,7 +13,9 @@
import platform
import unittest
from contextlib import nullcontext, redirect_stdout, suppress
+from functools import partial
from typing import NoReturn
+from unicodedata import normalize
from unittest.mock import patch
import pywikibot
@@ -33,7 +35,11 @@
terminal_interface_unix,
terminal_interface_win32,
)
-from pywikibot.userinterfaces.transliteration import NON_ASCII_DIGITS, _trans
+from pywikibot.userinterfaces.transliteration import (
+ NON_ASCII_DIGITS,
+ Transliterator,
+ _trans,
+)
from tests.aspects import TestCase, TestCaseBase
@@ -366,21 +372,28 @@
'\x1b[93mu\x1b[0m\x1b[93me\x1b[0m\x1b[93mo\x1b[0m\n')
-class TestTransliterationTable(TestCase):
+class TestTransliteration(TestCase):
"""Test transliteration table."""
net = False
+ @classmethod
+ def setUpClass(cls) -> None:
+ """Set up Transliterator function."""
+ trans = Transliterator('ascii')
+ cls.t = staticmethod(partial(trans.transliterate, prev='P'))
+
def test_ascii_digits(self) -> None:
"""Test that non ascii digits are in transliteration table."""
for lang, digits in NON_ASCII_DIGITS.items():
with self.subTest(lang=lang):
- for char in digits:
+ for i, char in enumerate(digits):
self.assertTrue(char.isdigit())
self.assertFalse(char.isascii())
self.assertIn(char, _trans,
f'{char!r} not in transliteration table')
+ self.assertEqual(self.t(char), str(i))
def test_transliteration_table(self) -> None:
"""Test transliteration table consistency."""
@@ -388,6 +401,16 @@
with self.subTest():
self.assertNotEqual(k, v)
+ def test_transliterator(self) -> None:
+ """Test Transliterator."""
+ for char in 'äöü':
+ self.assertEqual(self.t(char), normalize('NFD', char)[0] + 'e')
+ self.assertEqual(self.t('1'), '?')
+ self.assertEqual(self.t('◌'), 'P')
+ self.assertEqual(self.t('ッ'), '?')
+ self.assertEqual(self.t('仝'), 'P')
+ self.assertEqual(self.t('ຫ'), 'h')
+
# TODO: add tests for background colors.
class FakeUITest(TestCase):
--
To view, visit
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1193418?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings?usp=email
Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I9448a2801d6110992d3f380f0ef6b9a501c3a515
Gerrit-Change-Number: 1193418
Gerrit-PatchSet: 7
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]