[Pywikibot-commits] [Gerrit] ...core[master]: [bugfix] remove U+9676 replacement

jenkins-bot (Code Review) Sat, 18 Oct 2025 10:15:58 -0700

jenkins-bot has submitted this change. ( 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1193418?usp=email )


Change subject: [bugfix] remove U+9676 replacement
......................................................................

[bugfix] remove U+9676 replacement

The U+25CC (decimal 9676) replacement in the transliteration._trans dict
is the last one of a group of replacements made in the compat release but
isn't useful for the same key. Finally there is a direct replacement made
in the transliterate method. Therefore remove it from the dict and keep
the latter.

Also
- transliterate 'ö' and 'ü' as 'oe' and 'ue', as already done for 'ä'
- remove replacement for 'C' (U+0043, decimal 67) which is an ASCII char
- split extended Latin into IPA and PUA
- use positional-only argument for char
- use keyword-only arguments for prev and succ parameters
- some tests added

Change-Id: I9448a2801d6110992d3f380f0ef6b9a501c3a515
---
M pywikibot/userinterfaces/transliteration.py
M tests/ui_tests.py
2 files changed, 53 insertions(+), 19 deletions(-)

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified




diff --git a/pywikibot/userinterfaces/transliteration.py 
b/pywikibot/userinterfaces/transliteration.py
index 7e297d2..94179b4 100644
--- a/pywikibot/userinterfaces/transliteration.py
+++ b/pywikibot/userinterfaces/transliteration.py
@@ -6,7 +6,11 @@
 #
 from __future__ import annotations

-from pywikibot.tools import ModuleDeprecationWrapper, deprecate_arg
+from pywikibot.tools import (
+    ModuleDeprecationWrapper,
+    deprecate_arg,
+    deprecated_signature,
+)


 #: Non ascii digits used by the framework
@@ -70,11 +74,11 @@
     'Ṉ': 'N', 'Ṋ': 'N', 'Ɲ': 'N', 'ɲ': 'n', 'Ƞ': 'N', 'ǹ': 'n', 'ń': 'n',
     'ñ': 'n', 'ņ': 'n', 'ň': 'n', 'ṅ': 'n', 'ṇ': 'n', 'ṉ': 'n', 'ṋ': 'n',
     'ƞ': 'n', 'Ò': 'O', 'Ó': 'O', 'Ô': 'O', 'Õ': 'O', 'Ṍ': 'O', 'Ṏ': 'O',
-    'Ȭ': 'O', 'Ö': 'O', 'Ō': 'O', 'Ṑ': 'O', 'Ṓ': 'O', 'Ŏ': 'O', 'Ǒ': 'O',
+    'Ȭ': 'O', 'Ö': 'Oe', 'Ō': 'O', 'Ṑ': 'O', 'Ṓ': 'O', 'Ŏ': 'O', 'Ǒ': 'O',
     'Ȯ': 'O', 'Ȱ': 'O', 'Ọ': 'O', 'Ǫ': 'O', 'Ǭ': 'O', 'Ơ': 'O', 'Ờ': 'O',
     'Ớ': 'O', 'Ỡ': 'O', 'Ợ': 'O', 'Ở': 'O', 'Ỏ': 'O', 'Ɵ': 'O', 'Ø': 'O',
     'Ǿ': 'O', 'ò': 'o', 'ó': 'o', 'ô': 'o', 'õ': 'o', 'ṍ': 'o', 'ṏ': 'o',
-    'ȭ': 'o', 'ö': 'o', 'ō': 'o', 'ṑ': 'o', 'ṓ': 'o', 'ŏ': 'o', 'ǒ': 'o',
+    'ȭ': 'o', 'ö': 'oe', 'ō': 'o', 'ṑ': 'o', 'ṓ': 'o', 'ŏ': 'o', 'ǒ': 'o',
     'ȯ': 'o', 'ȱ': 'o', 'ọ': 'o', 'ǫ': 'o', 'ǭ': 'o', 'ơ': 'o', 'ờ': 'o',
     'ớ': 'o', 'ỡ': 'o', 'ợ': 'o', 'ở': 'o', 'ỏ': 'o', 'ɵ': 'o', 'ø': 'o',
     'ǿ': 'o', 'Ȍ': 'Ö', 'Ő': 'Ö', 'Ȫ': 'Ö', 'ȍ': 'ö', 'ő': 'ö', 'ȫ': 'ö',
@@ -90,10 +94,10 @@
     'Ṭ': 'T', 'Ṯ': 'T', 'Ṱ': 'T', 'Ŧ': 'T', 'Ƭ': 'T', 'Ʈ': 'T', 'ţ': 't',
     'ț': 't', 'ť': 't', 'ṫ': 't', 'ṭ': 't', 'ṯ': 't', 'ṱ': 't', 'ŧ': 't',
     'Ⱦ': 't', 'ƭ': 't', 'ʈ': 't', 'Ù': 'U', 'Ú': 'U', 'Ũ': 'U', 'Ṹ': 'U',
-    'Ṵ': 'U', 'Ü': 'U', 'Ṳ': 'U', 'Ū': 'U', 'Ṻ': 'U', 'Ŭ': 'U', 'Ụ': 'U',
+    'Ṵ': 'U', 'Ü': 'Ue', 'Ṳ': 'U', 'Ū': 'U', 'Ṻ': 'U', 'Ŭ': 'U', 'Ụ': 'U',
     'Ů': 'U', 'Ų': 'U', 'Ǔ': 'U', 'Ṷ': 'U', 'Ủ': 'U', 'Ư': 'U', 'Ữ': 'U',
     'Ự': 'U', 'Ử': 'U', 'ù': 'u', 'ú': 'u', 'ũ': 'u', 'ṹ': 'u', 'ṵ': 'u',
-    'ü': 'u', 'ṳ': 'u', 'ū': 'u', 'ṻ': 'u', 'ŭ': 'u', 'ụ': 'u', 'ů': 'u',
+    'ü': 'ue', 'ṳ': 'u', 'ū': 'u', 'ṻ': 'u', 'ŭ': 'u', 'ụ': 'u', 'ů': 'u',
     'ų': 'u', 'ǔ': 'u', 'ṷ': 'u', 'ủ': 'u', 'ư': 'u', 'ữ': 'u', 'ự': 'u',
     'ử': 'u', 'Ȕ': 'Ü', 'Ű': 'Ü', 'Ǜ': 'Ü', 'Ǘ': 'Ü', 'Ǖ': 'Ü', 'Ǚ': 'Ü',
     'ȕ': 'ü', 'ű': 'ü', 'ǜ': 'ü', 'ǘ': 'ü', 'ǖ': 'ü', 'ǚ': 'ü', 'Û': 'Ux',
@@ -113,12 +117,14 @@
     'Ƣ': 'G', 'ᵷ': 'g', 'ɣ': 'g', 'ƣ': 'g', 'ᵹ': 'g', 'Ƅ': 'H', 'ƅ': 'h',
     'Ƕ': 'Wh', 'ƕ': 'wh', 'Ɩ': 'I', 'ɩ': 'i', 'Ŋ': 'Ng', 'ŋ': 'ng', 'Œ': 'OE',
     'œ': 'oe', 'Ɔ': 'O', 'ɔ': 'o', 'Ȣ': 'Ou', 'ȣ': 'ou', 'Ƽ': 'Q', 'ĸ': 'q',
-    'ƽ': 'q', 'ȹ': 'qp', '\uf20e': 'r', 'ſ': 's', 'ß': 'ss', 'Ʃ': 'Sh',
-    'ʃ': 'sh', 'ᶋ': 'sh', 'Ʉ': 'U', 'ʉ': 'u', 'Ʌ': 'V', 'ʌ': 'v', 'Ɯ': 'W',
-    'Ƿ': 'W', 'ɯ': 'w', 'ƿ': 'w', 'Ȝ': 'Y', 'ȝ': 'y', 'Ĳ': 'IJ', 'ĳ': 'ij',
-    'Ƨ': 'Z', 'ʮ': 'z', 'ƨ': 'z', 'Ʒ': 'Zh', 'ʒ': 'zh', 'Ǯ': 'Dzh', 'ǯ': 'dzh',
-    'Ƹ': "'", 'ƹ': "'", 'ʔ': "'", 'ˀ': "'", 'Ɂ': "'", 'ɂ': "'", 'Þ': 'Th',
-    'þ': 'th', 'C': '!', 'ʗ': '!', 'ǃ': '!',
+    'ƽ': 'q', 'ȹ': 'qp', 'ſ': 's', 'ß': 'ss', 'Ĳ': 'IJ', 'ĳ': 'ij', 'Ɯ': 'W',
+    'Ƿ': 'W', 'ƿ': 'w', 'Ȝ': 'Y', 'ȝ': 'y', 'Ƨ': 'Z', 'ƨ': 'z', 'Ʒ': 'Zh',
+    'ʒ': 'zh', 'Ǯ': 'Dzh', 'ǯ': 'dzh', 'Þ': 'Th', 'þ': 'th',
+    # International Phonetic Alphabet
+    'ʃ': 'sh', 'ᶋ': 'sh', 'Ʉ': 'U', 'ʉ': 'u', 'Ʌ': 'V', 'ʌ': 'v', 'ʔ': "'",
+    'ˀ': "'", 'Ɂ': "'", 'ɂ': "'", 'ʗ': '!', 'ǃ': '!', 'Ƹ': "'", 'ƹ': "'",
+    # Private Use Area
+    '\uf20e': 'r',
     # Punctuation and typography
     '«': '"', '»': '"', '“': '"', '”': '"', '„': '"', '¨': '"', '‘': "'",
     '’': "'", '′': "'", '@': '(at)', '¤': '$', '¢': 'c', '€': 'E', '£': 'L',
@@ -193,7 +199,6 @@
     'ى': 'á', 'ﻯ': 'á', 'ﻰ': 'á', 'ﯼ': 'y', 'ﯽ': 'y', 'ﯿ': 'y', 'ﯾ': 'y',
     'ﻻ': 'la', 'ﻼ': 'la', 'ﷲ': 'llah', 'إ': "a'", 'أ': "a'", 'ؤ': "w'",
     'ئ': "y'",
-    '◌': 'iy',  # indicates absence of vowels
     # Perso-Arabic
     'پ': 'p', 'ﭙ': 'p', 'چ': 'ch', 'ژ': 'zh', 'گ': 'g', 'ﮔ': 'g', 'ﮕ': 'g',
     'ﮓ': 'g',
@@ -1117,23 +1122,29 @@
                 continue
             while (value.encode(encoding, 'replace').decode(encoding) == '?'
                    and value in trans):
-                value = trans[value]
+                value = trans[value]  # pragma: no cover
             trans[char] = value
         self.trans = trans

     @deprecate_arg('next', 'succ')  # since 9.0
-    def transliterate(self, char: str, default: str = '?',
+    @deprecated_signature(since='10.6.0')
+    def transliterate(self, char: str, /, default: str = '?', *,
                       prev: str = '-', succ: str = '-') -> str:
         """Transliterate the character.

         .. versionchanged:: 9.0
            *next* parameter was renamed to *succ*.
+        .. versionchanged:: 10.6
+           *char* argument is positional only; *prev* and *succ*
+           arguments are keyword only.

         :param char: The character to transliterate.
-        :param default: The character used when there is no transliteration.
+        :param default: The character used when there is no
+            transliteration.
         :param prev: The previous character
         :param succ: The succeeding character
-        :return: The transliterated character which may be an empty string
+        :return: The transliterated character which may be an empty
+            string
         """
         result = default
         if char in self.trans:
diff --git a/tests/ui_tests.py b/tests/ui_tests.py
index 74da260..fabe471 100755
--- a/tests/ui_tests.py
+++ b/tests/ui_tests.py
@@ -13,7 +13,9 @@
 import platform
 import unittest
 from contextlib import nullcontext, redirect_stdout, suppress
+from functools import partial
 from typing import NoReturn
+from unicodedata import normalize
 from unittest.mock import patch

 import pywikibot
@@ -33,7 +35,11 @@
     terminal_interface_unix,
     terminal_interface_win32,
 )
-from pywikibot.userinterfaces.transliteration import NON_ASCII_DIGITS, _trans
+from pywikibot.userinterfaces.transliteration import (
+    NON_ASCII_DIGITS,
+    Transliterator,
+    _trans,
+)
 from tests.aspects import TestCase, TestCaseBase


@@ -366,21 +372,28 @@
             '\x1b[93mu\x1b[0m\x1b[93me\x1b[0m\x1b[93mo\x1b[0m\n')


-class TestTransliterationTable(TestCase):
+class TestTransliteration(TestCase):

     """Test transliteration table."""

     net = False

+    @classmethod
+    def setUpClass(cls) -> None:
+        """Set up Transliterator function."""
+        trans = Transliterator('ascii')
+        cls.t = staticmethod(partial(trans.transliterate, prev='P'))
+
     def test_ascii_digits(self) -> None:
         """Test that non ascii digits are in transliteration table."""
         for lang, digits in NON_ASCII_DIGITS.items():
             with self.subTest(lang=lang):
-                for char in digits:
+                for i, char in enumerate(digits):
                     self.assertTrue(char.isdigit())
                     self.assertFalse(char.isascii())
                     self.assertIn(char, _trans,
                                   f'{char!r} not in transliteration table')
+                    self.assertEqual(self.t(char), str(i))

     def test_transliteration_table(self) -> None:
         """Test transliteration table consistency."""
@@ -388,6 +401,16 @@
             with self.subTest():
                 self.assertNotEqual(k, v)

+    def test_transliterator(self) -> None:
+        """Test Transliterator."""
+        for char in 'äöü':
+            self.assertEqual(self.t(char), normalize('NFD', char)[0] + 'e')
+        self.assertEqual(self.t('1'), '?')
+        self.assertEqual(self.t('◌'), 'P')
+        self.assertEqual(self.t('ッ'), '?')
+        self.assertEqual(self.t('仝'), 'P')
+        self.assertEqual(self.t('ຫ'), 'h')
+

 # TODO: add tests for background colors.
 class FakeUITest(TestCase):

--
To view, visit 
https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1193418?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I9448a2801d6110992d3f380f0ef6b9a501c3a515
Gerrit-Change-Number: 1193418
Gerrit-PatchSet: 7
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: Xqt <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
Pywikibot-commits mailing list -- [email protected]
To unsubscribe send an email to [email protected]

[Pywikibot-commits] [Gerrit] ...core[master]: [bugfix] remove U+9676 replacement

Reply via email to