XZise has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/240291

Change subject: [FIX] color_format: Only handle unicode strings
......................................................................

[FIX] color_format: Only handle unicode strings

When using `unicode.format` it does return a `unicode` instance, but
`color_format` is based on Python's `Formatter` class which doesn't apply that
rule.

So it needs to workaround the issue that Python's `Formatter` will return the
`bytes` representation of an object when there is no further specification
(aka nothing after the colon). The default implementation will also return
`bytes` when there is no literal text and it might cause `UnicodeDecodeError`s
when there is literal text and the bytes representation does use non-ASCII
characters.

Bug: T113411
Change-Id: I0f8e0f1ac3e64c57918d99e9716c486270a87766
---
M pywikibot/tools/formatter.py
M tests/tools_formatter_tests.py
2 files changed, 65 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/91/240291/1

diff --git a/pywikibot/tools/formatter.py b/pywikibot/tools/formatter.py
index 151e4d9..1fdb170 100644
--- a/pywikibot/tools/formatter.py
+++ b/pywikibot/tools/formatter.py
@@ -15,6 +15,7 @@
 from string import Formatter
 
 from pywikibot.logging import output
+from pywikibot.tools import UnicodeType
 from pywikibot.userinterfaces.terminal_interface_base import colors
 
 
@@ -103,11 +104,39 @@
         if previous_literal:
             yield previous_literal, None, None, None
 
+    def _vformat(self, *args, **kwargs):
+        """
+        Override original `_vformat` to prevent that it changes into `bytes`.
+
+        The original `_vformat` is returning `bytes` under certain
+        curcumstances. It happens when the `format_string` is empty, when there
+        is no literal text around it or when the field value is not a `unicode`
+        already.
+        """
+        result = super(_ColorFormatter, self)._vformat(*args, **kwargs)
+        if not isinstance(result, UnicodeType):
+            assert result == b''
+            result = ''  # This is changing it into a unicode
+        return result
+
     def vformat(self, format_string, args, kwargs):
-        """Return the normal format result but verify no colors are 
keywords."""
+        """
+        Return the normal format result but verify no colors are keywords.
+
+        @param format_string: The format template string
+        @type format_string: unicode
+        @param args: The positional field values
+        @type args: sequence
+        @param kwargs: The named field values
+        @type kwargs: dict
+        @return: The formatted string
+        @rtype: unicode
+        """
         if self.colors.intersection(kwargs):  # kwargs use colors
             raise ValueError('Keyword argument(s) use valid color(s): ' +
                              '", "'.join(self.colors.intersection(kwargs)))
+        if not isinstance(format_string, UnicodeType):
+            raise TypeError('expected str, got 
{0}'.format(type(format_string)))
         return super(_ColorFormatter, self).vformat(format_string, args,
                                                     kwargs)
 
@@ -118,5 +147,8 @@
 
     It is automatically adding \03 in front of color fields so it's
     unnecessary to add them manually. Any other \03 in the text is disallowed.
+
+    @param text: The format template string
+    @type text: unicode
     """
     return _ColorFormatter().format(text, *args, **kwargs)
diff --git a/tests/tools_formatter_tests.py b/tests/tools_formatter_tests.py
index a441e35..bd3a2df 100644
--- a/tests/tools_formatter_tests.py
+++ b/tests/tools_formatter_tests.py
@@ -10,6 +10,7 @@
 __version__ = '$Id$'
 #
 from pywikibot.tools import formatter
+from pywikibot.tools import UnicodeMixin
 
 from tests.aspects import unittest, TestCase
 
@@ -35,20 +36,31 @@
 
     """Test color_format function in bot module."""
 
+    class DummyUnicode(UnicodeMixin):
+
+        def __unicode__(self):
+            return 'ä'
+
     net = False
+
+    def assert_format(self, format_string, expected, *args, **kwargs):
+        """Assert that color_format returns the expected string and type."""
+        result = formatter.color_format(format_string, *args, **kwargs)
+        self.assertEqual(result, expected)
+        self.assertIsInstance(result, type(expected))
 
     def test_no_colors(self):
         """Test without colors in template string."""
-        self.assertEqual(formatter.color_format('42'), '42')
-        self.assertEqual(formatter.color_format('{0}', 42), '42')
-        self.assertEqual(formatter.color_format('{ans}', ans=42), '42')
+        self.assert_format('', '')
+        self.assert_format('42', '42')
+        self.assert_format('{0}', '42', 42)
+        self.assert_format('before {0} after', 'before 42 after', 42)
+        self.assert_format('{ans}', '42', ans=42)
 
     def test_colors(self):
         """Test with colors in template string."""
-        self.assertEqual(formatter.color_format('{0}{black}', 42),
-                         '42\03{black}')
-        self.assertEqual(formatter.color_format('{ans}{black}', ans=42),
-                         '42\03{black}')
+        self.assert_format('{0}{black}', '42\03{black}', 42)
+        self.assert_format('{ans}{black}', '42\03{black}', ans=42)
         self.assertRaisesRegex(
             ValueError, r'.*conversion.*', formatter.color_format,
             '{0}{black!r}', 42)
@@ -58,8 +70,7 @@
 
     def test_marker(self):
         r"""Test that the \03 marker is only allowed in front of colors."""
-        self.assertEqual(formatter.color_format('{0}\03{black}', 42),
-                         '42\03{black}')
+        self.assert_format('{0}\03{black}', '42\03{black}', 42)
         # literal before a normal field
         self.assertRaisesRegex(
             ValueError, r'.*\\03', formatter.color_format,
@@ -74,6 +85,18 @@
         self.assertRaises(ValueError,
                           formatter.color_format, '{aqua}{black}', aqua=42)
 
+    def test_non_ascii(self):
+        """Test non-ASCII replacements."""
+        self.assert_format('{0}', 'ä', 'ä')
+        self.assert_format('{black}{0}', '\03{black}ä', 'ä')
+        self.assert_format('{0}', 'ä', self.DummyUnicode())
+        self.assert_format('{black}{0}', '\03{black}ä', self.DummyUnicode())
+
+    def test_bytes_format(self):
+        """Test that using `bytes` is not allowed."""
+        self.assertRaises(TypeError, formatter.color_format, b'{0}', 'a')
+        self.assertRaises(TypeError, formatter.color_format, b'{black}{0}', 
'a')
+
 
 if __name__ == '__main__':
     try:

-- 
To view, visit https://gerrit.wikimedia.org/r/240291
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0f8e0f1ac3e64c57918d99e9716c486270a87766
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to