STINNER Victor <victor.stin...@gmail.com> added the comment:
>>>> "{0:s}{1:s}".format("ABC", "\u0410\u0411\u0412")
> python: Objects/unicodeobject.c:1223: _copy_characters: Assertion `ch <=
> to_maxchar' failed.
Attached patch fixes this issue.
----------
keywords: +patch
Added file: http://bugs.python.org/file25327/format_nonascii.patch
_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue14648>
_______________________________________
diff -r 6762b943ee59 Lib/test/test_unicode.py
--- a/Lib/test/test_unicode.py Tue Apr 17 21:42:07 2012 -0400
+++ b/Lib/test/test_unicode.py Mon Apr 23 16:25:13 2012 +0200
@@ -924,6 +924,14 @@ class UnicodeTest(string_tests.CommonTes
self.assertRaises(ValueError, format, '', '#')
self.assertRaises(ValueError, format, '', '#20')
+ # Non-ASCII
+ self.assertEqual("{0:s}{1:s}".format("ABC", "\u0410\u0411\u0412"),
+ 'ABC\u0410\u0411\u0412')
+ self.assertEqual("{0:.3s}".format("ABC\u0410\u0411\u0412"),
+ 'ABC')
+ self.assertEqual("{0:.0s}".format("ABC\u0410\u0411\u0412"),
+ '')
+
def test_format_map(self):
self.assertEqual(''.format_map({}), '')
self.assertEqual('a'.format_map({}), 'a')
diff -r 6762b943ee59 Objects/unicodeobject.c
--- a/Objects/unicodeobject.c Tue Apr 17 21:42:07 2012 -0400
+++ b/Objects/unicodeobject.c Mon Apr 23 16:25:13 2012 +0200
@@ -1957,6 +1957,37 @@ PyUnicode_FromKindAndData(int kind, cons
}
}
+Py_UCS4
+_PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end)
+{ /* Return a max-char value covering the characters of unicode in [start, end). */
+ enum PyUnicode_Kind kind;
+ void *startptr, *endptr;
+ /* Preconditions, checked only via assert(): ready string, 0 <= start <= end <= length. */
+ assert(PyUnicode_IS_READY(unicode));
+ assert(0 <= start);
+ assert(end <= PyUnicode_GET_LENGTH(unicode));
+ assert(start <= end);
+
+ if (start == 0 && end == PyUnicode_GET_LENGTH(unicode))
+ return PyUnicode_MAX_CHAR_VALUE(unicode); /* whole string requested: answer without scanning */
+
+ if (start == end)
+ return 127; /* empty range: report the ASCII bound */
+ /* Otherwise scan only the requested slice with the helper matching the storage kind. */
+ kind = PyUnicode_KIND(unicode);
+ startptr = PyUnicode_DATA(unicode);
+ endptr = (char*)startptr + end * kind; /* kind is used here as the per-character byte size */
+ if (start) /* nothing to add when the slice begins at index 0 */
+ startptr = (char*)startptr + start * kind;
+ switch(kind)
+ {
+ case PyUnicode_1BYTE_KIND: return ucs1lib_find_max_char(startptr, endptr);
+ case PyUnicode_2BYTE_KIND: return ucs2lib_find_max_char(startptr, endptr);
+ default: /* any other kind value shares the 4-byte case below */
+ case PyUnicode_4BYTE_KIND: return ucs4lib_find_max_char(startptr, endptr);
+ }
+}
+
/* Ensure that a string uses the most efficient storage, if it is not the
case: create a new string with of the right kind. Write NULL into *p_unicode
on error. */
diff -r 6762b943ee59 Python/formatter_unicode.c
--- a/Python/formatter_unicode.c Tue Apr 17 21:42:07 2012 -0400
+++ b/Python/formatter_unicode.c Mon Apr 23 16:25:13 2012 +0200
@@ -713,10 +713,10 @@ format_string_internal(PyObject *value,
Py_ssize_t lpad;
Py_ssize_t rpad;
Py_ssize_t total;
- Py_ssize_t pos;
+ Py_ssize_t i, pos;
Py_ssize_t len = PyUnicode_GET_LENGTH(value);
PyObject *result = NULL;
- Py_UCS4 maxchar = 127;
+ Py_UCS4 ch, maxchar = 127;
/* sign is not allowed on strings */
if (format->sign != '\0') {
@@ -752,8 +752,12 @@ format_string_internal(PyObject *value,
if (lpad != 0 || rpad != 0)
maxchar = Py_MAX(maxchar, format->fill_char);
+ ch = _PyUnicode_FindMaxChar(value, 0, len);
+ maxchar = Py_MAX(maxchar, ch);
+
/* allocate the resulting string */
result = PyUnicode_New(total, maxchar);
+ printf("maxchar = 0x%x\n", maxchar);
if (result == NULL)
goto done;
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com