New submission from Christian Heimes:
The array module is using a different typecode for unicode array
depending on UCS2 or UCS4:
#define Py_UNICODE_SIZE 4
#if Py_UNICODE_SIZE = 4
#define Py_UNICODE_WIDE
#endif
#ifdef Py_UNICODE_WIDE
#define PyArr_UNI 'w'
#define PyArr_UNISTR w
#else
#define PyArr_UNI 'u'
#define PyArr_UNISTR u
#endif
It's causing a bunch of unit test to fail which depend on 'u' as the
type code for an unicode array. I don't see the benefit from specifying
an alternative typecode for wide unicode arrays. It may be useful to
have an additional typecode that fails for UCS-2 builds.
My patch keeps 'u' in every build and adds 'w' as an alias for 'u' in
UCS-4 builds only. It also introduces the new module variable typecodes
which is a unicode string containing all valid typecodes.
--
components: Extension Modules
files: py3k_array_typecode.patch
messages: 56353
nosy: tiran
severity: normal
status: open
title: array unittest problems with UCS4 build
versions: Python 3.0
__
Tracker [EMAIL PROTECTED]
http://bugs.python.org/issue1268
__Index: Objects/codeobject.c
===
--- Objects/codeobject.c (Revision 58412)
+++ Objects/codeobject.c (Arbeitskopie)
@@ -59,7 +59,7 @@
freevars == NULL || !PyTuple_Check(freevars) ||
cellvars == NULL || !PyTuple_Check(cellvars) ||
name == NULL || (!PyString_Check(name) !PyUnicode_Check(name)) ||
- filename == NULL || !PyString_Check(filename) ||
+ filename == NULL || (!PyString_Check(name) !PyUnicode_Check(name)) ||
lnotab == NULL || !PyString_Check(lnotab) ||
!PyObject_CheckReadBuffer(code)) {
PyErr_BadInternalCall();
Index: Lib/test/test_codecs.py
===
--- Lib/test/test_codecs.py (Revision 58412)
+++ Lib/test/test_codecs.py (Arbeitskopie)
@@ -803,7 +803,7 @@
codecs.register_error(UnicodeInternalTest, codecs.ignore_errors)
decoder = codecs.getdecoder(unicode_internal)
ab = ab.encode(unicode_internal)
-ignored = decoder(bytes(%s\x22\x22\x22\x22%s % (ab[:4], ab[4:])),
+ignored = decoder(bytes(%s\x22\x22\x22\x22%s % (ab[:4], ab[4:]), ascii),
UnicodeInternalTest)
self.assertEquals((ab, 12), ignored)
Index: Lib/test/test_array.py
===
--- Lib/test/test_array.py (Revision 58412)
+++ Lib/test/test_array.py (Arbeitskopie)
@@ -17,8 +17,18 @@
array.array.__init__(typecode)
tests = [] # list to accumulate all tests
-typecodes = ubBhHiIlLfd
+typecodes = array.typecodes
+class TypecodesTest(unittest.TestCase):
+expected_typecodes = ubBhHiIlLfd
+
+def test_typecodes(self):
+global typecodes
+for typecode in self.expected_typecodes:
+self.assert_(typecode in typecodes, typecode)
+
+tests.append(TypecodesTest)
+
class BadConstructorTest(unittest.TestCase):
def test_constructor(self):
@@ -773,6 +783,12 @@
tests.append(UnicodeTest)
+class UnicodeWideTest(UnicodeTest):
+typecode = 'w'
+
+if 'w' in typecodes:
+tests.append(UnicodeWideTest)
+
class NumberTest(BaseTest):
def test_extslice(self):
Index: Lib/test/test_re.py
===
--- Lib/test/test_re.py (Revision 58412)
+++ Lib/test/test_re.py (Arbeitskopie)
@@ -591,7 +591,7 @@
self.assertEqual([item.group(0) for item in iter],
[:, ::, :::])
-def test_bug_926075(self):
+def DISABLED_test_bug_926075(self):
self.assert_(re.compile('bug_926075') is not
re.compile(str8('bug_926075')))
@@ -618,7 +618,7 @@
def test_empty_array(self):
# SF buf 1647541
import array
-for typecode in 'bBuhHiIlLfd':
+for typecode in array.typecodes:
a = array.array(typecode)
self.assertEqual(re.compile(bla).match(a), None)
self.assertEqual(re.compile().match(a).groups(), ())
Index: Lib/test/test_codeccallbacks.py
===
--- Lib/test/test_codeccallbacks.py (Revision 58412)
+++ Lib/test/test_codeccallbacks.py (Arbeitskopie)
@@ -140,17 +140,17 @@
sin += chr(sys.maxunicode)
sout = ba\\xac\\u1234\\u20ac\\u8000
if sys.maxunicode 0x:
-sout += bytes(\\U%08x % sys.maxunicode)
+sout += bytes(\\U%08x % sys.maxunicode, ascii)
self.assertEqual(sin.encode(ascii, backslashreplace), sout)
sout = ba\xac\\u1234\\u20ac\\u8000
if sys.maxunicode 0x:
-sout += bytes(\\U%08x % sys.maxunicode)
+sout += bytes(\\U%08x % sys.maxunicode, ascii)
self.assertEqual(sin.encode(latin-1, backslashreplace), sout)
sout