Guido van Rossum added the comment:

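Here's a better patch that also fixes a few related issues: besides making
re.sub() on an empty subject return an empty string of the subject's type,
it drops the single-item shortcut in join_list() so that a unicode subject
yields a unicode result even when the replacement is a str. The new
test_bug_1140 lists the expected result types.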

__________________________________
Tracker <[EMAIL PROTECTED]>
<http://bugs.python.org/issue1140>
__________________________________
Index: Lib/test/test_re.py
===================================================================
--- Lib/test/test_re.py	(revision 57629)
+++ Lib/test/test_re.py	(working copy)
@@ -83,6 +83,31 @@
         self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
                          'abc\ndef\n')
 
+    def test_bug_1140(self):
+        # re.sub(x, y, u'') should return u'', not '', and
+        # re.sub(x, y, '') should return '', not u''.
+        # Also:
+        # re.sub(x, y, unicode(x)) should return unicode(y), and
+        # re.sub(x, y, str(x)) should return
+        #     str(y) if isinstance(y, str) else unicode(y).
+        for x in 'x', u'x':
+            for y in 'y', u'y':
+                z = re.sub(x, y, u'')
+                self.assertEqual(z, u'')
+                self.assertEqual(type(z), unicode)
+                #
+                z = re.sub(x, y, '')
+                self.assertEqual(z, '')
+                self.assertEqual(type(z), str)
+                #
+                z = re.sub(x, y, unicode(x))
+                self.assertEqual(z, y)
+                self.assertEqual(type(z), unicode)
+                #
+                z = re.sub(x, y, str(x))
+                self.assertEqual(z, y)
+                self.assertEqual(type(z), type(y))
+
     def test_sub_template_numeric_escape(self):
         # bug 776311 and friends
         self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
Index: Modules/_sre.c
===================================================================
--- Modules/_sre.c	(revision 57629)
+++ Modules/_sre.c	(working copy)
@@ -1979,7 +1979,7 @@
 #endif
 
 static PyObject*
-join_list(PyObject* list, PyObject* pattern)
+join_list(PyObject* list, PyObject* string)
 {
     /* join list elements */
 
@@ -1990,24 +1990,15 @@
 #endif
     PyObject* result;
 
-    switch (PyList_GET_SIZE(list)) {
-    case 0:
-        Py_DECREF(list);
-        return PySequence_GetSlice(pattern, 0, 0);
-    case 1:
-        result = PyList_GET_ITEM(list, 0);
-        Py_INCREF(result);
-        Py_DECREF(list);
-        return result;
-    }
-
-    /* two or more elements: slice out a suitable separator from the
-       first member, and use that to join the entire list */
-
-    joiner = PySequence_GetSlice(pattern, 0, 0);
+    joiner = PySequence_GetSlice(string, 0, 0);
     if (!joiner)
         return NULL;
 
+    if (PyList_GET_SIZE(list) == 0) {
+        Py_DECREF(list);
+        return joiner;
+    }
+
 #if PY_VERSION_HEX >= 0x01060000
     function = PyObject_GetAttrString(joiner, "join");
     if (!function) {
@@ -2443,7 +2434,7 @@
     Py_DECREF(filter);
 
     /* convert list to single string (also removes list) */
-    item = join_list(list, self->pattern);
+    item = join_list(list, string);
 
     if (!item)
         return NULL;
_______________________________________________
Python-bugs-list mailing list 
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to