tokenizer.c

guido.van.rossum Wed, 06 Jun 2007 17:54:37 -0700

Author: guido.van.rossum
Date: Thu Jun  7 02:54:15 2007
New Revision: 55796


Modified:
   python/branches/py3k-struni/Lib/io.py
   python/branches/py3k-struni/Lib/test/test_tarfile.py
   python/branches/py3k-struni/Parser/tokenizer.c
Log:
tokenizer.c: make coding markup work again.

io.open() now takes all positional parameters (so we can conveniently
call it from C code).

test_tarfile.py no longer uses u"..." literals, but is otherwise still
badly broken.

This is a checkpoint; some more stuff now breaks.


Modified: python/branches/py3k-struni/Lib/io.py
==============================================================================
--- python/branches/py3k-struni/Lib/io.py       (original)
+++ python/branches/py3k-struni/Lib/io.py       Thu Jun  7 02:54:15 2007
@@ -49,7 +49,7 @@
         self.characters_written = characters_written
 
 
-def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
+def open(file, mode="r", buffering=None, encoding=None, newline=None):
     """Replacement for the built-in open function.
 
     Args:
@@ -59,7 +59,6 @@
       buffering: optional int >= 0 giving the buffer size; values
                  can be: 0 = unbuffered, 1 = line buffered,
                  larger = fully buffered.
-    Keywords (for text modes only; *must* be given as keyword arguments):
       encoding: optional string giving the text encoding.
       newline: optional newlines specifier; must be None, '\n' or '\r\n';
                specifies the line ending expected on input and written on

Modified: python/branches/py3k-struni/Lib/test/test_tarfile.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_tarfile.py        (original)
+++ python/branches/py3k-struni/Lib/test/test_tarfile.py        Thu Jun  7 02:54:15 2007
@@ -432,17 +432,17 @@
         tarinfo = tar.getmember("pax/regtype1")
         self.assertEqual(tarinfo.uname, "foo")
         self.assertEqual(tarinfo.gname, "bar")
-        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
+        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
 
         tarinfo = tar.getmember("pax/regtype2")
         self.assertEqual(tarinfo.uname, "")
         self.assertEqual(tarinfo.gname, "bar")
-        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
+        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
 
         tarinfo = tar.getmember("pax/regtype3")
         self.assertEqual(tarinfo.uname, "tarfile")
         self.assertEqual(tarinfo.gname, "tarfile")
-        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
+        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")
 
     def test_pax_number_fields(self):
         # All following number fields are read from the pax header.
@@ -727,11 +727,11 @@
 
     def test_pax_global_header(self):
         pax_headers = {
-                u"foo": u"bar",
-                u"uid": u"0",
-                u"mtime": u"1.23",
-                u"test": u"äöü",
-                u"äöü": u"test"}
+                "foo": "bar",
+                "uid": "0",
+                "mtime": "1.23",
+                "test": "äöü",
+                "äöü": "test"}
 
         tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, \
                 pax_headers=pax_headers)
@@ -756,11 +756,11 @@
     def test_pax_extended_header(self):
         # The fields from the pax header have priority over the
         # TarInfo.
-        pax_headers = {u"path": u"foo", u"uid": u"123"}
+        pax_headers = {"path": "foo", "uid": "123"}
 
         tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="iso8859-1")
         t = tarfile.TarInfo()
-        t.name = u"äöü"     # non-ASCII
+        t.name = "äöü"     # non-ASCII
         t.uid = 8**8        # too large
         t.pax_headers = pax_headers
         tar.addfile(t)
@@ -808,11 +808,11 @@
         else:
             tar.addfile(tarinfo)
 
-        tarinfo.name = u"äöü"
+        tarinfo.name = "äöü"
         self.assertRaises(UnicodeError, tar.addfile, tarinfo)
 
         tarinfo.name = "foo"
-        tarinfo.uname = u"äöü"
+        tarinfo.uname = "äöü"
         self.assertRaises(UnicodeError, tar.addfile, tarinfo)
 
     def test_unicode_argument(self):
@@ -825,7 +825,7 @@
         tar.close()
 
     def test_uname_unicode(self):
-        for name in (u"äöü", "äöü"):
+        for name in ("äöü", "äöü"):
             t = tarfile.TarInfo("foo")
             t.uname = name
             t.gname = name
@@ -860,9 +860,9 @@
     def test_error_handlers(self):
         # Test if the unicode error handlers work correctly for characters
         # that cannot be expressed in a given encoding.
-        self._create_unicode_name(u"äöü")
+        self._create_unicode_name("äöü")
 
-        for handler, name in (("utf-8", u"äöü".encode("utf8")),
+        for handler, name in (("utf-8", "äöü".encode("utf8")),
                     ("replace", "???"), ("ignore", "")):
             tar = tarfile.open(tmpname, format=self.format, encoding="ascii",
                     errors=handler)
@@ -874,11 +874,11 @@
     def test_error_handler_utf8(self):
         # Create a pathname that has one component representable using
         # iso8859-1 and the other only in iso8859-15.
-        self._create_unicode_name(u"äöü/€")
+        self._create_unicode_name("äöü/€")
 
         tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1",
                 errors="utf-8")
-        self.assertEqual(tar.getnames()[0], "äöü/" + u"€".encode("utf8"))
+        self.assertEqual(tar.getnames()[0], "äöü/" + "€".encode("utf8"))
 
 
 class AppendTest(unittest.TestCase):

Modified: python/branches/py3k-struni/Parser/tokenizer.c
==============================================================================
--- python/branches/py3k-struni/Parser/tokenizer.c      (original)
+++ python/branches/py3k-struni/Parser/tokenizer.c      Thu Jun  7 02:54:15 2007
@@ -396,25 +396,29 @@
 static int
 fp_setreadl(struct tok_state *tok, const char* enc)
 {
-       PyObject *reader, *stream, *readline;
+       PyObject *readline = NULL, *stream = NULL, *io = NULL;
+       int ok = 0;
 
-       /* XXX: constify filename argument. */
-       stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL);
-       if (stream == NULL)
-               return 0;
+       io = PyImport_ImportModule("io");
+       if (io == NULL)
+               goto cleanup;
 
-       reader = PyCodec_StreamReader(enc, stream, NULL);
-       Py_DECREF(stream);
-       if (reader == NULL)
-               return 0;
+       stream = PyObject_CallMethod(io, "open", "ssis",
+                                    tok->filename, "r", -1, enc);
+       if (stream == NULL)
+               goto cleanup;
 
-       readline = PyObject_GetAttrString(reader, "readline");
-       Py_DECREF(reader);
+       readline = PyObject_GetAttrString(stream, "readline");
        if (readline == NULL)
-               return 0;
+               goto cleanup;
 
        tok->decoding_readline = readline;
-       return 1;
+       ok = 1;
+
+  cleanup:
+       Py_XDECREF(stream);
+       Py_XDECREF(io);
+       return ok;
 }
 
 /* Fetch the next byte from TOK. */

_______________________________________________
Python-3000-checkins mailing list
[email protected]
http://mail.python.org/mailman/listinfo/python-3000-checkins

[Python-3000-checkins] r55796 - in python/branches/py3k-struni: Lib/io.py Lib/test/test_tarfile.py Parser/tokenizer.c

Reply via email to