Author: guido.van.rossum
Date: Mon Oct 15 04:52:41 2007
New Revision: 58466

Modified:
   python/branches/py3k/Include/code.h
   python/branches/py3k/Include/unicodeobject.h
   python/branches/py3k/Misc/ACKS
   python/branches/py3k/Modules/_ctypes/callbacks.c
   python/branches/py3k/Modules/posixmodule.c
   python/branches/py3k/Modules/pyexpat.c
   python/branches/py3k/Objects/codeobject.c
   python/branches/py3k/Objects/moduleobject.c
   python/branches/py3k/Objects/unicodeobject.c
   python/branches/py3k/Python/bltinmodule.c
   python/branches/py3k/Python/ceval.c
   python/branches/py3k/Python/compile.c
   python/branches/py3k/Python/frozen.c
   python/branches/py3k/Python/import.c
   python/branches/py3k/Python/importdl.c
   python/branches/py3k/Python/pythonrun.c
   python/branches/py3k/Python/traceback.c
Log:
Patch #1272, by Christian Heimes and Alexandre Vassalotti.
Changes to make __file__ a proper Unicode object, using the default
filesystem encoding.
This is a bit tricky because the default filesystem encoding isn't
set by the time we import the first modules; at that point we fudge
things a bit.  This is okay since __file__ isn't really used much
except for error reporting.
Tested on OSX and Linux only so far.


Modified: python/branches/py3k/Include/code.h
==============================================================================
--- python/branches/py3k/Include/code.h (original)
+++ python/branches/py3k/Include/code.h Mon Oct 15 04:52:41 2007
@@ -21,8 +21,8 @@
     PyObject *co_freevars;     /* tuple of strings (free variable names) */
     PyObject *co_cellvars;      /* tuple of strings (cell variable names) */
     /* The rest doesn't count for hash/cmp */
-    PyObject *co_filename;     /* string (where it was loaded from) */
-    PyObject *co_name;         /* string (name, for reference) */
+    PyObject *co_filename;     /* unicode (where it was loaded from) */
+    PyObject *co_name;         /* unicode (name, for reference) */
     int co_firstlineno;                /* first source line number */
     PyObject *co_lnotab;       /* string (encoding addr<->lineno mapping) */
     void *co_zombieframe;     /* for optimization only (see frameobject.c) */

Modified: python/branches/py3k/Include/unicodeobject.h
==============================================================================
--- python/branches/py3k/Include/unicodeobject.h        (original)
+++ python/branches/py3k/Include/unicodeobject.h        Mon Oct 15 04:52:41 2007
@@ -154,6 +154,7 @@
 # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
 # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
+# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
 # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
@@ -245,6 +246,7 @@
 # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
 # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
+# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
 # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
@@ -641,6 +643,20 @@
 PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
     PyObject *, const char *);
 
+/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
+
+   If the encoding is supported by one of the built-in codecs (i.e., UTF-8,
+   UTF-16, UTF-32, Latin-1 or MBCS), it is used directly; otherwise fall back
+   to UTF-8 and replace invalid characters with '?'.
+
+   The function is intended to be used for paths and file names only
+   during bootstrapping process where the codecs are not set up.
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
+    const char *s               /* encoded string */
+    );
+
 /* Return a char* holding the UTF-8 encoded value of the
    Unicode object.
 

Modified: python/branches/py3k/Misc/ACKS
==============================================================================
--- python/branches/py3k/Misc/ACKS      (original)
+++ python/branches/py3k/Misc/ACKS      Mon Oct 15 04:52:41 2007
@@ -273,6 +273,7 @@
 Shane Hathaway
 Rycharde Hawkes
 Jochen Hayek
+Christian Heimes
 Thomas Heller
 Lance Finn Helsten
 Jonathan Hendry
@@ -667,6 +668,7 @@
 Hector Urtubia
 Atul Varma
 Dmitry Vasiliev
+Alexandre Vassalotti
 Frank Vercruesse
 Mike Verdone
 Jaap Vermeulen

Modified: python/branches/py3k/Modules/_ctypes/callbacks.c
==============================================================================
--- python/branches/py3k/Modules/_ctypes/callbacks.c    (original)
+++ python/branches/py3k/Modules/_ctypes/callbacks.c    Mon Oct 15 04:52:41 2007
@@ -34,9 +34,9 @@
        PyCodeObject *py_code = 0;
        PyFrameObject *py_frame = 0;
     
-       py_srcfile = PyString_FromString(filename);
+       py_srcfile = PyUnicode_DecodeFSDefault(filename);
        if (!py_srcfile) goto bad;
-       py_funcname = PyString_FromString(funcname);
+       py_funcname = PyUnicode_FromString(funcname);
        if (!py_funcname) goto bad;
        py_globals = PyDict_New();
        if (!py_globals) goto bad;

Modified: python/branches/py3k/Modules/posixmodule.c
==============================================================================
--- python/branches/py3k/Modules/posixmodule.c  (original)
+++ python/branches/py3k/Modules/posixmodule.c  Mon Oct 15 04:52:41 2007
@@ -5370,7 +5370,7 @@
 #endif
     if (name == NULL)
         return PyErr_NoMemory();
-    result = PyString_FromString(name);
+    result = PyUnicode_DecodeFSDefault(name);
     free(name);
     return result;
 }
@@ -5428,7 +5428,7 @@
        Py_XDECREF(err);
        return NULL;
     }
-    return PyString_FromString(buffer);
+    return PyUnicode_DecodeFSDefault(buffer);
 }
 #endif
 

Modified: python/branches/py3k/Modules/pyexpat.c
==============================================================================
--- python/branches/py3k/Modules/pyexpat.c      (original)
+++ python/branches/py3k/Modules/pyexpat.c      Mon Oct 15 04:52:41 2007
@@ -232,13 +232,13 @@
         code = PyString_FromString("");
         if (code == NULL)
             goto failed;
-        name = PyString_FromString(func_name);
+        name = PyUnicode_FromString(func_name);
         if (name == NULL)
             goto failed;
         nulltuple = PyTuple_New(0);
         if (nulltuple == NULL)
             goto failed;
-        filename = PyString_FromString(__FILE__);
+        filename = PyUnicode_DecodeFSDefault(__FILE__);
         handler_info[slot].tb_code =
             PyCode_New(0,              /* argcount */
                        0,       /* kwonlyargcount */

Modified: python/branches/py3k/Objects/codeobject.c
==============================================================================
--- python/branches/py3k/Objects/codeobject.c   (original)
+++ python/branches/py3k/Objects/codeobject.c   Mon Oct 15 04:52:41 2007
@@ -50,6 +50,7 @@
 {
        PyCodeObject *co;
        Py_ssize_t i;
+
        /* Check argument types */
        if (argcount < 0 || nlocals < 0 ||
            code == NULL ||
@@ -58,20 +59,16 @@
            varnames == NULL || !PyTuple_Check(varnames) ||
            freevars == NULL || !PyTuple_Check(freevars) ||
            cellvars == NULL || !PyTuple_Check(cellvars) ||
-           name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) ||
-           filename == NULL || !PyString_Check(filename) ||
+           name == NULL || !PyUnicode_Check(name) ||
+           filename == NULL || !PyUnicode_Check(filename) ||
            lnotab == NULL || !PyString_Check(lnotab) ||
            !PyObject_CheckReadBuffer(code)) {
                PyErr_BadInternalCall();
                return NULL;
        }
-       if (PyString_Check(name)) {
-               name = PyUnicode_FromString(PyString_AS_STRING(name));
-               if (name == NULL)
-                       return NULL;
-       } else {
-               Py_INCREF(name);
-       }
+       Py_INCREF(name);
+       Py_INCREF(filename);
+
        intern_strings(names);
        intern_strings(varnames);
        intern_strings(freevars);
@@ -299,8 +296,8 @@
 
        if (co->co_firstlineno != 0)
                lineno = co->co_firstlineno;
-       if (co->co_filename && PyString_Check(co->co_filename))
-               filename = PyString_AS_STRING(co->co_filename);
+       if (co->co_filename && PyUnicode_Check(co->co_filename))
+               filename = PyUnicode_AsString(co->co_filename);
        return PyUnicode_FromFormat(
                        "<code object %.100U at %p, file \"%.300s\", line %d>",
                        co->co_name, co, filename, lineno);

Modified: python/branches/py3k/Objects/moduleobject.c
==============================================================================
--- python/branches/py3k/Objects/moduleobject.c (original)
+++ python/branches/py3k/Objects/moduleobject.c Mon Oct 15 04:52:41 2007
@@ -86,12 +86,12 @@
        d = ((PyModuleObject *)m)->md_dict;
        if (d == NULL ||
            (fileobj = PyDict_GetItemString(d, "__file__")) == NULL ||
-           !PyString_Check(fileobj))
+           !PyUnicode_Check(fileobj))
        {
                PyErr_SetString(PyExc_SystemError, "module filename missing");
                return NULL;
        }
-       return PyString_AsString(fileobj);
+       return PyUnicode_AsString(fileobj);
 }
 
 void

Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c        (original)
+++ python/branches/py3k/Objects/unicodeobject.c        Mon Oct 15 04:52:41 2007
@@ -117,7 +117,11 @@
 
 /* Default encoding to use and assume when NULL is passed as encoding
    parameter; it is fixed to "utf-8".  Always use the
-   PyUnicode_GetDefaultEncoding() API to access this global. */
+   PyUnicode_GetDefaultEncoding() API to access this global.
+
+   Don't forget to alter Py_FileSystemDefaultEncoding if you change the
+   hard coded default!
+*/
 static const char unicode_default_encoding[] = "utf-8";
 
 Py_UNICODE
@@ -1231,6 +1235,35 @@
     return v;
 }
 
+/* Decode the null-terminated string s with Py_FileSystemDefaultEncoding
+   and return the decoder's result; the "replace" error handler is used.
+   Early in bootstrapping Py_FileSystemDefaultEncoding can still be NULL, in
+   which case UTF-8 is used.  "mbcs" and "utf-8" are dispatched straight to
+   their C decoders because the codecs may not be set up yet. */
+PyObject*
+PyUnicode_DecodeFSDefault(const char *s)
+{
+    Py_ssize_t size = (Py_ssize_t)strlen(s);
+
+    if (Py_FileSystemDefaultEncoding) {
+#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
+        /* strcmp() returns 0 on a match, so compare against 0 explicitly. */
+        if (strcmp(Py_FileSystemDefaultEncoding, "mbcs") == 0) {
+            return PyUnicode_DecodeMBCS(s, size, "replace");
+        }
+#elif defined(__APPLE__)
+        if (strcmp(Py_FileSystemDefaultEncoding, "utf-8") == 0) {
+            return PyUnicode_DecodeUTF8(s, size, "replace");
+        }
+#endif
+        return PyUnicode_Decode(s, size,
+                                Py_FileSystemDefaultEncoding,
+                                "replace");
+    }
+    /* Encoding not configured yet: fall back to UTF-8. */
+    return PyUnicode_DecodeUTF8(s, size, "replace");
+}
+
 char*
 PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
 {

Modified: python/branches/py3k/Python/bltinmodule.c
==============================================================================
--- python/branches/py3k/Python/bltinmodule.c   (original)
+++ python/branches/py3k/Python/bltinmodule.c   Mon Oct 15 04:52:41 2007
@@ -10,6 +10,9 @@
 
 /* The default encoding used by the platform file system APIs
    Can remain NULL for all platforms that don't have such a concept
+
+   Don't forget to modify PyUnicode_DecodeFSDefault() if you touch any of the
+   values for Py_FileSystemDefaultEncoding!
 */
 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
 const char *Py_FileSystemDefaultEncoding = "mbcs";

Modified: python/branches/py3k/Python/ceval.c
==============================================================================
--- python/branches/py3k/Python/ceval.c (original)
+++ python/branches/py3k/Python/ceval.c Mon Oct 15 04:52:41 2007
@@ -767,7 +767,7 @@
        lltrace = PyDict_GetItemString(f->f_globals, "__lltrace__") != NULL;
 #endif
 #if defined(Py_DEBUG) || defined(LLTRACE)
-       filename = PyString_AsString(co->co_filename);
+       filename = PyUnicode_AsString(co->co_filename);
 #endif
 
        why = WHY_NOT;
@@ -2565,7 +2565,7 @@
                if (argcount > co->co_argcount) {
                        if (!(co->co_flags & CO_VARARGS)) {
                                PyErr_Format(PyExc_TypeError,
-                                   "%S() takes %s %d "
+                                   "%U() takes %s %d "
                                    "%spositional argument%s (%d given)",
                                    co->co_name,
                                    defcount ? "at most" : "exactly",
@@ -2599,7 +2599,7 @@
                        int j;
                        if (keyword == NULL || !PyUnicode_Check(keyword)) {
                                PyErr_Format(PyExc_TypeError,
-                                   "%S() keywords must be strings",
+                                   "%U() keywords must be strings",
                                    co->co_name);
                                goto fail;
                        }
@@ -2622,7 +2622,7 @@
                        if (j >= co->co_argcount + co->co_kwonlyargcount) {
                                if (kwdict == NULL) {
                                        PyErr_Format(PyExc_TypeError,
-                                           "%S() got an unexpected "
+                                           "%U() got an unexpected "
                                            "keyword argument '%S'",
                                            co->co_name,
                                            keyword);
@@ -2633,7 +2633,7 @@
                        else {
                                if (GETLOCAL(j) != NULL) {
                                        PyErr_Format(PyExc_TypeError,
-                                            "%S() got multiple "
+                                            "%U() got multiple "
                                             "values for keyword "
                                             "argument '%S'",
                                             co->co_name,
@@ -2661,7 +2661,7 @@
                                        continue;
                                }
                                PyErr_Format(PyExc_TypeError,
-                                       "%S() needs keyword-only argument %S",
+                                       "%U() needs keyword-only argument %S",
                                        co->co_name, name);
                                goto fail;
                        }
@@ -2671,7 +2671,7 @@
                        for (i = argcount; i < m; i++) {
                                if (GETLOCAL(i) == NULL) {
                                        PyErr_Format(PyExc_TypeError,
-                                           "%S() takes %s %d "
+                                           "%U() takes %s %d "
                                            "%spositional argument%s "
                                            "(%d given)",
                                            co->co_name,
@@ -2699,7 +2699,7 @@
        else {
                if (argcount > 0 || kwcount > 0) {
                        PyErr_Format(PyExc_TypeError,
-                                    "%S() takes no arguments (%d given)",
+                                    "%U() takes no arguments (%d given)",
                                     co->co_name,
                                     argcount + kwcount);
                        goto fail;

Modified: python/branches/py3k/Python/compile.c
==============================================================================
--- python/branches/py3k/Python/compile.c       (original)
+++ python/branches/py3k/Python/compile.c       Mon Oct 15 04:52:41 2007
@@ -1247,7 +1247,7 @@
                                PyObject_REPR(name), 
                                PyString_AS_STRING(c->u->u_name), 
                                reftype, arg,
-                               PyString_AS_STRING(co->co_name),
+                               PyUnicode_AsString(co->co_name),
                                PyObject_REPR(co->co_freevars));
                        Py_FatalError("compiler_make_closure()");
                }
@@ -4001,7 +4001,7 @@
        freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_Size(cellvars));
        if (!freevars)
            goto error;
-       filename = PyString_FromString(c->c_filename);
+       filename = PyUnicode_DecodeFSDefault(c->c_filename);
        if (!filename)
                goto error;
 

Modified: python/branches/py3k/Python/frozen.c
==============================================================================
--- python/branches/py3k/Python/frozen.c        (original)
+++ python/branches/py3k/Python/frozen.c        Mon Oct 15 04:52:41 2007
@@ -17,7 +17,7 @@
        131,1,0,1,100,1,0,83,40,2,0,0,0,117,14,0,
        0,0,72,101,108,108,111,32,119,111,114,108,100,46,46,46,
        78,40,1,0,0,0,117,5,0,0,0,112,114,105,110,116,
-       40,0,0,0,0,40,0,0,0,0,40,0,0,0,0,115,
+       40,0,0,0,0,40,0,0,0,0,40,0,0,0,0,117,
        8,0,0,0,104,101,108,108,111,46,112,121,117,8,0,0,
        0,60,109,111,100,117,108,101,62,1,0,0,0,115,0,0,
        0,0,

Modified: python/branches/py3k/Python/import.c
==============================================================================
--- python/branches/py3k/Python/import.c        (original)
+++ python/branches/py3k/Python/import.c        Mon Oct 15 04:52:41 2007
@@ -74,10 +74,11 @@
                      3040 (added signature annotations)
                      3050 (print becomes a function)
                      3060 (PEP 3115 metaclass syntax)
-          3070 (PEP 3109 raise changes)
+                     3070 (PEP 3109 raise changes)
+                     3080 (PEP 3137 make __file__ and __name__ unicode)
 .
 */
-#define MAGIC (3070 | ((long)'\r'<<16) | ((long)'\n'<<24))
+#define MAGIC (3080 | ((long)'\r'<<16) | ((long)'\n'<<24))
 
 /* Magic word as global; note that _PyImport_Init() can change the
    value of this global to accommodate for alterations of how the
@@ -652,7 +653,7 @@
        /* Remember the filename as the __file__ attribute */
        v = NULL;
        if (pathname != NULL) {
-               v = PyString_FromString(pathname);
+               v = PyUnicode_DecodeFSDefault(pathname);
                if (v == NULL)
                        PyErr_Clear();
        }
@@ -983,7 +984,7 @@
                PySys_WriteStderr("import %s # directory %s\n",
                        name, pathname);
        d = PyModule_GetDict(m);
-       file = PyString_FromString(pathname);
+       file = PyUnicode_DecodeFSDefault(pathname);
        if (file == NULL)
                goto error;
        path = Py_BuildValue("[O]", file);

Modified: python/branches/py3k/Python/importdl.c
==============================================================================
--- python/branches/py3k/Python/importdl.c      (original)
+++ python/branches/py3k/Python/importdl.c      Mon Oct 15 04:52:41 2007
@@ -62,7 +62,9 @@
                return NULL;
        }
        /* Remember the filename as the __file__ attribute */
-       if (PyModule_AddStringConstant(m, "__file__", pathname) < 0)
+       PyObject *path;
+       path = PyUnicode_DecodeFSDefault(pathname);
+       if (PyModule_AddObject(m, "__file__", path) < 0)
                PyErr_Clear(); /* Not important enough to report */
 
        if (_PyImport_FixupExtension(name, pathname) == NULL)

Modified: python/branches/py3k/Python/pythonrun.c
==============================================================================
--- python/branches/py3k/Python/pythonrun.c     (original)
+++ python/branches/py3k/Python/pythonrun.c     Mon Oct 15 04:52:41 2007
@@ -867,7 +867,8 @@
                return -1;
        d = PyModule_GetDict(m);
        if (PyDict_GetItemString(d, "__file__") == NULL) {
-               PyObject *f = PyString_FromString(filename);
+               PyObject *f;
+               f = PyUnicode_DecodeFSDefault(filename);
                if (f == NULL)
                        return -1;
                if (PyDict_SetItemString(d, "__file__", f) < 0) {

Modified: python/branches/py3k/Python/traceback.c
==============================================================================
--- python/branches/py3k/Python/traceback.c     (original)
+++ python/branches/py3k/Python/traceback.c     Mon Oct 15 04:52:41 2007
@@ -229,10 +229,10 @@
        while (tb != NULL && err == 0) {
                if (depth <= limit) {
                        err = tb_displayline(f,
-                           PyString_AsString(
+                           PyUnicode_AsString(
                                    tb->tb_frame->f_code->co_filename),
                            tb->tb_lineno,
-                           PyString_AsString(tb->tb_frame->f_code->co_name));
+                           PyUnicode_AsString(tb->tb_frame->f_code->co_name));
                }
                depth--;
                tb = tb->tb_next;
_______________________________________________
Python-3000-checkins mailing list
Python-3000-checkins@python.org
http://mail.python.org/mailman/listinfo/python-3000-checkins

Reply via email to