Author: martin.v.loewis
Date: Fri Aug 31 13:01:23 2007
New Revision: 57837

Modified:
   python/branches/py3k/Objects/unicodeobject.c
Log:
Change %s argument for PyUnicode_FromFormat to be UTF-8. Fixes #1070.
Modified: python/branches/py3k/Objects/unicodeobject.c ============================================================================== --- python/branches/py3k/Objects/unicodeobject.c (original) +++ python/branches/py3k/Objects/unicodeobject.c Fri Aug 31 13:01:23 2007 @@ -621,8 +621,39 @@ abuffersize = width; break; case 's': - n += strlen(va_arg(count, char*)); + { + /* UTF-8 */ + unsigned char*s; + s = va_arg(count, unsigned char*); + while (*s) { + if (*s < 128) { + n++; s++; + } else if (*s < 0xc0) { + /* invalid UTF-8 */ + n++; s++; + } else if (*s < 0xc0) { + n++; + s++; if(!*s)break; + s++; + } else if (*s < 0xe0) { + n++; + s++; if(!*s)break; + s++; if(!*s)break; + s++; + } else { + #ifdef Py_UNICODE_WIDE + n++; + #else + n+=2; + #endif + s++; if(!*s)break; + s++; if(!*s)break; + s++; if(!*s)break; + s++; + } + } break; + } case 'U': { PyObject *obj = va_arg(count, PyObject *); @@ -775,9 +806,22 @@ appendstring(realbuffer); break; case 's': + { + /* Parameter must be UTF-8 encoded. + In case of encoding errors, use + the replacement character. */ + PyObject *u; p = va_arg(vargs, char*); - appendstring(p); + u = PyUnicode_DecodeUTF8(p, strlen(p), + "replace"); + if (!u) + goto fail; + Py_UNICODE_COPY(s, PyUnicode_AS_UNICODE(u), + PyUnicode_GET_SIZE(u)); + s += PyUnicode_GET_SIZE(u); + Py_DECREF(u); break; + } case 'U': { PyObject *obj = va_arg(vargs, PyObject *); _______________________________________________ Python-3000-checkins mailing list Python-3000-checkins@python.org http://mail.python.org/mailman/listinfo/python-3000-checkins