Today I experimented a little with the Django source code, and here are the results.
If you apply a very small patch (65 lines, attached), you can write a view completely in Unicode. This means: - GET/POST contain Unicode data - request.META contains Unicode data - you can put Unicode text into the HttpResponse (this was already possible without the patch). Of course, this patch is a demonstration only: the charset is hardcoded to UTF-8 (it should be settings.DEFAULT_CHARSET), and it only handles the WSGI way (the mod_python one is not handled). Also, templating and the ORM are not touched (not to mention the ugliness of the code). But still, I was quite surprised that so much can be done with such small changes. I think unicodizing Django can be done in 4 easily separated steps/parts: 1. request/response 2. templating system 3. database system 4. "overall Unicode conversion". This step is mostly about replacing bytestrings with u"bla" in the code, and switching __str__ to __unicode__. My biggest problem currently is that I do not know how to continue... Should I just write more and more patches to extend the "Unicode coverage" to more parts of Django? Or maybe a more coordinated approach would be better? The actual conversion is not that hard; it's just that it touches a lot of parts... so it's not too deep, but very wide :-) gabor --~--~---------~--~----~------------~-------~--~----~ You received this message because you are subscribed to the Google Groups "Django developers" group. To post to this group, send email to django-developers@googlegroups.com To unsubscribe from this group, send email to [EMAIL PROTECTED] For more options, visit this group at http://groups.google.com/group/django-developers -~----------~----~----~----~------~----~------~--~---
Index: django/http/__init__.py =================================================================== --- django/http/__init__.py (revision 3538) +++ django/http/__init__.py (working copy) @@ -73,13 +73,23 @@ POST.appendlist(name_dict['name'], submessage.get_payload()) return POST, FILES + +def hacked_parse_qsl(query_string,flag): + """needed to workaround the cgi.parse_sql unicode-problem""" + query_string = query_string.encode('ascii') + #FIXME: use settings.DEFAULT_CHARSET here + q = parse_qsl(query_string,flag) + + return [ [k.decode('utf8'),v.decode('utf8')] for (k,v) in q] + + class QueryDict(MultiValueDict): """A specialized MultiValueDict that takes a query string when initialized. This is immutable unless you create a copy of it.""" def __init__(self, query_string, mutable=False): MultiValueDict.__init__(self) self._mutable = True - for key, value in parse_qsl((query_string or ''), True): # keep_blank_values=True + for key, value in hacked_parse_qsl((query_string or ''), True): # keep_blank_values=True self.appendlist(key, value) self._mutable = mutable @@ -147,6 +157,7 @@ if cookie == '': return {} c = SimpleCookie() + cookie = cookie.encode('ascii') #fix needed for Cookie.SimpleCookie c.load(cookie) cookiedict = {} for key in c.keys(): Index: django/core/handlers/wsgi.py =================================================================== --- django/core/handlers/wsgi.py (revision 3538) +++ django/core/handlers/wsgi.py (working copy) @@ -50,8 +50,19 @@ 505: 'HTTP VERSION NOT SUPPORTED', } +def unicodize_environ(environ): + + def unicodize_item(key,value): + key = key.decode('ascii') + if not key.startswith('wsgi.'): + value = value.decode('ascii') + return (key,value) + + return dict([unicodize_item(*i) for i in environ.items()]) + class WSGIRequest(http.HttpRequest): def __init__(self, environ): + environ = unicodize_environ(environ) self.environ = environ self.path = environ['PATH_INFO'] self.META = environ