#2543: [patch] Django doesn't handle UTF-8 encoded URLs properly
-----------------------------------------------+----------------------------
 Reporter:  Victor Ng <[EMAIL PROTECTED]>  |        Owner:  hugo
     Type:  defect                             |       Status:  new 
 Priority:  normal                             |    Milestone:      
Component:  Internationalization               |      Version:      
 Severity:  normal                             |   Resolution:      
 Keywords:                                     |  
-----------------------------------------------+----------------------------
Comment (by anonymous):

 I'm not sure I understand.  How would you go about compromising the
 security of Django with non-UTF8 data in the URL?
 
 I've added a little more code to trap the UnicodeDecodeError and raise an
 instance of SuspiciousOperation for non-UTF8 data, but to be honest - I
 don't see how this is more or less secure than the previous patch.  Is
 this what you're looking for?
 
 I'm also not exactly sure how I'm supposed to exercise the URLresolver
 for the passing case - there doesn't seem to be a sample test that lets me
 see how to do this.  Can someone fill in the valid case in test_unicode.py,
 where UTF8 data in a URL decodes properly?
 
 {{{
 Index: django/core/urlresolvers.py
 ===================================================================
 --- django/core/urlresolvers.py (revision 3582)
 +++ django/core/urlresolvers.py (working copy)
 @@ -79,9 +79,9 @@
              test_regex = grouped
          # Note we're using re.match here on purpose because the start of
          # to string needs to match.
 -        if not re.match(test_regex + '$', str(value)): # TODO: Unicode?
 +        if not re.match(test_regex + '$', unicode(value), re.UNICODE):
              raise NoReverseMatch("Value %r didn't match regular
 expression %r" % (value, test_regex))
 -        return str(value) # TODO: Unicode?
 +        return unicode(value)
 
  class RegexURLPattern(object):
      def __init__(self, regex, callback, default_args=None):
 @@ -89,7 +89,7 @@
          # callback is either a string like
 'foo.views.news.stories.story_detail'
          # which represents the path to a module and a view function name,
 or a
          # callable object (view).
 -        self.regex = re.compile(regex)
 +        self.regex = re.compile(regex, re.UNICODE)
          if callable(callback):
              self._callback = callback
          else:
 @@ -143,7 +143,7 @@
      def __init__(self, regex, urlconf_name, default_kwargs=None):
          # regex is a string representing a regular expression.
          # urlconf_name is a string representing the module containing
 urlconfs.
 -        self.regex = re.compile(regex)
 +        self.regex = re.compile(regex, re.UNICODE)
          self.urlconf_name = urlconf_name
          self.callback = None
          self.default_kwargs = default_kwargs or {}
 @@ -225,6 +225,7 @@
          from django.conf import settings
          urlconf = settings.ROOT_URLCONF
      resolver = RegexURLResolver(r'^/', urlconf)
 +
      return resolver.resolve(path)
 
  def reverse(viewname, urlconf=None, args=None, kwargs=None):
 Index: django/core/handlers/base.py
 ===================================================================
 --- django/core/handlers/base.py        (revision 3582)
 +++ django/core/handlers/base.py        (working copy)
 @@ -50,10 +50,18 @@
 
      def get_response(self, path, request):
          "Returns an HttpResponse object for the given HttpRequest"
 +        from exceptions import UnicodeDecodeError
          from django.core import exceptions, urlresolvers
          from django.core.mail import mail_admins
          from django.conf import settings
 
 +        # URLs are encoded in UTF8 (RFC3986, RFC2718)
 +
 +        try:
 +            path = path.decode('utf8')
 +        except UnicodeDecodeError, ude:
 +            raise exceptions.SuspiciousOperation, "Failed to decode the
 URL using UTF8.  This is probably a user agent."
 +
          # Apply request middleware
          for middleware_method in self._request_middleware:
              response = middleware_method(request)
 Index: tests/othertests/test_unicode.py
 ===================================================================
 --- tests/othertests/test_unicode.py    (revision 0)
 +++ tests/othertests/test_unicode.py    (revision 0)
 @@ -0,0 +1,23 @@
 +from django.core.handlers.base import BaseHandler
 +from django.core.exceptions import SuspiciousOperation
 +from django.conf import settings
 +
 +# Test that django.core.handlers.base.BaseHandler handles malicious UTF8
 +# URLs by raising a SuspiciousOperation
 +
 +utf16 = u'some_other_encoding/foo/bar/'.encode('utf16')
 +
 +bh = BaseHandler()
 +bh._request_middleware = [] # set an empty list of request middleware
 +
 +try:
 +    bh.get_response(utf16, None)
 +except SuspiciousOperation, so:
 +    # expected error
 +    pass
 +else:
 +    raise "expected a suspicious operation here"
 +
 +
 +# TODO: Test that django.core.handlers.base.BaseHandler handles 'clean'
 UTF8 URLs
 +# I'm actually not sure how to do this.
 Index: tests/othertests/urlpatterns_reverse.py
 ===================================================================
 --- tests/othertests/urlpatterns_reverse.py     (revision 3582)
 +++ tests/othertests/urlpatterns_reverse.py     (working copy)
 @@ -23,6 +23,8 @@
      ('^people/(?P<state>\w\w)/(?P<name>\w+)/$', NoReverseMatch, [],
 {'name': 'adrian'}),
      ('^people/(?P<state>\w\w)/(\w+)/$', NoReverseMatch, ['il'], {'name':
 'adrian'}),
      ('^people/(?P<state>\w\w)/(\w+)/$', 'people/il/adrian/', ['adrian'],
 {'state': 'il'}),
 +    ('^people/(?P<state>\w\w)/(\w+)/$', 'people/il/adrian/', ['adrian'],
 {'state': 'il'}),
 +    ('^people/(?P<state>\w\w)/(?P<name>\w+)/$', u'people/il/adria\xd0/',
 [], {'state': u'il', 'name': u'adria\xd0'}),
  )
 
  def run_tests(verbosity=0):
 
 }}}

-- 
Ticket URL: <http://code.djangoproject.com/ticket/2543>
Django <http://code.djangoproject.org/>
The web framework for perfectionists with deadlines.
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"Django updates" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to [EMAIL PROTECTED]
For more options, visit this group at 
http://groups.google.com/group/django-updates
-~----------~----~----~----~------~----~------~--~---

Reply via email to