------------------------------------------------------------
revno: 1044
committer: Mark Sapiro <[email protected]>
branch nick: 2.2
timestamp: Thu 2009-03-12 18:14:10 -0700
message:
Fixed a bug in Utils.canonstr() that would raise a UnicodeDecodeError
if the string contained an HTML entity with a code point > 255 and also
characters in the 128-255 range. Bug #341594.
modified:
Mailman/Utils.py
NEWS
=== modified file 'Mailman/Utils.py'
--- Mailman/Utils.py 2009-01-03 02:43:05 +0000
+++ Mailman/Utils.py 2009-03-13 01:14:10 +0000
@@ -809,12 +809,25 @@
newparts = []
parts = re.split(r'&(?P<ref>[^;]+);', s)
def appchr(i):
- if i < 256:
- newparts.append(chr(i))
+ # do everything in unicode
+ newparts.append(unichr(i))
+ def tounicode(s):
+ # We want the default fallback to be iso-8859-1 even if the language
+ # is English (us-ascii). This seems like a practical compromise so
+ # that non-ASCII characters in names can be used in English lists w/o
+ # having to change the global charset for English from us-ascii (which
+ # I superstitiously think may have unintended consequences).
+ if isinstance(s, unicode):
+ return s
+ if lang is None:
+ charset = 'iso-8859-1'
else:
- newparts.append(unichr(i))
+ charset = GetCharSet(lang)
+ if charset == 'us-ascii':
+ charset = 'iso-8859-1'
+ return unicode(s, charset, 'replace')
while True:
- newparts.append(parts.pop(0))
+ newparts.append(tounicode(parts.pop(0)))
if not parts:
break
ref = parts.pop(0)
@@ -823,28 +836,16 @@
appchr(int(ref[1:]))
except ValueError:
# Non-convertable, stick with what we got
- newparts.append('&'+ref+';')
+ newparts.append(tounicode('&'+ref+';'))
else:
c = htmlentitydefs.entitydefs.get(ref, '?')
if c.startswith('#') and c.endswith(';'):
appchr(int(ref[1:-1]))
else:
- newparts.append(c)
+ newparts.append(tounicode(c))
newstr = EMPTYSTRING.join(newparts)
- if isinstance(newstr, UnicodeType):
- return newstr
- # We want the default fallback to be iso-8859-1 even if the language is
- # English (us-ascii). This seems like a practical compromise so that
- # non-ASCII characters in names can be used in English lists w/o having to
- # change the global charset for English from us-ascii (which I
- # superstitiously think may have unintended consequences).
- if lang is None:
- charset = 'iso-8859-1'
- else:
- charset = GetCharSet(lang)
- if charset == 'us-ascii':
- charset = 'iso-8859-1'
- return unicode(newstr, charset, 'replace')
+ # newstr is unicode
+ return newstr
# The opposite of canonstr() -- sorta. I.e. it attempts to encode s in the
=== modified file 'NEWS'
--- NEWS 2009-02-23 22:28:58 +0000
+++ NEWS 2009-03-13 01:14:10 +0000
@@ -37,6 +37,10 @@
Bug fixes and other patches
+ - Fixed a bug in Utils.canonstr() that would throw a UnicodeDecodeError
+ if the string contained an HTML entity > 255 and also characters in the
+ 128-255 range. Bug #341594.
+
- Fixed some technical problems with the listinfo.html template and its
translations.
--
Active development version (web u/i update)
https://code.launchpad.net/~mailman-coders/mailman/2.2
Your team Mailman Checkins is subscribed to branch lp:mailman/2.2.
To unsubscribe from this branch go to
https://code.launchpad.net/~mailman-coders/mailman/2.2/+edit-subscription.
_______________________________________________
Mailman-checkins mailing list
[email protected]
Unsubscribe:
http://mail.python.org/mailman/options/mailman-checkins/archive%40jab.org