Roy Hyunjin Han <[email protected]> added the comment:
Improved workaround to handle another degenerate case where the encoded string
is in between non-encoded strings.
import email.errors
import email.header
import re
# Kept verbatim for any caller that imports it: group 2 is the first
# "=?charset?q|b?" prefix, group 1 runs from that prefix to the LAST "?=" on
# the line.  decodeSafely no longer relies on it because that greedy span
# also swallows plain text sitting between two separate encoded words.
pattern_ecre = re.compile(r'((=\?.*?\?[qb]\?).*\?=)', re.VERBOSE |
                          re.IGNORECASE | re.MULTILINE)

# One RFC 2047 encoded word, "=?charset?encoding?payload?=", without groups
# so it can be repeated inside the run pattern below.
_ENCODED_WORD = r'=\?[^?\s]+\?[bq]\?[^?\s]*\?='
# A run of encoded words separated by nothing or by whitespace only.
_encoded_run_re = re.compile(
    r'%s(?:\s*%s)*' % (_ENCODED_WORD, _ENCODED_WORD), re.IGNORECASE)
# The same single word, capturing charset, encoding and payload.
_encoded_word_re = re.compile(
    r'=\?([^?\s]+)\?([bq])\?([^?\s]*)\?=', re.IGNORECASE)


def _decode_run(run_match):
    """Decode one run of adjacent encoded words.

    Returns the matched text unchanged when any word in the run cannot be
    decoded (bad base64, unknown charset).
    """
    run = run_match.group(0)
    # [charset, raw] pairs; neighbouring words that share a charset are joined
    # before text decoding so a multi-byte character split across two words
    # is reassembled first.
    merged = []
    try:
        for word in _encoded_word_re.finditer(run):
            charset, encoding, payload = word.groups()
            charset = charset.lower()
            if encoding in 'bB':
                # Normalise base64 padding: drop whatever is there and re-pad
                # to a multiple of four characters.
                payload = payload.rstrip('=')
                payload += '=' * (-len(payload) % 4)
            # Each word is handed to decode_header on its own, so the parser
            # never sees two words glued together without whitespace.
            raw = email.header.decode_header(
                '=?%s?%s?%s?=' % (charset, encoding, payload))[0][0]
            if merged and merged[-1][0] == charset:
                merged[-1][1] += raw
            else:
                merged.append([charset, raw])
        # decode_header yields bytes on Python 3 and str on Python 2; only
        # bytes need converting to text, str values are spliced in as-is.
        return ''.join(
            raw if isinstance(raw, str) else raw.decode(charset, 'replace')
            for charset, raw in merged)
    except (email.errors.HeaderParseError, LookupError, ValueError):
        return run


def decodeSafely(x):
    """Replace every RFC 2047 encoded word in *x* with its decoded content.

    Handles encoded words that directly follow one another without
    whitespace, and encoded words embedded between non-encoded text.  Both
    base64 ("B") and quoted-printable ("Q") words are supported; each word is
    decoded individually, so per-word base64 padding and "=XX" escapes are
    preserved.  Whitespace that separates two encoded words is dropped.

    Text outside encoded words is returned untouched.  An encoded run that
    cannot be decoded is left in its original, encoded form.
    """
    return _encoded_run_re.sub(_decode_run, x)
# Usage examples.  The call is kept on the same line as `print`; when the two
# are split, a blank line is printed and the decoded value is thrown away.
# The single-argument parenthesised form runs on both Python 2 and Python 3.

# Three base64 encoded words glued together with no separating whitespace.
print(decodeSafely('=?UTF-8?B?MjAxMSBBVVRNIENBTEwgZm9yIE5PTUlO?==?UTF-8?B?QVRJT05TIG9mIFZQIGZvciBNZW1iZXJz?==?UTF-8?B?aGlw?='))
# One encoded word sitting between non-encoded text (quotes and an address).
print(decodeSafely('"=?UTF-8?B?QVVUTSBIZWFkcXVhcnRlcnM=?="<[email protected]>'))
----------
_______________________________________
Python tracker <[email protected]>
<http://bugs.python.org/issue10574>
_______________________________________
_______________________________________________
Python-bugs-list mailing list
Unsubscribe:
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com