When I check the pending list, I see many charset-encoded headers, which
are unreadable as they stand. So I made a quick-and-dirty hack to tmda-pending
that prints those headers in readable form.
I don't know much about Python, so I just copied some code from
EncWord.py in Mailman. I hope somebody else can make it better.
--
| Jinhyok Heo (novembre @ ournature.org || http://ournature.org/~novembre/)
|--------------------------------------------------------------------------
| "We are still reaching for the sky. In the developed countries people
| are coming back down, saying, `It's empty up there.'" --- a Ladakhi monk
--- tmda-pending Fri Sep 13 10:38:22 2002
+++ tmda-pending.n Fri Sep 13 13:25:33 2002
@@ -149,6 +149,120 @@
import sys
import time
+import string
+import base64
+
+class DecodeError(ValueError):  # raised by Decoder when a string is not a well-formed encoded-word
+ __super_init = ValueError.__init__  # keep a reference to the base initializer so __init__ can chain to it
+ def __init__(self, msg):  # msg: short description of what was wrong with the input
+ self.__super_init('invalid encoded-word: %s' % msg)  # every message carries the same 'invalid encoded-word: ' prefix
+
+class Decoder:  # parses one "=?charset?encoding?text?=" token at a time; keeps a parse cursor in self.offset
+ """Decode mail header encoded-word format defined by RFC 2047"""
+
+ offset = 0  # class-level default for the parse cursor; the _get_* helpers rebind it on the instance
+
+ def decode(self, s):  # s must begin exactly at the "=?" of an encoded-word
+ """Decode an encoded-word.
+
+ Returns the charset of the encoded-word, the decoded text, and the
+ position of the first character following the encoded-word.
+
+ The first position of the input string must by the first character of
+ the encoded-word.
+ """
+ if s[:2] <> '=?':  # reject input that does not start with the encoded-word opener
+ raise DecodeError('must start with "=?", not %s' % repr(s[:2]))
+ charset = self._get_charset(s)  # order matters: each _get_* call resumes at the self.offset left by the previous one
+ encoding = self._get_encoding(s)  # already lowercased and validated to be 'q' or 'b'
+ _text = self._get_text(s)  # still-encoded payload between the third '?' and the closing "?="
+ # encoding must be either 'q' or 'b', ensured by _get_encoding()
+ if encoding == 'q':
+ text = self._decode_q(_text)
+ else:
+ text = self._decode_b(_text)
+ return charset, text, self.offset  # offset is an index into s (just past "?="); charset is returned as written in s
+
+ # TBD: Technically the charset and encoding can't contain SPACE, CTLs, or
+ # especials; do not currently check this.
+
+ def _get_charset(self, s):  # returns the text between "=?" and the next '?'
+ i = string.find(s, '?', 2)  # start at index 2 to skip the leading "=?"
+ if i == -1:
+ raise DecodeError("can't find end of charset")
+ self.offset = i + 1  # cursor now sits just past the '?' that ends the charset
+ return s[2:i]
+
+ _valid_encodings = ('q', 'b')  # lowercase forms; _get_encoding lowercases before testing membership
+
+ def _get_encoding(self, s):  # returns 'q' or 'b'; relies on self.offset set by _get_charset
+ i = string.find(s, '?', self.offset)  # the '?' that terminates the encoding field
+ if i < 0:
+ raise DecodeError("can't find encoding")
+ enc = string.lower(s[self.offset:i])  # matched case-insensitively
+ self.offset = i + 1  # advanced before validation, so the cursor moves even when the encoding is rejected
+ if enc not in Decoder._valid_encodings:
+ raise DecodeError('not a valid encoding: %s' % enc)
+ return enc
+
+ def _get_text(self, s):  # returns the encoded payload; relies on self.offset set by _get_encoding
+ i = string.find(s, '?=', self.offset)  # first "?=" at or after the cursor closes the encoded-word
+ if i < 0:
+ raise DecodeError("can't find end of encoded text")
+ text = s[self.offset:i]
+ self.offset = i + 2  # step past the two-character "?=" terminator
+ return text
+
+ SPACE = chr(0x20)  # the character that '_' stands for in Q-encoded text
+
+ def _decode_q(self, s):  # s is the payload only, without the =?...?q? wrapper
+ """Q encoding defined by RFC 2047"""
+ chunks = []  # decoded pieces, joined once at the end
+ offset = 0  # local scan position in s; unrelated to self.offset
+ end = len(s)
+ while offset < end:
+ i = string.find(s, '=', offset)  # next "=XX" hex escape, or -1
+ j = string.find(s, '_', offset)  # next '_' (encoded space), or -1
+ if i < 0 and j < 0:  # no escapes left: the remainder is literal
+ chunks.append(s[offset:])
+ break
+ if (j < i and j >= 0) or i < 0:  # the '_' comes first, or there is no '=' left
+ chunks.append(s[offset:j])
+ chunks.append(Decoder.SPACE)
+ offset = j + 1
+ else:  # the '=' comes first, or there is no '_' left
+ chunks.append(s[offset:i])
+ hexdig = s[i+1:i+3]  # NOTE(review): shorter than two chars when '=' is at or next to the end of s
+ chunks.append(chr(string.atoi(hexdig, 16)))  # NOTE(review): empty or non-hex hexdig is not checked here, so DecodeError is never raised for it - presumably atoi raises a plain ValueError; confirm
+ offset = i + 3  # skip '=' plus the two hex digits
+ return string.join(chunks, '')
+
+ def _decode_b(self, s):  # s is the payload only, without the =?...?b? wrapper
+ """B encoding == base64 encoding defined by RFC 2045"""
+ return base64.decodestring(s)  # result is returned as-is; no charset conversion is applied here
+
+def decode(s):  # returns a (decoded_text, charset) tuple
+ """Decode a string containing encoded words"""
+ _decode = Decoder().decode  # one Decoder instance is reused for every encoded-word in s
+
+ chunks = []  # literal and decoded pieces, in order
+ offset = 0  # index into s where the next search starts
+ charset = None  # charset of the first encoded-word seen; stays None if there are none
+ while 1:
+ i = string.find(s, '=?', offset)  # start of the next encoded-word candidate
+ if i < 0:
+ chunks.append(s[offset:])  # nothing left to decode: keep the tail verbatim
+ break
+ chunks.append(s[offset:i])  # NOTE(review): text between encoded-words is kept verbatim; RFC 2047 says whitespace separating adjacent encoded-words is ignored on display
+ _charset, text, offset = _decode(s[i:])  # returned offset is relative to the slice s[i:]; a malformed token raises DecodeError
+ offset = offset + i  # convert back to an index into s
+ if charset is None:
+ charset = _charset
+ elif charset <> _charset:  # exact string comparison, so 'UTF-8' and 'utf-8' count as different charsets
+ raise ValueError("can't decode string with multiple charsets")  # NOTE(review): nothing in this function catches it, so it propagates to the caller
+ chunks.append(text)
+ return string.join(chunks, ''), charset  # text is not converted out of its charset
+
try:
import paths
except ImportError:
@@ -396,9 +510,9 @@
msg_size,
bytes)
for hdr in ('date', 'from', 'to', 'subject'):
+ text, charset = decode(headers.getheader(hdr, 'None'))
print "%s %s: %s" % (' >>',
- hdr.capitalize()[:4].rjust(4),
- headers.getheader(hdr, 'None'))
+ hdr.capitalize()[:4].rjust(4), text)
if summary:
print '<mailto:%s>' % (confirm_accept_address
(recipient_address, msg))