Update of /cvsroot/tmda/tmda/TMDA/pythonlib/email
In directory usw-pr-cvs1:/tmp/cvs-serv7162/TMDA/pythonlib/email
Modified Files:
Charset.py Encoders.py Generator.py Header.py Iterators.py
MIMEAudio.py MIMEImage.py MIMEMultipart.py MIMENonMultipart.py
MIMEText.py Message.py Parser.py Utils.py __init__.py
base64MIME.py quopriMIME.py
Log Message:
Sync pythonlib/email with email 2.4. Changes from NEWS:
2.4 (01-Oct-2002)
This version has been backported to Python 2.2.2.
- Updated all the documentation.
- Clarified the semantics of Header.__init__() and
Header.append() when they get a byte string or a Unicode string as
their first argument. When a byte string is used, the charset
must be the encoding of the string, such that unicode(s, charset)
succeeds. When a Unicode string is used, the charset is a hint,
and the first of the following to succeed is used: us-ascii, the
charset hint, utf-8.
- A new header encoding flag has been added to the Charset
module. SHORTEST (which cannot be used for body encodings)
encodes the string with either quoted-printable or base64,
whichever is shorter in terms of characters. This is a good
heuristic for providing the most human readable value possible.
The utf-8 charset uses SHORTEST encoding by default now.
- Message.get_content_charset() is a new method that returns the
charset parameter on the Content-Type header, unquoted and RFC
2231 decoded if necessary.
- "import email" no longer imports some sub-modules by side-effect.
- Fixed some problems related to RFC 2231 encoding of boundary and
charset parameters on Content-Type headers. Document that
get_param() and get_params() may return values that are strings
or 3-tuples.
- The signature of the non-public function _structure() has
changed.
Index: Charset.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/Charset.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- Charset.py 11 Sep 2002 22:35:59 -0000 1.1
+++ Charset.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -1,26 +1,27 @@
# Copyright (C) 2001,2002 Python Software Foundation
-# Author: [EMAIL PROTECTED] (Ben Gertzfield)
-
-try:
- unicode
-except NameError:
- def _is_unicode(x):
- return 1==0
-else:
- # Use UnicodeType instead of built-in unicode for Py2.1 compatibility
- from types import UnicodeType
- def _is_unicode(x):
- return isinstance(x, UnicodeType)
+# Author: [EMAIL PROTECTED] (Ben Gertzfield), [EMAIL PROTECTED] (Barry Warsaw)
+from types import UnicodeType
from email.Encoders import encode_7or8bit
import email.base64MIME
import email.quopriMIME
+def _isunicode(s):
+ return isinstance(s, UnicodeType)
+
+# Python 2.2.1 and beyond has these symbols
+try:
+ True, False
+except NameError:
+ True = 1
+ False = 0
+
# Flags for types of header encodings
-QP = 1 # Quoted-Printable
-BASE64 = 2 # Base64
+QP = 1 # Quoted-Printable
+BASE64 = 2 # Base64
+SHORTEST = 3 # the shorter of QP and base64, but only for headers
# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
MISC_LEN = 7
@@ -41,7 +42,7 @@
'shift_jis': (BASE64, None, 'iso-2022-jp'),
'iso-2022-jp': (BASE64, None, None),
'koi8-r': (BASE64, BASE64, None),
- 'utf-8': (BASE64, BASE64, 'utf-8'),
+ 'utf-8': (SHORTEST, BASE64, 'utf-8'),
}
# Aliases for other commonly-used names for character sets. Map
@@ -84,15 +85,17 @@
# Convenience functions for extending the above mappings
def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
- """Add charset properties to the global map.
+ """Add character set properties to the global registry.
charset is the input character set, and must be the canonical name of a
character set.
Optional header_enc and body_enc is either Charset.QP for
- quoted-printable, Charset.BASE64 for base64 encoding, or None for no
- encoding. It describes how message headers and message bodies in the
- input charset are to be encoded. Default is no encoding.
+ quoted-printable, Charset.BASE64 for base64 encoding, Charset.SHORTEST for
+ the shortest of qp or base64 encoding, or None for no encoding. SHORTEST
+ is only valid for header_enc. It describes how message headers and
+ message bodies in the input charset are to be encoded. Default is no
+ encoding.
Optional output_charset is the character set that the output should be
in. Conversions will proceed from input charset, to Unicode, to the
@@ -101,9 +104,11 @@
Both input_charset and output_charset must have Unicode codec entries in
the module's charset-to-codec mapping; use add_codec(charset, codecname)
- to add codecs the module does not know about. See the codec module's
+ to add codecs the module does not know about. See the codecs module's
documentation for more information.
"""
+ if body_enc == SHORTEST:
+ raise ValueError, 'SHORTEST not allowed for body_enc'
CHARSETS[charset] = (header_enc, body_enc, output_charset)
@@ -121,7 +126,7 @@
charset is the canonical name of a character set. codecname is the name
of a Python codec, as appropriate for the second argument to the unicode()
- built-in, or to the .encode() method of a Unicode string.
+ built-in, or to the encode() method of a Unicode string.
"""
CODEC_MAP[charset] = codecname
@@ -133,8 +138,9 @@
This class provides information about the requirements imposed on email
for a specific character set. It also provides convenience routines for
converting between character sets, given the availability of the
- applicable codecs. Given an character set, it will do its best to provide
- information on how to use that character set in an email.
+ applicable codecs. Given a character set, it will do its best to provide
+ information on how to use that character set in an email in an
+ RFC-compliant way.
Certain character sets must be encoded with quoted-printable or base64
when used in email headers or bodies. Certain character sets must be
@@ -147,12 +153,14 @@
header_encoding: If the character set must be encoded before it can be
used in an email header, this attribute will be set to
- Charset.QP (for quoted-printable) or Charset.BASE64 (for
- base64 encoding). Otherwise, it will be None.
+ Charset.QP (for quoted-printable), Charset.BASE64 (for
+ base64 encoding), or Charset.SHORTEST for the shortest of
+ QP or BASE64 encoding. Otherwise, it will be None.
body_encoding: Same as header_encoding, but describes the encoding for the
mail message's body, which indeed may be different than the
- header encoding.
+ header encoding. Charset.SHORTEST is not allowed for
+ body_encoding.
output_charset: Some character sets must be converted before the can be
used in email headers or bodies. If the input_charset is
@@ -175,7 +183,7 @@
# charset_map dictionary. Try that first, but let the user override
# it.
henc, benc, conv = CHARSETS.get(self.input_charset,
- (BASE64, BASE64, None))
+ (SHORTEST, SHORTEST, None))
# Set the attributes, allowing the arguments to override the default.
self.header_encoding = henc
self.body_encoding = benc
@@ -202,13 +210,14 @@
This is either the string `quoted-printable' or `base64' depending on
the encoding used, or it is a function in which case you should call
the function with a single argument, the Message object being
- encoded. The function should then set the Content-Transfer-Encoding:
+ encoded. The function should then set the Content-Transfer-Encoding
header itself to whatever is appropriate.
Returns "quoted-printable" if self.body_encoding is QP.
Returns "base64" if self.body_encoding is BASE64.
Returns "7bit" otherwise.
"""
+ assert self.body_encoding <> SHORTEST
if self.body_encoding == QP:
return 'quoted-printable'
elif self.body_encoding == BASE64:
@@ -227,16 +236,16 @@
"""Convert a possibly multibyte string to a safely splittable format.
Uses the input_codec to try and convert the string to Unicode, so it
- can be safely split on character boundaries (even for double-byte
+ can be safely split on character boundaries (even for multibyte
characters).
- Returns the string untouched if we don't know how to convert it to
+ Returns the string as-is if it isn't known how to convert it to
Unicode with the input_charset.
Characters that could not be converted to Unicode will be replaced
with the Unicode replacement character U+FFFD.
"""
- if _is_unicode(s) or self.input_codec is None:
+ if _isunicode(s) or self.input_codec is None:
return s
try:
return unicode(s, self.input_codec, 'replace')
@@ -245,26 +254,24 @@
# string unchanged.
return s
- def from_splittable(self, ustr, to_output=1):
+ def from_splittable(self, ustr, to_output=True):
"""Convert a splittable string back into an encoded string.
- Uses the proper codec to try and convert the string from
- Unicode back into an encoded format. Return the string as-is
- if it is not Unicode, or if it could not be encoded from
- Unicode.
+ Uses the proper codec to try and convert the string from Unicode back
+ into an encoded format. Return the string as-is if it is not Unicode,
+ or if it could not be converted from Unicode.
Characters that could not be converted from Unicode will be replaced
with an appropriate character (usually '?').
- If to_output is true, uses output_codec to convert to an encoded
- format. If to_output is false, uses input_codec. to_output defaults
- to 1.
+ If to_output is True (the default), uses output_codec to convert to an
+ encoded format. If to_output is False, uses input_codec.
"""
if to_output:
codec = self.output_codec
else:
codec = self.input_codec
- if not _is_unicode(ustr) or codec is None:
+ if not _isunicode(ustr) or codec is None:
return ustr
try:
return ustr.encode(codec, 'replace')
@@ -275,7 +282,7 @@
def get_output_charset(self):
"""Return the output character set.
- This is self.output_charset if that is set, otherwise it is
+ This is self.output_charset if that is not None, otherwise it is
self.input_charset.
"""
return self.output_charset or self.input_charset
@@ -284,22 +291,26 @@
"""Return the length of the encoded header string."""
cset = self.get_output_charset()
# The len(s) of a 7bit encoding is len(s)
- if self.header_encoding is BASE64:
+ if self.header_encoding == BASE64:
return email.base64MIME.base64_len(s) + len(cset) + MISC_LEN
- elif self.header_encoding is QP:
+ elif self.header_encoding == QP:
return email.quopriMIME.header_quopri_len(s) + len(cset) + MISC_LEN
+ elif self.header_encoding == SHORTEST:
+ lenb64 = email.base64MIME.base64_len(s)
+ lenqp = email.quopriMIME.header_quopri_len(s)
+ return min(lenb64, lenqp) + len(cset) + MISC_LEN
else:
return len(s)
- def header_encode(self, s, convert=0):
+ def header_encode(self, s, convert=False):
"""Header-encode a string, optionally converting it to output_charset.
- If convert is true, the string will be converted from the input
+ If convert is True, the string will be converted from the input
charset to the output charset automatically. This is not useful for
multibyte character sets, which have line length issues (multibyte
characters must be split on a character, not a byte boundary); use the
high-level Header class to deal with these issues. convert defaults
- to 0.
+ to False.
The type of encoding (base64 or quoted-printable) will be based on
self.header_encoding.
@@ -308,17 +319,24 @@
if convert:
s = self.convert(s)
# 7bit/8bit encodings return the string unchanged (modulo conversions)
- if self.header_encoding is BASE64:
+ if self.header_encoding == BASE64:
return email.base64MIME.header_encode(s, cset)
- elif self.header_encoding is QP:
+ elif self.header_encoding == QP:
return email.quopriMIME.header_encode(s, cset)
+ elif self.header_encoding == SHORTEST:
+ lenb64 = email.base64MIME.base64_len(s)
+ lenqp = email.quopriMIME.header_quopri_len(s)
+ if lenb64 < lenqp:
+ return email.base64MIME.header_encode(s, cset)
+ else:
+ return email.quopriMIME.header_encode(s, cset)
else:
return s
- def body_encode(self, s, convert=1):
+ def body_encode(self, s, convert=True):
"""Body-encode a string and convert it to output_charset.
- If convert is true (the default), the string will be converted from
+ If convert is True (the default), the string will be converted from
the input charset to output charset automatically. Unlike
header_encode(), there are no issues with byte boundaries and
multibyte charsets in email bodies, so this is usually pretty safe.
Index: Encoders.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/Encoders.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- Encoders.py 11 Sep 2002 22:35:59 -0000 1.1
+++ Encoders.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -51,7 +51,7 @@
def encode_base64(msg):
"""Encode the message's payload in Base64.
- Also, add an appropriate Content-Transfer-Encoding: header.
+ Also, add an appropriate Content-Transfer-Encoding header.
"""
orig = msg.get_payload()
encdata = _bencode(orig)
@@ -61,9 +61,9 @@
def encode_quopri(msg):
- """Encode the message's payload in Quoted-Printable.
+ """Encode the message's payload in quoted-printable.
- Also, add an appropriate Content-Transfer-Encoding: header.
+ Also, add an appropriate Content-Transfer-Encoding header.
"""
orig = msg.get_payload()
encdata = _qencode(orig)
@@ -73,7 +73,7 @@
def encode_7or8bit(msg):
- """Set the Content-Transfer-Encoding: header to 7bit or 8bit."""
+ """Set the Content-Transfer-Encoding header to 7bit or 8bit."""
orig = msg.get_payload()
if orig is None:
# There's no payload. For backwards compatibility we use 7bit
Index: Generator.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/Generator.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- Generator.py 11 Sep 2002 22:35:59 -0000 1.1
+++ Generator.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -18,6 +18,11 @@
except SyntaxError:
from email._compat21 import _isstring
+try:
+ True, False
+except NameError:
+ True = 1
+ False = 0
EMPTYSTRING = ''
SEMISPACE = '; '
@@ -42,14 +47,15 @@
# Public interface
#
- def __init__(self, outfp, mangle_from_=1, maxheaderlen=78):
+ def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
"""Create the generator for message flattening.
outfp is the output file-like object for writing the message to. It
must have a write() method.
- Optional mangle_from_ is a flag that, when true, escapes From_ lines
- in the body of the message by putting a `>' in front of them.
+ Optional mangle_from_ is a flag that, when True (the default), escapes
+ From_ lines in the body of the message by putting a `>' in front of
+ them.
Optional maxheaderlen specifies the longest length for a non-continued
header. When a header line is longer (in characters, with tabs
@@ -61,21 +67,20 @@
"""
self._fp = outfp
self._mangle_from_ = mangle_from_
- self.__first = 1
self.__maxheaderlen = maxheaderlen
def write(self, s):
# Just delegate to the file object
self._fp.write(s)
- def flatten(self, msg, unixfrom=0):
+ def flatten(self, msg, unixfrom=False):
"""Print the message object tree rooted at msg to the output file
specified when the Generator instance was created.
unixfrom is a flag that forces the printing of a Unix From_ delimiter
before the first object in the message tree. If the original message
has no From_ delimiter, a `standard' one is crafted. By default, this
- is 0 to inhibit the printing of any From_ delimiter.
+ is False to inhibit the printing of any From_ delimiter.
Note that for subobjects, no From_ line is printed.
"""
@@ -146,23 +151,17 @@
def _write_headers(self, msg):
for h, v in msg.items():
- # We only write the MIME-Version: header for the outermost
- # container message. Unfortunately, we can't use same technique
- # as for the Unix-From above because we don't know when
- # MIME-Version: will occur.
- if h.lower() == 'mime-version' and not self.__first:
- continue
# RFC 2822 says that lines SHOULD be no more than maxheaderlen
# characters wide, so we're well within our rights to split long
# headers.
text = '%s: %s' % (h, v)
if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
- text = self._split_header(h, text)
+ text = self._split_header(text)
print >> self._fp, text
# A blank line always separates headers from body
print >> self._fp
- def _split_header(self, name, text):
+ def _split_header(self, text):
maxheaderlen = self.__maxheaderlen
# Find out whether any lines in the header are really longer than
# maxheaderlen characters wide. There could be continuation lines
@@ -225,7 +224,7 @@
for part in subparts:
s = StringIO()
g = self.clone(s)
- g.flatten(part, unixfrom=0)
+ g.flatten(part, unixfrom=False)
msgtexts.append(s.getvalue())
# Now make sure the boundary we've selected doesn't appear in any of
# the message texts.
@@ -264,7 +263,7 @@
for part in msg.get_payload():
s = StringIO()
g = self.clone(s)
- g.flatten(part, unixfrom=0)
+ g.flatten(part, unixfrom=False)
text = s.getvalue()
lines = text.split('\n')
# Strip off the unnecessary trailing empty line
@@ -284,7 +283,7 @@
# of length 1. The zeroth element of the list should be the Message
# object for the subpart. Extract that object, stringify it, and
# write it out.
- g.flatten(msg.get_payload(0), unixfrom=0)
+ g.flatten(msg.get_payload(0), unixfrom=False)
self._fp.write(s.getvalue())
@@ -295,7 +294,7 @@
Like the Generator base class, except that non-text parts are substituted
with a format string representing the part.
"""
- def __init__(self, outfp, mangle_from_=1, maxheaderlen=78, fmt=None):
+ def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
"""Like Generator.__init__() except that an additional optional
argument is allowed.
@@ -327,7 +326,7 @@
for part in msg.walk():
maintype = part.get_main_type('text')
if maintype == 'text':
- print >> self, part.get_payload(decode=1)
+ print >> self, part.get_payload(decode=True)
elif maintype == 'multipart':
# Just skip this
pass
@@ -354,7 +353,7 @@
return boundary
b = boundary
counter = 0
- while 1:
+ while True:
cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
if not cre.search(text):
break
Index: Header.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/Header.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- Header.py 11 Sep 2002 22:35:59 -0000 1.1
+++ Header.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -1,9 +1,11 @@
# Copyright (C) 2002 Python Software Foundation
-# Author: [EMAIL PROTECTED] (Ben Gertzfield)
+# Author: [EMAIL PROTECTED] (Ben Gertzfield), [EMAIL PROTECTED] (Barry Warsaw)
"""Header encoding and decoding functionality."""
import re
+from types import StringType, UnicodeType
+
import email.quopriMIME
import email.base64MIME
from email.Charset import Charset
@@ -14,6 +16,12 @@
# Python 2.1 spells integer division differently
from email._compat21 import _floordiv
+try:
+ True, False
+except NameError:
+ True = 1
+ False = 0
+
CRLFSPACE = '\r\n '
CRLF = '\r\n'
NL = '\n'
@@ -25,6 +33,9 @@
ENCODE = 1
DECODE = 2
+USASCII = Charset('us-ascii')
+UTF8 = Charset('utf-8')
+
# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
=\? # literal =?
@@ -117,21 +128,19 @@
class Header:
def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
continuation_ws=' '):
- """Create a MIME-compliant header that can contain many languages.
-
- Specify the initial header value in s. If None, the initial header
- value is not set.
+ """Create a MIME-compliant header that can contain many character sets.
- Specify both s's character set, and the default character set by
- setting the charset argument to a Charset object (not a character set
- name string!). If None, a us-ascii Charset is used as both s's
- initial charset and as the default character set for subsequent
- .append() calls.
-
- You can later append to the header with append(s, charset) below;
- charset does not have to be the same as the one initially specified
- here. In fact, it's optional, and if not given, defaults to the
- charset specified in the constructor.
+ Optional s is the initial header value. If None, the initial header
+ value is not set. You can later append to the header with .append()
+ method calls. s may be a byte string or a Unicode string, but see the
+ .append() documentation for semantics.
+
+ Optional charset serves two purposes: it has the same meaning as the
+ charset argument to the .append() method. It also sets the default
+ character set for all subsequent .append() calls that omit the charset
+ argument. If charset is not provided in the constructor, the us-ascii
+ charset is used both as s's initial charset and as the default for
+ subsequent .append() calls.
The maximum line length can be specified explicit via maxlinelen. For
splitting the first line to a shorter value (to account for the field
@@ -143,7 +152,7 @@
lines.
"""
if charset is None:
- charset = Charset()
+ charset = USASCII
self._charset = charset
self._continuation_ws = continuation_ws
cws_expanded_len = len(continuation_ws.replace('\t', SPACE8))
@@ -186,20 +195,44 @@
return not self == other
def append(self, s, charset=None):
- """Append string s with Charset charset to the MIME header.
+ """Append a string to the MIME header.
- If charset is given, it should be a Charset instance, or the name of a
- character set (which will be converted to a Charset instance). A
- value of None (the default) means charset is the one given in the
- class constructor.
+ Optional charset, if given, should be a Charset instance or the name
+ of a character set (which will be converted to a Charset instance). A
+ value of None (the default) means that the charset given in the
+ constructor is used.
+
+ s may be a byte string or a Unicode string. If it is a byte string
+ (i.e. isinstance(s, StringType) is true), then charset is the encoding
+ of that byte string, and a UnicodeError will be raised if the string
+ cannot be decoded with that charset. If s is a Unicode string, then
+ charset is a hint specifying the character set of the characters in
+ the string. In this case, when producing an RFC 2822 compliant header
+ using RFC 2047 rules, the Unicode string will be encoded using the
+ following charsets in order: us-ascii, the charset hint, utf-8. The
+ first character set not to provoke a UnicodeError is used.
"""
if charset is None:
charset = self._charset
elif not isinstance(charset, Charset):
charset = Charset(charset)
+ # Normalize and check the string
+ if isinstance(s, StringType):
+ # Possibly raise UnicodeError if it can't be decoded
+ unicode(s, charset.get_output_charset())
+ elif isinstance(s, UnicodeType):
+ # Convert Unicode to byte string for later concatenation
+ for charset in USASCII, charset, UTF8:
+ try:
+ s = s.encode(charset.get_output_charset())
+ break
+ except UnicodeError:
+ pass
+ else:
+ assert False, 'Could not encode to utf-8'
self._chunks.append((s, charset))
- def _split(self, s, charset, firstline=0):
+ def _split(self, s, charset, firstline=False):
# Split up a header safely for use with encode_chunks. BAW: this
# appears to be a private convenience method.
splittable = charset.to_splittable(s)
@@ -227,13 +260,13 @@
# We can split on _maxlinelen boundaries because we know that the
# encoding won't change the size of the string
splitpnt = self._maxlinelen
- first = charset.from_splittable(splittable[:splitpnt], 0)
- last = charset.from_splittable(splittable[splitpnt:], 0)
+ first = charset.from_splittable(splittable[:splitpnt], False)
+ last = charset.from_splittable(splittable[splitpnt:], False)
else:
# Divide and conquer.
halfway = _floordiv(len(splittable), 2)
- first = charset.from_splittable(splittable[:halfway], 0)
- last = charset.from_splittable(splittable[halfway:], 0)
+ first = charset.from_splittable(splittable[:halfway], False)
+ last = charset.from_splittable(splittable[halfway:], False)
# Do the split
return self._split(first, charset, firstline) + \
self._split(last, charset)
@@ -248,7 +281,7 @@
line = lines.pop(0)
if firstline:
maxlinelen = self._firstlinelen
- firstline = 0
+ firstline = False
else:
#line = line.lstrip()
maxlinelen = self._maxlinelen
@@ -338,13 +371,13 @@
# There's no encoding for this chunk's charsets
_max_append(chunks, header, self._maxlinelen)
else:
- _max_append(chunks, charset.header_encode(header, 0),
+ _max_append(chunks, charset.header_encode(header),
self._maxlinelen, ' ')
joiner = NL + self._continuation_ws
return joiner.join(chunks)
def encode(self):
- """Encode a message header, possibly converting charset and encoding.
+ """Encode a message header into an RFC-compliant format.
There are many issues involved in converting a given string for use in
an email header. Only certain character sets are readable in most
@@ -363,6 +396,6 @@
"""
newchunks = []
for s, charset in self._chunks:
- newchunks += self._split(s, charset, 1)
+ newchunks += self._split(s, charset, True)
self._chunks = newchunks
return self._encode_chunks()
Index: Iterators.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/Iterators.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- Iterators.py 11 Sep 2002 22:35:59 -0000 1.1
+++ Iterators.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -14,7 +14,7 @@
-def _structure(msg, level=0, fp=None):
+def _structure(msg, fp=None, level=0):
"""A handy debugging aid"""
if fp is None:
fp = sys.stdout
@@ -22,4 +22,4 @@
print >> fp, tab + msg.get_content_type()
if msg.is_multipart():
for subpart in msg.get_payload():
- _structure(subpart, level+1, fp)
+ _structure(subpart, fp, level+1)
Index: MIMEAudio.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/MIMEAudio.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- MIMEAudio.py 11 Sep 2002 22:35:59 -0000 1.1
+++ MIMEAudio.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -46,7 +46,7 @@
_audiodata is a string containing the raw audio data. If this data
can be decoded by the standard Python `sndhdr' module, then the
- subtype will be automatically included in the Content-Type: header.
+ subtype will be automatically included in the Content-Type header.
Otherwise, you can specify the specific audio subtype via the
_subtype parameter. If _subtype is not given, and no subtype can be
guessed, a TypeError is raised.
@@ -55,11 +55,11 @@
transport of the image data. It takes one argument, which is this
Image instance. It should use get_payload() and set_payload() to
change the payload to the encoded form. It should also add any
- Content-Transfer-Encoding: or other headers to the message as
+ Content-Transfer-Encoding or other headers to the message as
necessary. The default encoding is Base64.
Any additional keyword arguments are passed to the base class
- constructor, which turns them into parameters on the Content-Type:
+ constructor, which turns them into parameters on the Content-Type
header.
"""
if _subtype is None:
Index: MIMEImage.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/MIMEImage.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- MIMEImage.py 11 Sep 2002 22:35:59 -0000 1.1
+++ MIMEImage.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -21,7 +21,7 @@
_imagedata is a string containing the raw image data. If this data
can be decoded by the standard Python `imghdr' module, then the
- subtype will be automatically included in the Content-Type: header.
+ subtype will be automatically included in the Content-Type header.
Otherwise, you can specify the specific image subtype via the _subtype
parameter.
@@ -29,11 +29,11 @@
transport of the image data. It takes one argument, which is this
Image instance. It should use get_payload() and set_payload() to
change the payload to the encoded form. It should also add any
- Content-Transfer-Encoding: or other headers to the message as
+ Content-Transfer-Encoding or other headers to the message as
necessary. The default encoding is Base64.
Any additional keyword arguments are passed to the base class
- constructor, which turns them into parameters on the Content-Type:
+ constructor, which turns them into parameters on the Content-Type
header.
"""
if _subtype is None:
Index: MIMEMultipart.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/MIMEMultipart.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- MIMEMultipart.py 11 Sep 2002 22:35:59 -0000 1.1
+++ MIMEMultipart.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -15,7 +15,7 @@
"""Creates a multipart/* type message.
By default, creates a multipart/mixed message, with proper
- Content-Type: and MIME-Version: headers.
+ Content-Type and MIME-Version headers.
_subtype is the subtype of the multipart content type, defaulting to
`mixed'.
@@ -23,11 +23,11 @@
boundary is the multipart boundary string. By default it is
calculated as needed.
- _subparts is a sequence of initial subparts for the multipart. It
+ _subparts is a sequence of initial subparts for the payload. It
must be possible to convert this sequence to a list. You can always
attach new subparts to the message by using the attach() method.
- Additional parameters for the Content-Type: header are taken from the
+ Additional parameters for the Content-Type header are taken from the
keyword arguments (or passed into the _params argument).
"""
MIMEBase.MIMEBase.__init__(self, 'multipart', _subtype, **_params)
Index: MIMENonMultipart.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/MIMENonMultipart.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- MIMENonMultipart.py 11 Sep 2002 22:35:59 -0000 1.1
+++ MIMENonMultipart.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -12,9 +12,13 @@
class MIMENonMultipart(MIMEBase.MIMEBase):
"""Base class for MIME multipart/* type messages."""
+ __pychecker__ = 'unusednames=payload'
+
def attach(self, payload):
# The public API prohibits attaching multiple subparts to MIMEBase
# derived subtypes since none of them are, by definition, of content
# type multipart/*
raise Errors.MultipartConversionError(
'Cannot attach additional subparts to non-multipart/*')
+
+ del __pychecker__
Index: MIMEText.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/MIMEText.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- MIMEText.py 11 Sep 2002 22:35:59 -0000 1.1
+++ MIMEText.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -22,9 +22,9 @@
_subtype is the MIME sub content type, defaulting to "plain".
- _charset is the character set parameter added to the Content-Type:
+ _charset is the character set parameter added to the Content-Type
header. This defaults to "us-ascii". Note that as a side-effect, the
- Content-Transfer-Encoding: header will also be set.
+ Content-Transfer-Encoding header will also be set.
The use of the _encoder is deprecated. The encoding of the payload,
and the setting of the character set parameter now happens implicitly
@@ -35,14 +35,14 @@
"""
MIMENonMultipart.__init__(self, 'text', _subtype,
**{'charset': _charset})
- if _text and _text[-1] <> '\n':
+ if _text and not _text.endswith('\n'):
_text += '\n'
self.set_payload(_text, _charset)
if _encoder is not None:
warnings.warn('_encoder argument is obsolete.',
DeprecationWarning, 2)
# Because set_payload() with a _charset will set its own
- # Content-Transfer-Encoding: header, we need to delete the
+ # Content-Transfer-Encoding header, we need to delete the
# existing one or will end up with two of them. :(
del self['content-transfer-encoding']
_encoder(self)
Index: Message.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/Message.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- Message.py 11 Sep 2002 22:35:59 -0000 1.1
+++ Message.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -16,6 +16,12 @@
SEMISPACE = '; '
+try:
+ True, False
+except NameError:
+ True = 1
+ False = 0
+
# Regular expression used to split header parameters. BAW: this may be too
# simple. It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
# most headers found in the wild. We may eventually need a full fledged
@@ -28,7 +34,7 @@
# Helper functions
-def _formatparam(param, value=None, quote=1):
+def _formatparam(param, value=None, quote=True):
"""Convenience function to format and return a key=value pair.
This will quote the value if needed or if quote is true.
@@ -53,25 +59,26 @@
def _unquotevalue(value):
if isinstance(value, TupleType):
- return (value[0], value[1], Utils.unquote(value[2]))
+ return value[0], value[1], Utils.unquote(value[2])
else:
return Utils.unquote(value)
class Message:
- """Basic message object for use inside the object tree.
+ """Basic message object.
A message object is defined as something that has a bunch of RFC 2822
- headers and a payload. If the body of the message is a multipart, then
- the payload is a list of Messages, otherwise it is a string.
+ headers and a payload. It may optionally have an envelope header
+ (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
+ multipart or a message/rfc822), then the payload is a list of Message
+ objects, otherwise it is a string.
- These objects implement part of the `mapping' interface, which assumes
+ Message objects implement part of the `mapping' interface, which assumes
there is exactly one occurrance of the header per message. Some headers
- do in fact appear multiple times (e.g. Received:) and for those headers,
+ do in fact appear multiple times (e.g. Received) and for those headers,
you must use the explicit API to set or get all the headers. Not all of
the mapping methods are implemented.
-
"""
def __init__(self):
self._headers = []
@@ -85,13 +92,13 @@
def __str__(self):
"""Return the entire formatted message as a string.
- This includes the headers, body, and `unixfrom' line.
+ This includes the headers, body, and envelope header.
"""
- return self.as_string(unixfrom=1)
+ return self.as_string(unixfrom=True)
- def as_string(self, unixfrom=0):
+ def as_string(self, unixfrom=False):
"""Return the entire formatted message as a string.
- Optional `unixfrom' when true, means include the Unix From_ envelope
+ Optional `unixfrom' when True, means include the Unix From_ envelope
header.
"""
from email.Generator import Generator
@@ -101,10 +108,10 @@
return fp.getvalue()
def is_multipart(self):
- """Return true if the message consists of multiple parts."""
- if type(self._payload) is ListType:
- return 1
- return 0
+ """Return True if the message consists of multiple parts."""
+ if isinstance(self._payload, ListType):
+ return True
+ return False
#
# Unix From_ line
@@ -123,16 +130,18 @@
If the current payload is empty, then the current payload will be made
a scalar, set to the given value.
+
+ Note: This method is deprecated. Use .attach() instead.
"""
warnings.warn('add_payload() is deprecated, use attach() instead.',
DeprecationWarning, 2)
if self._payload is None:
self._payload = payload
- elif type(self._payload) is ListType:
+ elif isinstance(self._payload, ListType):
self._payload.append(payload)
elif self.get_main_type() not in (None, 'multipart'):
raise Errors.MultipartConversionError(
- 'Message main Content-Type: must be "multipart" or missing')
+ 'Message main content type must be "multipart" or missing')
else:
self._payload = [self._payload, payload]
@@ -140,8 +149,7 @@
"""Add the given payload to the current payload.
The current payload will always be a list of objects after this method
- is called. If you want to set the payload to a scalar object
- (e.g. because you're attaching a message/rfc822 subpart), use
+ is called. If you want to set the payload to a scalar object, use
set_payload() instead.
"""
if self._payload is None:
@@ -149,22 +157,25 @@
else:
self._payload.append(payload)
- def get_payload(self, i=None, decode=0):
- """Return the current payload exactly as is.
-
- Optional i returns that index into the payload.
+ def get_payload(self, i=None, decode=False):
+ """Return a reference to the payload.
- Optional decode is a flag indicating whether the payload should be
- decoded or not, according to the Content-Transfer-Encoding: header.
- When true and the message is not a multipart, the payload will be
- decoded if this header's value is `quoted-printable' or `base64'. If
- some other encoding is used, or the header is missing, the payload is
- returned as-is (undecoded). If the message is a multipart and the
- decode flag is true, then None is returned.
+ The payload will either be a list object or a string. If you mutate
+ the list object, you modify the message's payload in place. Optional
+ i returns that index into the payload.
+
+ Optional decode is a flag (defaulting to False) indicating whether the
+ payload should be decoded or not, according to the
+ Content-Transfer-Encoding header. When True and the message is not a
+ multipart, the payload will be decoded if this header's value is
+ `quoted-printable' or `base64'. If some other encoding is used, or
+ the header is missing, the payload is returned as-is (undecoded). If
+ the message is a multipart and the decode flag is True, then None is
+ returned.
"""
if i is None:
payload = self._payload
- elif type(self._payload) is not ListType:
+ elif not isinstance(self._payload, ListType):
raise TypeError, i
else:
payload = self._payload[i]
@@ -180,11 +191,12 @@
# unchanged.
return payload
-
def set_payload(self, payload, charset=None):
"""Set the payload to the given value.
- Optionally set the charset, which must be a Charset instance."""
+ Optional charset sets the message's default character set. See
+ set_charset() for details.
+ """
self._payload = payload
if charset is not None:
self.set_charset(charset)
@@ -192,17 +204,17 @@
def set_charset(self, charset):
"""Set the charset of the payload to a given character set.
- charset can be a string or a Charset object. If it is a string, it
- will be converted to a Charset object by calling Charset's
- constructor. If charset is None, the charset parameter will be
- removed from the Content-Type: field. Anything else will generate a
- TypeError.
+ charset can be a Charset instance, a string naming a character set, or
+ None. If it is a string it will be converted to a Charset instance.
+ If charset is None, the charset parameter will be removed from the
+ Content-Type field. Anything else will generate a TypeError.
- The message will be assumed to be a text message encoded with
+ The message will be assumed to be of type text/* encoded with
charset.input_charset. It will be converted to charset.output_charset
and encoded properly, if needed, when generating the plain text
representation of the message. MIME headers (MIME-Version,
Content-Type, Content-Transfer-Encoding) will be added as needed.
+
"""
if charset is None:
self.del_param('charset')
@@ -230,7 +242,8 @@
self.add_header('Content-Transfer-Encoding', cte)
def get_charset(self):
- """Return the Charset object associated with the message's payload."""
+ """Return the Charset instance associated with the message's payload.
+ """
return self._charset
#
@@ -271,8 +284,8 @@
newheaders.append((k, v))
self._headers = newheaders
- def __contains__(self, key):
- return key.lower() in [k.lower() for k, v in self._headers]
+ def __contains__(self, name):
+ return name.lower() in [k.lower() for k, v in self._headers]
def has_key(self, name):
"""Return true if the message contains the header."""
@@ -283,8 +296,9 @@
"""Return a list of all the message's header field names.
These will be sorted in the order they appeared in the original
- message, and may contain duplicates. Any fields deleted and
- re-inserted are always appended to the header list.
+ message, or were added to the message, and may contain duplicates.
+ Any fields deleted and re-inserted are always appended to the header
+ list.
"""
return [k for k, v in self._headers]
@@ -292,8 +306,9 @@
"""Return a list of all the message's header values.
These will be sorted in the order they appeared in the original
- message, and may contain duplicates. Any fields deleted and
- re-inserted are always appended to the header list.
+ message, or were added to the message, and may contain duplicates.
+ Any fields deleted and re-inserted are always appended to the header
+ list.
"""
return [v for k, v in self._headers]
@@ -301,8 +316,9 @@
"""Get all the message's header fields and values.
These will be sorted in the order they appeared in the original
- message, and may contain duplicates. Any fields deleted and
- re-inserted are always appended to the header list.
+ message, or were added to the message, and may contain duplicates.
+ Any fields deleted and re-inserted are always appended to the header
+ list.
"""
return self._headers[:]
@@ -386,7 +402,7 @@
"""Returns the message's content type.
The returned string is coerced to lowercase and returned as a single
- string of the form `maintype/subtype'. If there was no Content-Type:
+ string of the form `maintype/subtype'. If there was no Content-Type
header in the message, failobj is returned (defaults to None).
"""
missing = []
@@ -420,17 +436,17 @@
#
def get_content_type(self):
- """Returns the message's content type.
+ """Return the message's content type.
- The returned string is coerced to lowercase and returned as a ingle
- string of the form `maintype/subtype'. If there was no Content-Type:
- header in the message, the default type as give by get_default_type()
- will be returned. Since messages always have a default type this will
- always return a value.
-
- The current state of RFC standards define a message's default type to
- be text/plain unless it appears inside a multipart/digest container,
- in which case it would be message/rfc822.
+ The returned string is coerced to lower case of the form
+ `maintype/subtype'. If there was no Content-Type header in the
+ message, the default type as given by get_default_type() will be
+ returned. Since, according to RFC 2045, messages always have a default
+ type, this will always return a value.
+
+ RFC 2045 defines a message's default type to be text/plain unless it
+ appears inside a multipart/digest container, in which case it would be
+ message/rfc822.
"""
missing = []
value = self.get('content-type', missing)
@@ -444,21 +460,19 @@
return ctype
def get_content_maintype(self):
- """Returns the message's main content type.
+ """Return the message's main content type.
This is the `maintype' part of the string returned by
- get_content_type(). If no slash is found in the full content type, a
- ValueError is raised.
+ get_content_type().
"""
ctype = self.get_content_type()
return ctype.split('/')[0]
def get_content_subtype(self):
- """Returns the message's sub content type.
+ """Returns the message's sub-content type.
This is the `subtype' part of the string returned by
- get_content_type(). If no slash is found in the full content type, a
- ValueError is raised.
+ get_content_type().
"""
ctype = self.get_content_type()
return ctype.split('/')[1]
@@ -468,7 +482,7 @@
Most messages have a default content type of text/plain, except for
messages that are subparts of multipart/digest containers. Such
- subparts then have a default content type of message/rfc822.
+ subparts have a default content type of message/rfc822.
"""
return self._default_type
@@ -477,7 +491,7 @@
ctype should be either "text/plain" or "message/rfc822", although this
is not enforced. The default content type is not stored in the
- Content-Type: header.
+ Content-Type header.
"""
self._default_type = ctype
@@ -502,18 +516,18 @@
params = Utils.decode_params(params)
return params
- def get_params(self, failobj=None, header='content-type', unquote=1):
- """Return the message's Content-Type: parameters, as a list.
+ def get_params(self, failobj=None, header='content-type', unquote=True):
+ """Return the message's Content-Type parameters, as a list.
The elements of the returned list are 2-tuples of key/value pairs, as
split on the `=' sign. The left hand side of the `=' is the key,
while the right hand side is the value. If there is no `=' sign in
- the parameter the value is the empty string. The value is always
- unquoted, unless unquote is set to a false value.
+ the parameter the value is the empty string. The value is as
+ described in the get_param() method.
- Optional failobj is the object to return if there is no Content-Type:
+ Optional failobj is the object to return if there is no Content-Type
header. Optional header is the header to search instead of
- Content-Type:.
+ Content-Type. If unquote is True, the value is unquoted.
"""
missing = []
params = self._get_params_preserve(missing, header)
@@ -524,15 +538,28 @@
else:
return params
- def get_param(self, param, failobj=None, header='content-type', unquote=1):
- """Return the parameter value if found in the Content-Type: header.
-
- Optional failobj is the object to return if there is no Content-Type:
- header. Optional header is the header to search instead of
- Content-Type:
-
- Parameter keys are always compared case insensitively. Values are
- always unquoted, unless unquote is set to a false value.
+ def get_param(self, param, failobj=None, header='content-type',
+ unquote=True):
+ """Return the parameter value if found in the Content-Type header.
+
+ Optional failobj is the object to return if there is no Content-Type
+ header, or the Content-Type header has no such parameter. Optional
+ header is the header to search instead of Content-Type.
+
+ Parameter keys are always compared case insensitively. The return
+ value can either be a string, or a 3-tuple if the parameter was RFC
+ 2231 encoded. When it's a 3-tuple, the elements of the value are of
+ the form (CHARSET, LANGUAGE, VALUE), where LANGUAGE may be the empty
+ string. Your application should be prepared to deal with these, and
+ can convert the parameter to a Unicode string like so:
+
+ param = msg.get_param('foo')
+ if isinstance(param, tuple):
+ param = unicode(param[2], param[0])
+
+ In any case, the parameter value (either the returned string, or the
+ VALUE item in the 3-tuple) is always unquoted, unless unquote is set
+ to False.
"""
if not self.has_key(header):
return failobj
@@ -544,23 +571,23 @@
return v
return failobj
- def set_param(self, param, value, header='Content-Type', requote=1,
+ def set_param(self, param, value, header='Content-Type', requote=True,
charset=None, language=''):
- """Set a parameter in the Content-Type: header.
+ """Set a parameter in the Content-Type header.
If the parameter already exists in the header, its value will be
replaced with the new value.
- If header is Content-Type: and has not yet been defined in this
+ If header is Content-Type and has not yet been defined for this
message, it will be set to "text/plain" and the new parameter and
- value will be appended, as per RFC 2045.
+ value will be appended as per RFC 2045.
- An alternate header can specified in the header argument, and
- all parameters will be quoted as appropriate unless requote is
- set to a false value.
+ An alternate header can be specified in the header argument, and all
+ parameters will be quoted as necessary unless requote is False.
- If charset is specified the parameter will be encoded according to RFC
- 2231. In this case language is optional.
+ If charset is specified, the parameter will be encoded according to RFC
+ 2231. Optional language specifies the RFC 2231 language, defaulting
+ to the empty string. Both charset and language should be strings.
"""
if not isinstance(value, TupleType) and charset:
value = (charset, language, value)
@@ -592,12 +619,13 @@
del self[header]
self[header] = ctype
- def del_param(self, param, header='content-type', requote=1):
+ def del_param(self, param, header='content-type', requote=True):
"""Remove the given parameter completely from the Content-Type header.
- The header will be re-written in place without param or its value.
- All values will be quoted as appropriate unless requote is set to a
- false value.
+ The header will be re-written in place without the parameter or its
+ value. All values will be quoted as necessary unless requote is
+ False. Optional header specifies an alternative to the Content-Type
+ header.
"""
if not self.has_key(header):
return
@@ -613,25 +641,25 @@
del self[header]
self[header] = new_ctype
- def set_type(self, type, header='Content-Type', requote=1):
- """Set the main type and subtype for the Content-Type: header.
+ def set_type(self, type, header='Content-Type', requote=True):
+ """Set the main type and subtype for the Content-Type header.
type must be a string in the form "maintype/subtype", otherwise a
ValueError is raised.
- This method replaces the Content-Type: header, keeping all the
- parameters in place. If requote is false, this leaves the existing
+ This method replaces the Content-Type header, keeping all the
+ parameters in place. If requote is False, this leaves the existing
header's quoting as is. Otherwise, the parameters will be quoted (the
default).
- An alternate header can be specified in the header argument. When the
- Content-Type: header is set, we'll always also add a MIME-Version:
+ An alternative header can be specified in the header argument. When
+ the Content-Type header is set, we'll always also add a MIME-Version
header.
"""
# BAW: should we be strict?
if not type.count('/') == 1:
raise ValueError
- # Set the Content-Type: you get a MIME-Version:
+ # Set the Content-Type, you get a MIME-Version
if header.lower() == 'content-type':
del self['mime-version']
self['MIME-Version'] = '1.0'
@@ -648,7 +676,7 @@
def get_filename(self, failobj=None):
"""Return the filename associated with the payload if present.
- The filename is extracted from the Content-Disposition: header's
+ The filename is extracted from the Content-Disposition header's
`filename' parameter, and it is unquoted.
"""
missing = []
@@ -666,45 +694,48 @@
def get_boundary(self, failobj=None):
"""Return the boundary associated with the payload if present.
- The boundary is extracted from the Content-Type: header's `boundary'
+ The boundary is extracted from the Content-Type header's `boundary'
parameter, and it is unquoted.
"""
missing = []
boundary = self.get_param('boundary', missing)
if boundary is missing:
return failobj
+ if isinstance(boundary, TupleType):
+ # RFC 2231 encoded, so decode. It better end up as ascii
+ return unicode(boundary[2], boundary[0]).encode('us-ascii')
return _unquotevalue(boundary.strip())
def set_boundary(self, boundary):
- """Set the boundary parameter in Content-Type: to 'boundary'.
+ """Set the boundary parameter in Content-Type to 'boundary'.
- This is subtly different than deleting the Content-Type: header and
+ This is subtly different than deleting the Content-Type header and
adding a new one with a new boundary parameter via add_header(). The
main difference is that using the set_boundary() method preserves the
- order of the Content-Type: header in the original message.
+ order of the Content-Type header in the original message.
- HeaderParseError is raised if the message has no Content-Type: header.
+ HeaderParseError is raised if the message has no Content-Type header.
"""
missing = []
params = self._get_params_preserve(missing, 'content-type')
if params is missing:
- # There was no Content-Type: header, and we don't know what type
+ # There was no Content-Type header, and we don't know what type
# to set it to, so raise an exception.
- raise Errors.HeaderParseError, 'No Content-Type: header found'
+ raise Errors.HeaderParseError, 'No Content-Type header found'
newparams = []
- foundp = 0
+ foundp = False
for pk, pv in params:
if pk.lower() == 'boundary':
newparams.append(('boundary', '"%s"' % boundary))
- foundp = 1
+ foundp = True
else:
newparams.append((pk, pv))
if not foundp:
- # The original Content-Type: header had no boundary attribute.
+ # The original Content-Type header had no boundary attribute.
# Tack one one the end. BAW: should we raise an exception
# instead???
newparams.append(('boundary', '"%s"' % boundary))
- # Replace the existing Content-Type: header with the new value
+ # Replace the existing Content-Type header with the new value
newheaders = []
for h, v in self._headers:
if h.lower() == 'content-type':
@@ -726,15 +757,30 @@
# Must be using Python 2.1
from email._compat21 import walk
+ def get_content_charset(self, failobj=None):
+ """Return the charset parameter of the Content-Type header.
+
+ If there is no Content-Type header, or if that header has no charset
+ parameter, failobj is returned.
+ """
+ missing = []
+ charset = self.get_param('charset', missing)
+ if charset is missing:
+ return failobj
+ if isinstance(charset, TupleType):
+ # RFC 2231 encoded, so decode it, and it better end up as ascii.
+ return unicode(charset[2], charset[0]).encode('us-ascii')
+ return charset
+
def get_charsets(self, failobj=None):
"""Return a list containing the charset(s) used in this message.
- The returned list of items describes the Content-Type: headers'
+ The returned list of items describes the Content-Type headers'
charset parameter for this message and all the subparts in its
payload.
Each item will either be a string (the value of the charset parameter
- in the Content-Type: header of that part) or the value of the
+ in the Content-Type header of that part) or the value of the
'failobj' parameter (defaults to None), if the part does not have a
main MIME type of "text", or the charset is not defined.
@@ -742,4 +788,4 @@
one for the container message (i.e. self), so that a non-multipart
message will still return a list of length 1.
"""
- return [part.get_param('charset', failobj) for part in self.walk()]
+ return [part.get_content_charset(failobj) for part in self.walk()]
Index: Parser.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/Parser.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- Parser.py 11 Sep 2002 22:35:59 -0000 1.1
+++ Parser.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -14,9 +14,16 @@
EMPTYSTRING = ''
NL = '\n'
+try:
+ True, False
+except NameError:
+ True = 1
+ False = 0
+
+
class Parser:
- def __init__(self, _class=Message.Message, strict=0):
+ def __init__(self, _class=Message.Message, strict=False):
"""Parser of RFC 2822 and MIME email messages.
Creates an in-memory object tree representing the email message, which
@@ -41,14 +48,28 @@
self._class = _class
self._strict = strict
- def parse(self, fp, headersonly=0):
+ def parse(self, fp, headersonly=False):
+ """Create a message structure from the data in a file.
+
+ Reads all the data from the file and returns the root of the message
+ structure. Optional headersonly is a flag specifying whether to stop
+ parsing after reading the headers or not. The default is False,
+ meaning it parses the entire contents of the file.
+ """
root = self._class()
self._parseheaders(root, fp)
if not headersonly:
self._parsebody(root, fp)
return root
- def parsestr(self, text, headersonly=0):
+ def parsestr(self, text, headersonly=False):
+ """Create a message structure from a string.
+
+ Returns the root of the message structure. Optional headersonly is a
+ flag specifying whether to stop parsing after reading the headers or
+ not. The default is False, meaning it parses the entire contents of
+ the string.
+ """
return self.parse(StringIO(text), headersonly=headersonly)
def _parseheaders(self, container, fp):
@@ -57,7 +78,7 @@
lastheader = ''
lastvalue = []
lineno = 0
- while 1:
+ while True:
# Don't strip the line before we test for the end condition,
# because whitespace-only header lines are RFC compliant
# continuation lines.
@@ -216,7 +237,7 @@
# by a blank line. We'll represent each header block as a
# separate Message object
blocks = []
- while 1:
+ while True:
blockmsg = self._class()
self._parseheaders(blockmsg, fp)
if not len(blockmsg):
Index: Utils.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/Utils.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- Utils.py 11 Sep 2002 22:35:59 -0000 1.1
+++ Utils.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -22,6 +22,12 @@
from rfc822 import parsedate_tz as _parsedate_tz
try:
+ True, False
+except NameError:
+ True = 1
+ False = 0
+
+try:
from quopri import decodestring as _qdecode
except ImportError:
# Python 2.1 doesn't have quopri.decodestring()
@@ -30,12 +36,11 @@
if not s:
return s
- hasnewline = (s[-1] == '\n')
infp = StringIO(s)
outfp = StringIO()
_quopri.decode(infp, outfp)
value = outfp.getvalue()
- if not hasnewline and value[-1] =='\n':
+ if not s.endswith('\n') and value.endswith('\n'):
return value[:-1]
return value
@@ -67,9 +72,8 @@
# newline". Blech!
if not s:
return s
- hasnewline = (s[-1] == '\n')
value = base64.decodestring(s)
- if not hasnewline and value[-1] == '\n':
+ if not s.endswith('\n') and value.endswith('\n'):
return value[:-1]
return value
@@ -88,7 +92,7 @@
def formataddr(pair):
"""The inverse of parseaddr(), this takes a 2-tuple of the form
(realname, email_address) and returns the string value suitable
- for an RFC 2822 From:, To: or Cc:.
+ for an RFC 2822 From, To or Cc header.
If the first element of pair is false, then the second element is
returned unmodified.
@@ -170,7 +174,7 @@
-def formatdate(timeval=None, localtime=0):
+def formatdate(timeval=None, localtime=False):
"""Returns a date string as specified by RFC 2822, e.g.:
Fri, 09 Nov 2001 01:08:47 -0000
@@ -178,7 +182,7 @@
Optional timeval if given is a floating point time value as accepted by
gmtime() and localtime(), otherwise the current time is used.
- Optional localtime is a flag that when true, interprets timeval, and
+ Optional localtime is a flag that when True, interprets timeval, and
returns a date relative to the local timezone instead of UTC, properly
taking daylight savings time into account.
"""
@@ -217,12 +221,12 @@
def make_msgid(idstring=None):
- """Returns a string suitable for RFC 2822 compliant Message-ID:, e.g:
+ """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:
<[EMAIL PROTECTED]>
Optional idstring if given is a string used to strengthen the
- uniqueness of the Message-ID, otherwise an empty string is used.
+ uniqueness of the message id.
"""
timeval = time.time()
utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
@@ -282,19 +286,28 @@
def encode_rfc2231(s, charset=None, language=None):
- """Encode string according to RFC 2231"""
+ """Encode string according to RFC 2231.
+
+ If neither charset nor language is given, then s is returned as-is. If
+ charset is given but not language, the string is encoded using the empty
+ string for language.
+ """
import urllib
s = urllib.quote(s, safe='')
if charset is None and language is None:
return s
- else:
- return "%s'%s'%s" % (charset, language, s)
+ if language is None:
+ language = ''
+ return "%s'%s'%s" % (charset, language, s)
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
def decode_params(params):
- """Decode parameters list according to RFC 2231"""
+ """Decode parameters list according to RFC 2231.
+
+ params is a sequence of 2-tuples containing (content type, string value).
+ """
new_params = []
# maps parameter's name to a list of continuations
rfc2231_params = {}
Index: __init__.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/__init__.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- __init__.py 11 Sep 2002 22:35:59 -0000 1.1
+++ __init__.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -4,36 +4,69 @@
"""A package for parsing, handling, and generating email messages.
"""
-__version__ = '2.3'
+__version__ = '2.4'
-__all__ = ['Charset',
- 'Encoders',
- 'Errors',
- 'Generator',
- 'Header',
- 'Iterators',
- 'MIMEAudio',
- 'MIMEBase',
- 'MIMEImage',
- 'MIMEMessage',
- 'MIMEText',
- 'Message',
- 'Parser',
- 'Utils',
- 'base64MIME',
- 'quopriMIME',
- 'message_from_string',
- 'message_from_file',
- ]
+__all__ = [
+ 'base64MIME',
+ 'Charset',
+ 'Encoders',
+ 'Errors',
+ 'Generator',
+ 'Header',
+ 'Iterators',
+ 'Message',
+ 'MIMEAudio',
+ 'MIMEBase',
+ 'MIMEImage',
+ 'MIMEMessage',
+ 'MIMEMultipart',
+ 'MIMENonMultipart',
+ 'MIMEText',
+ 'Parser',
+ 'quopriMIME',
+ 'Utils',
+ 'message_from_string',
+ 'message_from_file',
+ ]
+
+try:
+ True, False
+except NameError:
+ True = 1
+ False = 0
-# Some convenience routines
-from email.Parser import Parser as _Parser
-from email.Message import Message as _Message
+# Some convenience routines. Don't import Parser and Message as side-effects
+# of importing email since those cascadingly import most of the rest of the
+# email package.
+def message_from_string(s, _class=None, strict=False):
+ """Parse a string into a Message object model.
+
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from email.Parser import Parser
+ if _class is None:
+ from email.Message import Message
+ _class = Message
+ return Parser(_class, strict=strict).parsestr(s)
+
+def message_from_file(fp, _class=None, strict=False):
+ """Read a file and parse its contents into a Message object model.
-def message_from_string(s, _class=_Message, strict=0):
- return _Parser(_class, strict=strict).parsestr(s)
+ Optional _class and strict are passed to the Parser constructor.
+ """
+ from email.Parser import Parser
+ if _class is None:
+ from email.Message import Message
+ _class = Message
+ return Parser(_class, strict=strict).parse(fp)
-def message_from_file(fp, _class=_Message, strict=0):
- return _Parser(_class, strict=strict).parse(fp)
+
+
+# Patch encodings.aliases to recognize 'ansi_x3.4_1968' which isn't a standard
+# alias in Python 2.1.3, but is used by the email package test suite.
+from encodings.aliases import aliases # The aliases dictionary
+if not aliases.has_key('ansi_x3.4_1968'):
+ aliases['ansi_x3.4_1968'] = 'ascii'
+del aliases # Not needed any more
Index: base64MIME.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/base64MIME.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- base64MIME.py 11 Sep 2002 22:35:59 -0000 1.1
+++ base64MIME.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -41,6 +41,12 @@
# See also Charset.py
MISC_LEN = 7
+try:
+ True, False
+except NameError:
+ True = 1
+ False = 0
+
# Helpers
@@ -56,8 +62,8 @@
-def header_encode(header, charset='iso-8859-1', keep_eols=0, maxlinelen=76,
- eol=NL):
+def header_encode(header, charset='iso-8859-1', keep_eols=False,
+ maxlinelen=76, eol=NL):
"""Encode a single header line with Base64 encoding in a given charset.
Defined in RFC 2045, this Base64 encoding is identical to normal Base64
@@ -69,7 +75,7 @@
End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
to the canonical email line separator \\r\\n unless the keep_eols
- parameter is set to true (the default is false).
+ parameter is True (the default is False).
Each line of the header will be terminated in the value of eol, which
defaults to "\\n". Set this to "\\r\\n" if you are using the result of
@@ -106,7 +112,7 @@
lines = []
for line in base64ed:
# Ignore the last character of each line if it is a newline
- if line[-1] == NL:
+ if line.endswith(NL):
line = line[:-1]
# Add the chrome
lines.append('=?%s?b?%s?=' % (charset, line))
@@ -117,13 +123,13 @@
-def encode(s, binary=1, maxlinelen=76, eol=NL):
+def encode(s, binary=True, maxlinelen=76, eol=NL):
"""Encode a string with base64.
Each line will be wrapped at, at most, maxlinelen characters (defaults to
76 characters).
- If binary is false, end-of-line characters will be converted to the
+ If binary is False, end-of-line characters will be converted to the
canonical email end-of-line sequence \\r\\n. Otherwise they will be left
verbatim (this is the default).
@@ -143,7 +149,7 @@
# BAW: should encode() inherit b2a_base64()'s dubious behavior in
# adding a newline to the encoded string?
enc = b2a_base64(s[i:i + max_unencoded])
- if enc[-1] == NL and eol <> NL:
+ if enc.endswith(NL) and eol <> NL:
enc = enc[:-1] + eol
encvec.append(enc)
return EMPTYSTRING.join(encvec)
Index: quopriMIME.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/quopriMIME.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- quopriMIME.py 11 Sep 2002 22:35:59 -0000 1.1
+++ quopriMIME.py 1 Oct 2002 20:06:22 -0000 1.2
@@ -38,17 +38,23 @@
hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]')
bqre = re.compile(r'[^ !-<>-~\t]')
+try:
+ True, False
+except NameError:
+ True = 1
+ False = 0
+
# Helpers
def header_quopri_check(c):
- """Return true if the character should be escaped with header quopri."""
- return hqre.match(c) and 1
+ """Return True if the character should be escaped with header quopri."""
+ return hqre.match(c) and True
def body_quopri_check(c):
- """Return true if the character should be escaped with body quopri."""
- return bqre.match(c) and 1
+ """Return True if the character should be escaped with body quopri."""
+ return bqre.match(c) and True
def header_quopri_len(s):
@@ -92,8 +98,8 @@
-def header_encode(header, charset="iso-8859-1", keep_eols=0, maxlinelen=76,
- eol=NL):
+def header_encode(header, charset="iso-8859-1", keep_eols=False,
+ maxlinelen=76, eol=NL):
"""Encode a single header line with quoted-printable (like) encoding.
Defined in RFC 2045, this `Q' encoding is similar to quoted-printable, but
@@ -114,7 +120,7 @@
End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
to the canonical email line separator \\r\\n unless the keep_eols
- parameter is set to true (the default is false).
+ parameter is True (the default is False).
Each line of the header will be terminated in the value of eol, which
defaults to "\\n". Set this to "\\r\\n" if you are using the result of
@@ -151,10 +157,10 @@
-def encode(body, binary=0, maxlinelen=76, eol=NL):
+def encode(body, binary=False, maxlinelen=76, eol=NL):
"""Encode with quoted-printable, wrapping at maxlinelen characters.
- If binary is false (the default), end-of-line characters will be converted
+ If binary is False (the default), end-of-line characters will be converted
to the canonical email end-of-line sequence \\r\\n. Otherwise they will
be left verbatim.
@@ -213,7 +219,7 @@
# Now at end of line..
if prev and prev in ' \t':
# Special case for whitespace at end of file
- if lineno+1 == len(lines):
+ if lineno + 1 == len(lines):
prev = quote(prev)
if len(encoded_line) + len(prev) > maxlinelen:
encoded_body += encoded_line + '=' + eol + prev
@@ -283,7 +289,7 @@
if i == n:
decoded += eol
# Special case if original string did not end with eol
- if encoded[-1] <> eol and decoded[-1] == eol:
+ if not encoded.endswith(eol) and decoded.endswith(eol):
decoded = decoded[:-1]
return decoded
_______________________________________
tmda-cvs mailing list
http://tmda.net/lists/listinfo/tmda-cvs