Update of /cvsroot/tmda/tmda/TMDA/pythonlib/email
In directory usw-pr-cvs1:/tmp/cvs-serv13083

Modified Files:
        Charset.py Message.py Parser.py __init__.py 
Log Message:
Sync TMDA/pythonlib/email with email v2.4.2, which contains some
very important bugfixes.


Index: Charset.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/Charset.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- Charset.py  1 Oct 2002 20:06:22 -0000       1.2
+++ Charset.py  10 Oct 2002 17:27:35 -0000      1.3
@@ -177,13 +177,15 @@
                   this attribute will have the same value as the input_codec.
     """
     def __init__(self, input_charset=DEFAULT_CHARSET):
+        # RFC 2046, $4.1.2 says charsets are not case sensitive
+        input_charset = input_charset.lower()
         # Set the input charset after filtering through the aliases
         self.input_charset = ALIASES.get(input_charset, input_charset)
         # We can try to guess which encoding and conversion to use by the
         # charset_map dictionary.  Try that first, but let the user override
         # it.
         henc, benc, conv = CHARSETS.get(self.input_charset,
-                                        (SHORTEST, SHORTEST, None))
+                                        (SHORTEST, BASE64, None))
         # Set the attributes, allowing the arguments to override the default.
         self.header_encoding = henc
         self.body_encoding = benc

Index: Message.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/Message.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- Message.py  1 Oct 2002 20:06:22 -0000       1.2
+++ Message.py  10 Oct 2002 17:27:35 -0000      1.3
@@ -760,8 +760,9 @@
     def get_content_charset(self, failobj=None):
         """Return the charset parameter of the Content-Type header.
 
-        If there is no Content-Type header, or if that header has no charset
-        parameter, failobj is returned.
+        The returned string is always coerced to lower case.  If there is no
+        Content-Type header, or if that header has no charset parameter,
+        failobj is returned.
         """
         missing = []
         charset = self.get_param('charset', missing)
@@ -769,8 +770,9 @@
             return failobj
         if isinstance(charset, TupleType):
             # RFC 2231 encoded, so decode it, and it better end up as ascii.
-            return unicode(charset[2], charset[0]).encode('us-ascii')
-        return charset
+            charset = unicode(charset[2], charset[0]).encode('us-ascii')
+        # RFC 2046, $4.1.2 says charsets are not case sensitive
+        return charset.lower()
 
     def get_charsets(self, failobj=None):
         """Return a list containing the charset(s) used in this message.

Index: Parser.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/Parser.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- Parser.py   1 Oct 2002 20:06:22 -0000       1.2
+++ Parser.py   10 Oct 2002 17:27:35 -0000      1.3
@@ -20,6 +20,8 @@
     True = 1
     False = 0
 
+nlcre = re.compile('\r\n|\r|\n')
+
 
 
 class Parser:
@@ -137,7 +139,7 @@
         # Parse the body, but first split the payload on the content-type
         # boundary if present.
         boundary = container.get_boundary()
-        isdigest = (container.get_type() == 'multipart/digest')
+        isdigest = (container.get_content_type() == 'multipart/digest')
         # If there's a boundary, split the payload text into its constituent
         # parts and parse each separately.  Otherwise, just parse the rest of
         # the body as a single message.  Note: any exceptions raised in the
@@ -167,8 +169,7 @@
                 preamble = payload[0:start]
             # Find out what kind of line endings we're using
             start += len(mo.group('sep')) + len(mo.group('ws'))
-            cre = re.compile('\r\n|\r|\n')
-            mo = cre.search(payload, start)
+            mo = nlcre.search(payload, start)
             if mo:
                 start += len(mo.group(0))
             # We create a compiled regexp first because we need to be able to
@@ -209,12 +210,12 @@
                 payload[start:terminator])
             for part in parts:
                 if isdigest:
-                    if part[0] == linesep:
+                    if part.startswith(linesep):
                         # There's no header block so create an empty message
                         # object as the container, and lop off the newline so
                         # we can parse the sub-subobject
                         msgobj = self._class()
-                        part = part[1:]
+                        part = part[len(linesep):]
                     else:
                         parthdrs, part = part.split(linesep+linesep, 1)
                         # msgobj in this case is the "message/rfc822" container

Index: __init__.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/__init__.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- __init__.py 1 Oct 2002 20:06:22 -0000       1.2
+++ __init__.py 10 Oct 2002 17:27:36 -0000      1.3
@@ -4,7 +4,7 @@
 """A package for parsing, handling, and generating email messages.
 """
 
-__version__ = '2.4'
+__version__ = '2.4.2'
 
 __all__ = [
     'base64MIME',

_______________________________________
tmda-cvs mailing list
http://tmda.net/lists/listinfo/tmda-cvs

Reply via email to