Update of /cvsroot/tmda/tmda/TMDA/pythonlib/email
In directory sc8-pr-cvs1:/tmp/cvs-serv22097/TMDA/pythonlib/email

Modified Files:
        Header.py __init__.py _compat21.py _parseaddr.py 
Log Message:
Sync Python email package with version 2.5a1. This allows us to fix
the non-ASCII decoding problem reported by Cory Wright in
<[EMAIL PROTECTED]> on tmda-users.


Index: Header.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/Header.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- Header.py   14 Oct 2002 22:58:12 -0000      1.3
+++ Header.py   7 Jan 2003 00:03:02 -0000       1.4
@@ -127,7 +127,7 @@
 
 class Header:
     def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
-                 continuation_ws=' '):
+                 continuation_ws=' ', errors='strict'):
         """Create a MIME-compliant header that can contain many character sets.
 
         Optional s is the initial header value.  If None, the initial header
@@ -150,6 +150,8 @@
         continuation_ws must be RFC 2822 compliant folding whitespace (usually
         either a space or a hard tab) which will be prepended to continuation
         lines.
+
+        errors is passed through to the .append() call.
         """
         if charset is None:
             charset = USASCII
@@ -161,7 +163,7 @@
         # BAW: I believe `chunks' and `maxlinelen' should be non-public.
         self._chunks = []
         if s is not None:
-            self.append(s, charset)
+            self.append(s, charset, errors)
         if maxlinelen is None:
             maxlinelen = MAXLINELEN
         if header_name is None:
@@ -196,7 +198,7 @@
     def __ne__(self, other):
         return not self == other
 
-    def append(self, s, charset=None):
+    def append(self, s, charset=None, errors='strict'):
         """Append a string to the MIME header.
 
         Optional charset, if given, should be a Charset instance or the name
@@ -213,6 +215,9 @@
         using RFC 2047 rules, the Unicode string will be encoded using the
         following charsets in order: us-ascii, the charset hint, utf-8.  The
         first character set not to provoke a UnicodeError is used.
+
+        Optional `errors' is passed as the third argument to any unicode() or
+        ustr.encode() call.
         """
         if charset is None:
             charset = self._charset
@@ -227,12 +232,12 @@
                 # Possibly raise UnicodeError if the byte string can't be
                 # converted to a unicode with the input codec of the charset.
                 incodec = charset.input_codec or 'us-ascii'
-                ustr = unicode(s, incodec)
+                ustr = unicode(s, incodec, errors)
                 # Now make sure that the unicode could be converted back to a
                 # byte string with the output codec, which may be different
                 # than the iput coded.  Still, use the original byte string.
                 outcodec = charset.output_codec or 'us-ascii'
-                ustr.encode(outcodec)
+                ustr.encode(outcodec, errors)
             elif isinstance(s, UnicodeType):
                 # Now we have to be sure the unicode string can be converted
                 # to a byte string with a reasonable output codec.  We want to
@@ -240,7 +245,7 @@
                 for charset in USASCII, charset, UTF8:
                     try:
                         outcodec = charset.output_codec or 'us-ascii'
-                        s = s.encode(outcodec)
+                        s = s.encode(outcodec, errors)
                         break
                     except UnicodeError:
                         pass

Index: __init__.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/__init__.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- __init__.py 14 Oct 2002 22:58:12 -0000      1.4
+++ __init__.py 7 Jan 2003 00:03:03 -0000       1.5
@@ -4,7 +4,7 @@
 """A package for parsing, handling, and generating email messages.
 """
 
-__version__ = '2.4.3'
+__version__ = '2.5a1'
 
 __all__ = [
     'base64MIME',

Index: _compat21.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/_compat21.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- _compat21.py        11 Sep 2002 22:35:59 -0000      1.1
+++ _compat21.py        7 Jan 2003 00:03:03 -0000       1.2
@@ -31,7 +31,7 @@
 
 
 def _isstring(obj):
-    return isinstance(obj, StringType) or isinstance(obj, UnicodeType)    
+    return isinstance(obj, StringType) or isinstance(obj, UnicodeType)
 
 
 

Index: _parseaddr.py
===================================================================
RCS file: /cvsroot/tmda/tmda/TMDA/pythonlib/email/_parseaddr.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- _parseaddr.py       5 Nov 2002 23:18:28 -0000       1.2
+++ _parseaddr.py       7 Jan 2003 00:03:03 -0000       1.3
@@ -6,6 +6,17 @@
 """
 
 import time
+from types import TupleType
+
+try:
+    True, False
+except NameError:
+    True = 1
+    False = 0
+
+SPACE = ' '
+EMPTYSTRING = ''
+COMMASPACE = ', '
 
 # Parse a date field
 _monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
@@ -36,9 +47,16 @@
     Accounts for military timezones.
     """
     data = data.split()
-    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
+    # The FWS after the comma after the day-of-week is optional, so search and
+    # adjust for this.
+    if data[0].endswith(',') or data[0].lower() in _daynames:
         # There's a dayname here. Skip it
         del data[0]
+    else:
+        i = data[0].rfind(',')
+        if i < 0:
+            return None
+        data[0] = data[0][i+1:]
     if len(data) == 3: # RFC 850 date, deprecated
         stuff = data[0].split('-')
         if len(stuff) == 3:
@@ -55,12 +73,13 @@
     data = data[:5]
     [dd, mm, yy, tm, tz] = data
     mm = mm.lower()
-    if not mm in _monthnames:
+    if mm not in _monthnames:
         dd, mm = mm, dd.lower()
-        if not mm in _monthnames:
+        if mm not in _monthnames:
             return None
-    mm = _monthnames.index(mm)+1
-    if mm > 12: mm = mm - 12
+    mm = _monthnames.index(mm) + 1
+    if mm > 12:
+        mm -= 12
     if dd[-1] == ',':
         dd = dd[:-1]
     i = yy.find(':')
@@ -112,9 +131,10 @@
 def parsedate(data):
     """Convert a time string to a time tuple."""
     t = parsedate_tz(data)
-    if type(t) == type( () ):
+    if isinstance(t, TupleType):
         return t[:9]
-    else: return t
+    else:
+        return t
 
 
 def mktime_tz(data):
@@ -135,8 +155,8 @@
 class AddrlistClass:
     """Address parser class by Ben Escoto.
 
-    To understand what this class does, it helps to have a copy of
-    RFC-822 in front of you.
+    To understand what this class does, it helps to have a copy of RFC 2822 in
+    front of you.
 
     Note: this class interface is deprecated and may be removed in the future.
     Use rfc822.AddressList instead.
@@ -153,6 +173,10 @@
         self.LWS = ' \t'
         self.CR = '\r\n'
         self.atomends = self.specials + self.LWS + self.CR
+        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
+        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
+        # syntax, so allow dots in phrases.
+        self.phraseends = self.atomends.replace('.', '')
         self.field = field
         self.commentlist = []
 
@@ -160,20 +184,25 @@
         """Parse up to the start of the next address."""
         while self.pos < len(self.field):
             if self.field[self.pos] in self.LWS + '\n\r':
-                self.pos = self.pos + 1
+                self.pos += 1
             elif self.field[self.pos] == '(':
                 self.commentlist.append(self.getcomment())
-            else: break
+            else:
+                break
 
     def getaddrlist(self):
         """Parse all addresses.
 
         Returns a list containing all of the addresses.
         """
-        ad = self.getaddress()
-        if ad:
-            return ad + self.getaddrlist()
-        else: return []
+        result = []
+        while True:
+            ad = self.getaddress()
+            if ad:
+                result += ad
+            else:
+                break
+        return result
 
     def getaddress(self):
         """Parse the next address."""
@@ -190,7 +219,7 @@
         if self.pos >= len(self.field):
             # Bad email address technically, no domain.
             if plist:
-                returnlist = [(' '.join(self.commentlist), plist[0])]
+                returnlist = [(SPACE.join(self.commentlist), plist[0])]
 
         elif self.field[self.pos] in '.@':
             # email address is just an addrspec
@@ -198,18 +227,18 @@
             self.pos = oldpos
             self.commentlist = oldcl
             addrspec = self.getaddrspec()
-            returnlist = [(' '.join(self.commentlist), addrspec)]
+            returnlist = [(SPACE.join(self.commentlist), addrspec)]
 
         elif self.field[self.pos] == ':':
             # address is a group
             returnlist = []
 
             fieldlen = len(self.field)
-            self.pos = self.pos + 1
+            self.pos += 1
             while self.pos < len(self.field):
                 self.gotonext()
                 if self.pos < fieldlen and self.field[self.pos] == ';':
-                    self.pos = self.pos + 1
+                    self.pos += 1
                     break
                 returnlist = returnlist + self.getaddress()
 
@@ -218,19 +247,20 @@
             routeaddr = self.getrouteaddr()
 
             if self.commentlist:
-                returnlist = [(' '.join(plist) + ' (' + \
-                         ' '.join(self.commentlist) + ')', routeaddr)]
-            else: returnlist = [(' '.join(plist), routeaddr)]
+                returnlist = [(SPACE.join(plist) + ' (' +
+                               ' '.join(self.commentlist) + ')', routeaddr)]
+            else:
+                returnlist = [(SPACE.join(plist), routeaddr)]
 
         else:
             if plist:
-                returnlist = [(' '.join(self.commentlist), plist[0])]
+                returnlist = [(SPACE.join(self.commentlist), plist[0])]
             elif self.field[self.pos] in self.specials:
-                self.pos = self.pos + 1
+                self.pos += 1
 
         self.gotonext()
         if self.pos < len(self.field) and self.field[self.pos] == ',':
-            self.pos = self.pos + 1
+            self.pos += 1
         return returnlist
 
     def getrouteaddr(self):
@@ -241,74 +271,75 @@
         if self.field[self.pos] != '<':
             return
 
-        expectroute = 0
-        self.pos = self.pos + 1
+        expectroute = False
+        self.pos += 1
         self.gotonext()
-        adlist = ""
+        adlist = ''
         while self.pos < len(self.field):
             if expectroute:
                 self.getdomain()
-                expectroute = 0
+                expectroute = False
             elif self.field[self.pos] == '>':
-                self.pos = self.pos + 1
+                self.pos += 1
                 break
             elif self.field[self.pos] == '@':
-                self.pos = self.pos + 1
-                expectroute = 1
+                self.pos += 1
+                expectroute = True
             elif self.field[self.pos] == ':':
-                self.pos = self.pos + 1
-                expectaddrspec = 1
+                self.pos += 1
             else:
                 adlist = self.getaddrspec()
-                self.pos = self.pos + 1
+                self.pos += 1
                 break
             self.gotonext()
 
         return adlist
 
     def getaddrspec(self):
-        """Parse an RFC-822 addr-spec."""
+        """Parse an RFC 2822 addr-spec."""
         aslist = []
 
         self.gotonext()
         while self.pos < len(self.field):
             if self.field[self.pos] == '.':
                 aslist.append('.')
-                self.pos = self.pos + 1
+                self.pos += 1
             elif self.field[self.pos] == '"':
                 aslist.append('"%s"' % self.getquote())
             elif self.field[self.pos] in self.atomends:
                 break
-            else: aslist.append(self.getatom())
+            else:
+                aslist.append(self.getatom())
             self.gotonext()
 
         if self.pos >= len(self.field) or self.field[self.pos] != '@':
-            return ''.join(aslist)
+            return EMPTYSTRING.join(aslist)
 
         aslist.append('@')
-        self.pos = self.pos + 1
+        self.pos += 1
         self.gotonext()
-        return ''.join(aslist) + self.getdomain()
+        return EMPTYSTRING.join(aslist) + self.getdomain()
 
     def getdomain(self):
         """Get the complete domain name from an address."""
         sdlist = []
         while self.pos < len(self.field):
             if self.field[self.pos] in self.LWS:
-                self.pos = self.pos + 1
+                self.pos += 1
             elif self.field[self.pos] == '(':
                 self.commentlist.append(self.getcomment())
             elif self.field[self.pos] == '[':
                 sdlist.append(self.getdomainliteral())
             elif self.field[self.pos] == '.':
-                self.pos = self.pos + 1
+                self.pos += 1
                 sdlist.append('.')
             elif self.field[self.pos] in self.atomends:
                 break
-            else: sdlist.append(self.getatom())
-        return ''.join(sdlist)
+            else:
+                sdlist.append(self.getatom())
+        return EMPTYSTRING.join(sdlist)
 
-    def getdelimited(self, beginchar, endchars, allowcomments = 1):
+    def getdelimited(self, beginchar, endchars, allowcomments=True):
         """Parse a header fragment delimited by special characters.
 
         `beginchar' is the start character for the fragment.
@@ -318,80 +349,89 @@
         `endchars' is a sequence of allowable end-delimiting characters.
         Parsing stops when one of these is encountered.
 
-        If `allowcomments' is non-zero, embedded RFC-822 comments
-        are allowed within the parsed fragment.
+        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
+        within the parsed fragment.
         """
         if self.field[self.pos] != beginchar:
             return ''
 
         slist = ['']
-        quote = 0
-        self.pos = self.pos + 1
+        quote = False
+        self.pos += 1
         while self.pos < len(self.field):
-            if quote == 1:
+            if quote:
                 slist.append(self.field[self.pos])
-                quote = 0
+                quote = False
             elif self.field[self.pos] in endchars:
-                self.pos = self.pos + 1
+                self.pos += 1
                 break
             elif allowcomments and self.field[self.pos] == '(':
                 slist.append(self.getcomment())
             elif self.field[self.pos] == '\\':
-                quote = 1
+                quote = True
             else:
                 slist.append(self.field[self.pos])
-            self.pos = self.pos + 1
+            self.pos += 1
 
-        return ''.join(slist)
+        return EMPTYSTRING.join(slist)
 
     def getquote(self):
         """Get a quote-delimited fragment from self's field."""
-        return self.getdelimited('"', '"\r', 0)
+        return self.getdelimited('"', '"\r', False)
 
     def getcomment(self):
         """Get a parenthesis-delimited fragment from self's field."""
-        return self.getdelimited('(', ')\r', 1)
+        return self.getdelimited('(', ')\r', True)
 
     def getdomainliteral(self):
-        """Parse an RFC-822 domain-literal."""
-        return '[%s]' % self.getdelimited('[', ']\r', 0)
+        """Parse an RFC 2822 domain-literal."""
+        return '[%s]' % self.getdelimited('[', ']\r', False)
+
+    def getatom(self, atomends=None):
+        """Parse an RFC 2822 atom.
 
-    def getatom(self):
-        """Parse an RFC-822 atom."""
+        Optional atomends specifies a different set of end token delimiters
+        (the default is to use self.atomends).  This is used e.g. in
+        getphraselist() since phrase endings must not include the `.' (which
+        is legal in phrases)."""
         atomlist = ['']
+        if atomends is None:
+            atomends = self.atomends
 
         while self.pos < len(self.field):
-            if self.field[self.pos] in self.atomends:
+            if self.field[self.pos] in atomends:
                 break
-            else: atomlist.append(self.field[self.pos])
-            self.pos = self.pos + 1
+            else:
+                atomlist.append(self.field[self.pos])
+            self.pos += 1
 
-        return ''.join(atomlist)
+        return EMPTYSTRING.join(atomlist)
 
     def getphraselist(self):
-        """Parse a sequence of RFC-822 phrases.
+        """Parse a sequence of RFC 2822 phrases.
 
-        A phrase is a sequence of words, which are in turn either
-        RFC-822 atoms or quoted-strings.  Phrases are canonicalized
-        by squeezing all runs of continuous whitespace into one space.
+        A phrase is a sequence of words, which are in turn either RFC 2822
+        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
+        runs of continuous whitespace into one space.
         """
         plist = []
 
         while self.pos < len(self.field):
             if self.field[self.pos] in self.LWS:
-                self.pos = self.pos + 1
+                self.pos += 1
             elif self.field[self.pos] == '"':
                 plist.append(self.getquote())
             elif self.field[self.pos] == '(':
                 self.commentlist.append(self.getcomment())
-            elif self.field[self.pos] in self.atomends:
+            elif self.field[self.pos] in self.phraseends:
                 break
-            else: plist.append(self.getatom())
+            else:
+                plist.append(self.getatom(self.phraseends))
 
         return plist
 
 class AddressList(AddrlistClass):
-    """An AddressList encapsulates a list of parsed RFC822 addresses."""
+    """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
     def __init__(self, field):
         AddrlistClass.__init__(self, field)
         if field:
@@ -403,7 +443,7 @@
         return len(self.addresslist)
 
     def __str__(self):
-        return ", ".join(map(dump_address_pair, self.addresslist))
+        return COMMASPACE.join(map(dump_address_pair, self.addresslist))
 
     def __add__(self, other):
         # Set union

_______________________________________
tmda-cvs mailing list
http://tmda.net/lists/listinfo/tmda-cvs

Reply via email to