On 06/05/2017 11:14 AM, sebb wrote:
> On 5 June 2017 at 09:33, <[email protected]> wrote:
>> Repository: incubator-ponymail
>> Updated Branches:
>> refs/heads/master 2802e2905 -> fda07b8d7
>>
>>
>> Add the missing bits from last commit
>>
>> - Adds back date munging for 'medium'
>> - Removes archived-at and no date as an option for 'redundant'
>> (only Date: header is guaranteed to be consistent here)
>> - Adds the subject variable that was missing.
>> - Some additional comments
>> - Adds missing import
>>
>>
>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>> Commit:
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/fda07b8d
>> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/fda07b8d
>> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/fda07b8d
>>
>> Branch: refs/heads/master
>> Commit: fda07b8d73decb0943c817d6fee69416c2016714
>> Parents: 2802e29
>> Author: Daniel Gruno <[email protected]>
>> Authored: Mon Jun 5 10:32:36 2017 +0200
>> Committer: Daniel Gruno <[email protected]>
>> Committed: Mon Jun 5 10:32:36 2017 +0200
>>
>> ----------------------------------------------------------------------
>> tools/generators.py | 33 ++++++++++++++++++++++++++++++---
>> 1 file changed, 30 insertions(+), 3 deletions(-)
>> ----------------------------------------------------------------------
>>
>>
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/fda07b8d/tools/generators.py
>> ----------------------------------------------------------------------
>> diff --git a/tools/generators.py b/tools/generators.py
>> index 3f9c213..73a8210 100644
>> --- a/tools/generators.py
>> +++ b/tools/generators.py
>> @@ -21,6 +21,7 @@ This file contains the various ID generators for Pony
>> Mail's archivers.
>>
>> import hashlib
>> import email.utils
>> +import time
>>
>> # Full generator: uses the entire email (including server-dependent data)
>> # This is the recommended generator for single-node setups.
>> @@ -28,31 +29,57 @@ def full(msg, body, lid, attachments):
>> mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>> return mid
>>
>> -# Medium: Standard generator
>> +# Medium: Standard 0.9 generator - Not recommended for future installations.
>> +# See 'full' or 'redundant' generators instead.
>> def medium(msg, body, lid, attachments):
>> # Use text body
>> xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> # Use List ID
>> xbody += bytes(lid, encoding='ascii')
>> # Use Date header
>> + mdate = None
>> + try:
>> + mdate = email.utils.parsedate_tz(msg.get('date'))
>> + except:
>> + pass
>> + # In keeping with preserving the past, we have kept this next
>> section(s).
>> + # For all intents and purposes, this is not a proper way of maintaining
>> + # a consistent ID in case of missing dates. It is recommended to use
>> + # another generator such as full or redundant here.
>> + if not mdate and msg_metadata.get('archived-at'):
>> + mdate = email.utils.parsedate_tz(msg_metadata.get('archived-at'))
>> + elif not mdate:
>
> The original code has a print() command here to warn about the missing date.
That warning is still there, in archiver.py. I did not put it in the
generators, as that would just duplicate it.
>
>> + mdate = time.gmtime() # Get a standard 9-tuple
>> + mdate = mdate + (0, ) # Fake a TZ (10th element)
>> + mdatestring = time.strftime("%Y/%m/%d %H:%M:%S",
>> time.gmtime(email.utils.mktime_tz(mdate)))
>> xbody += bytes(mdatestring, encoding='ascii')
>> mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> return mid
>>
>> # Redundant: Use data that is guaranteed to be the same across redundant
>> setups
>> -# This is the recommended generator for redundant cluster setups
>> +# This is the recommended generator for redundant cluster setups.
>> +# Unlike 'medium', this only makes use of the Date: header and not the
>> archived-at,
>> +# as the archived-at may change from node to node (and will change if not
>> in the raw mbox file)
>> def redundant(msg, body, lid, attachments):
>> # Use text body
>> xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> # Use List ID
>> xbody += bytes(lid, encoding='ascii')
>> - # Use Date header
>> + # Use Date header. Don't use archived-at, as the archiver sets this if
>> not present.
>> + mdate = None
>> + mdatestring = "(null)" # Default to null, ONLY changed if replicable
>> across imports
>> + try:
>> + mdate = email.utils.parsedate_tz(msg.get('date'))
>> + mdatestring = time.strftime("%Y/%m/%d %H:%M:%S",
>> time.gmtime(email.utils.mktime_tz(mdate)))
>> + except:
>> + pass
>> xbody += bytes(mdatestring, encoding='ascii')
>> # Use sender
>> sender = msg.get('from', None)
>> if sender:
>> xbody += bytes(sender, encoding = 'ascii')
>> # Use subject
>> + subject = msg.get('subject', None)
>> if subject:
>> xbody += bytes(subject, encoding = 'ascii')
>> # Use attachment hashes if present
>>