On 06/05/2017 12:22 AM, sebb wrote:
> On 4 June 2017 at 15:02, <[email protected]> wrote:
>> split generators into a file of its own
>>
>> Also fix up generators:
>> - medium goes back to the way it was
>> - a new 'redundant' generator for cluster setups
>>
>>
>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>> Commit:
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
>> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
>> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>>
>> Branch: refs/heads/master
>> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
>> Parents: 8b7ede8
>> Author: Daniel Gruno <[email protected]>
>> Authored: Sun Jun 4 15:45:18 2017 +0200
>> Committer: Daniel Gruno <[email protected]>
>> Committed: Sun Jun 4 15:45:18 2017 +0200
>>
>> ----------------------------------------------------------------------
>> tools/archiver.py | 17 ++++-------
>> tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 80 insertions(+), 11 deletions(-)
>> ----------------------------------------------------------------------
>>
>>
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
>> ----------------------------------------------------------------------
>> diff --git a/tools/archiver.py b/tools/archiver.py
>> index 41933f7..0966b13 100755
>> --- a/tools/archiver.py
>> +++ b/tools/archiver.py
>> @@ -58,6 +58,7 @@ import io
>> import logging
>> import traceback
>> import sys
>> +import generators
>>
>> # Fetch config
>> path = os.path.dirname(os.path.realpath(__file__))
>> @@ -316,20 +317,14 @@ class Archiver(object):
>> if body is not None or attachments:
>> pmid = mid
>> try:
>> - # Use full message as bytes for mid?
>> if archiver_generator == "full":
>> - mid = "%s@%s" %
>> (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>> + mid = generators.full(msg, body, lid, attachments)
>> elif archiver_generator == "medium":
>> - xbody = body if type(body) is bytes else
>> body.encode('ascii', 'ignore')
>> - xbody += bytes(lid, encoding='ascii')
>> - xbody += bytes(mdatestring, encoding='ascii')
>> - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> - if attachments:
>> - for a in attachments:
>> - xbody += bytes(a['hash'], encoding = 'ascii')
>> + mid = generators.medium(msg, body, lid, attachments)
>> + elif archiver_generator == "redundant":
>> + mid = generators.redundant(msg, body, lid, attachments)
>> else:
>> - # Or revert to the old way?
>> - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body)
>> is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>> + mid = generators.legacy(msg, body, lid, attachments)
>> except Exception as err:
>> if logger:
>> logger.warn("Could not generate MID: %s" % err)
>>
>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
>> ----------------------------------------------------------------------
>> diff --git a/tools/generators.py b/tools/generators.py
>> new file mode 100644
>> index 0000000..af566fc
>> --- /dev/null
>> +++ b/tools/generators.py
>> @@ -0,0 +1,74 @@
>> +#!/usr/bin/env/python3
>> +# -*- coding: utf-8 -*-
>> +# Licensed to the Apache Software Foundation (ASF) under one or more
>> +# contributor license agreements. See the NOTICE file distributed with
>> +# this work for additional information regarding copyright ownership.
>> +# The ASF licenses this file to You under the Apache License, Version 2.0
>> +# (the "License"); you may not use this file except in compliance with
>> +# the License. You may obtain a copy of the License at
>> +#
>> +# http://www.apache.org/licenses/LICENSE-2.0
>> +#
>> +# Unless required by applicable law or agreed to in writing, software
>> +# distributed under the License is distributed on an "AS IS" BASIS,
>> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> +# See the License for the specific language governing permissions and
>> +# limitations under the License.
>> +
>> +"""
>> +This file contains the various ID generators for Pony Mail's archivers.
>> +"""
>> +
>> +import hashlib
>> +import email.utils
>> +
>> +# Full generator: uses the entire email (including server-dependent data)
>> +# This is the recommended generator for single-node setups.
>> +def full(msg, body, lid, attachments):
>> + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>> + return mid
>> +
>> +# Medium: Standard generator
>> +def medium(msg, body, lid, attachments):
>> + # Use text body
>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> + # Use List ID
>> + xbody += bytes(lid, encoding='ascii')
>> + # Use Date header
>> + xbody += bytes(mdatestring, encoding='ascii')
>> + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> + return mid
>> +
>> +# Redundant: Use data that is guaranteed to be the same across redundant setups
>> +# This is the recommended generator for redundant cluster setups
>> +def redundant(msg, body, lid, attachments):
>> + # Use text body
>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>> + # Use List ID
>> + xbody += bytes(lid, encoding='ascii')
>> + # Use Date header
>> + xbody += bytes(mdatestring, encoding='ascii')
>> + # Use sender
>> + sender = msg.get('from', None)
>> + if sender:
>> + xbody += bytes(sender, encoding = 'ascii')
>> + # Use subject
>> + if subject:
>> + xbody += bytes(subject, encoding = 'ascii')
>> + # Use attachment hashes if present
>> + if attachments:
>> + for a in attachments:
>> + xbody += bytes(a['hash'], encoding = 'ascii')
>> + mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>> + return mid
>> +
>> +
>> +# Old school way of making IDs
>> +def legacy(msg, body, lid, attachments):
>
> -1
>
> AFAICT this is not exactly the same as the original code.
>
>> + mdate = email.utils.parsedate_tz(msg.get('date'))
>> + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is
>> valid
>
> What happens if either of the previous two lines throws an error?
Good catch! Fixed in 2802e2905.
>
>> + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else
>> body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>> + return mid
>> +
>> +
>> +
>>