On 4 June 2017 at 23:16, Daniel Gruno <[email protected]> wrote:
> On 06/05/2017 12:07 AM, sebb wrote:
>> On 4 June 2017 at 15:02,  <[email protected]> wrote:
>>> split generators into a file of its own
>>>
>>> Also fix up generators:
>>> - medium goes back to the way it was
>>
>> -1
>>
>> This is a very confusing change.
>>
>> The change to the medium generator should be reverted as a separate
>> commit, and the other changes added separately
>
> How would I go about dealing with that? I understand your objection to
> the commit style here, and I agree it should have been two separate
> commits, but I'm not sure I know how to rework that now.

Dunno, I'm not a Git expert.

>>
>>> - a new 'redundant' generator for cluster setups
>>>
>>>
>>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>>> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
>>> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
>>> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>>>
>>> Branch: refs/heads/master
>>> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
>>> Parents: 8b7ede8
>>> Author: Daniel Gruno <[email protected]>
>>> Authored: Sun Jun 4 15:45:18 2017 +0200
>>> Committer: Daniel Gruno <[email protected]>
>>> Committed: Sun Jun 4 15:45:18 2017 +0200
>>>
>>> ----------------------------------------------------------------------
>>>  tools/archiver.py   | 17 ++++-------
>>>  tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>  2 files changed, 80 insertions(+), 11 deletions(-)
>>> ----------------------------------------------------------------------
>>>
>>>
>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
>>> ----------------------------------------------------------------------
>>> diff --git a/tools/archiver.py b/tools/archiver.py
>>> index 41933f7..0966b13 100755
>>> --- a/tools/archiver.py
>>> +++ b/tools/archiver.py
>>> @@ -58,6 +58,7 @@ import io
>>>  import logging
>>>  import traceback
>>>  import sys
>>> +import generators
>>>
>>>  # Fetch config
>>>  path = os.path.dirname(os.path.realpath(__file__))
>>> @@ -316,20 +317,14 @@ class Archiver(object):
>>>          if body is not None or attachments:
>>>              pmid = mid
>>>              try:
>>> -                # Use full message as bytes for mid?
>>>                  if archiver_generator == "full":
>>> -                    mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>> +                    mid = generators.full(msg, body, lid, attachments)
>>>                  elif archiver_generator == "medium":
>>> -                    xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>> -                    xbody += bytes(lid, encoding='ascii')
>>> -                    xbody += bytes(mdatestring, encoding='ascii')
>>> -                    mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>> -                    if attachments:
>>> -                        for a in attachments:
>>> -                            xbody += bytes(a['hash'], encoding = 'ascii')
>>> +                    mid = generators.medium(msg, body, lid, attachments)
>>> +                elif archiver_generator == "redundant":
>>> +                    mid = generators.redundant(msg, body, lid, attachments)
>>>                  else:
>>> -                    # Or revert to the old way?
>>> -                    mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>>> +                    mid = generators.legacy(msg, body, lid, attachments)
>>>              except Exception as err:
>>>                  if logger:
>>>                      logger.warn("Could not generate MID: %s" % err)
>>>
>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
>>> ----------------------------------------------------------------------
>>> diff --git a/tools/generators.py b/tools/generators.py
>>> new file mode 100644
>>> index 0000000..af566fc
>>> --- /dev/null
>>> +++ b/tools/generators.py
>>> @@ -0,0 +1,74 @@
>>> +#!/usr/bin/env/python3
>>> +# -*- coding: utf-8 -*-
>>> +# Licensed to the Apache Software Foundation (ASF) under one or more
>>> +# contributor license agreements.  See the NOTICE file distributed with
>>> +# this work for additional information regarding copyright ownership.
>>> +# The ASF licenses this file to You under the Apache License, Version 2.0
>>> +# (the "License"); you may not use this file except in compliance with
>>> +# the License.  You may obtain a copy of the License at
>>> +#
>>> +#     http://www.apache.org/licenses/LICENSE-2.0
>>> +#
>>> +# Unless required by applicable law or agreed to in writing, software
>>> +# distributed under the License is distributed on an "AS IS" BASIS,
>>> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>>> +# See the License for the specific language governing permissions and
>>> +# limitations under the License.
>>> +
>>> +"""
>>> +This file contains the various ID generators for Pony Mail's archivers.
>>> +"""
>>> +
>>> +import hashlib
>>> +import email.utils
>>> +
>>> +# Full generator: uses the entire email (including server-dependent data)
>>> +# This is the recommended generator for single-node setups.
>>> +def full(msg, body, lid, attachments):
>>> +    mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>> +    return mid
>>> +
>>> +# Medium: Standard generator
>>> +def medium(msg, body, lid, attachments):
>>> +    # Use text body
>>> +    xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>> +    # Use List ID
>>> +    xbody += bytes(lid, encoding='ascii')
>>> +    # Use Date header
>>> +    xbody += bytes(mdatestring, encoding='ascii')
>>> +    mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>> +    return mid
>>> +
>>> +# Redundant: Use data that is guaranteed to be the same across redundant setups
>>> +# This is the recommended generator for redundant cluster setups
>>> +def redundant(msg, body, lid, attachments):
>>> +    # Use text body
>>> +    xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>> +    # Use List ID
>>> +    xbody += bytes(lid, encoding='ascii')
>>> +    # Use Date header
>>> +    xbody += bytes(mdatestring, encoding='ascii')
>>> +    # Use sender
>>> +    sender = msg.get('from', None)
>>> +    if sender:
>>> +        xbody += bytes(sender, encoding = 'ascii')
>>> +    # Use subject
>>> +    if subject:
>>> +        xbody += bytes(subject, encoding = 'ascii')
>>> +    # Use attachment hashes if present
>>> +    if attachments:
>>> +        for a in attachments:
>>> +            xbody += bytes(a['hash'], encoding = 'ascii')
>>> +    mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>> +    return mid
>>> +
>>> +
>>> +# Old school way of making IDs
>>> +def legacy(msg, body, lid, attachments):
>>> +    mdate = email.utils.parsedate_tz(msg.get('date'))
>>> +    uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid
>>> +    mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>>> +    return mid
>>> +
>>> +
>>> +
>>>
>

Reply via email to