On 5 June 2017 at 10:04, Daniel Gruno <[email protected]> wrote:
> On 06/05/2017 11:01 AM, sebb wrote:
>> On 5 June 2017 at 09:17, Daniel Gruno <[email protected]> wrote:
>>> I missed a git add in the last commit, sorry. Will add and recommit now.
>>
>> Have you tested that the change is complete?
>>
>> I'm still getting an error.
>
> Tested it with a bunch of mbox files, some with, some without headers,
> subjects, senders etc. All seemed to work.
>
> What is the specific error you are getting, and which generator are you
> using?

I am using the medium generator, and it fails with:

NameError: name 'msg_metadata' is not defined

>>
>>> On 06/05/2017 01:57 AM, sebb wrote:
>>>> On 4 June 2017 at 15:02,  <[email protected]> wrote:
>>>>> split generators into a file of its own
>>>>>
>>>>> Also fix up generators:
>>>>> - medium goes back to the way it was
>>>>> - a new 'redundant' generator for cluster setups
>>>>>
>>>>>
>>>>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
>>>>> Commit: 
>>>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
>>>>> Tree: 
>>>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
>>>>> Diff: 
>>>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>>>>>
>>>>> Branch: refs/heads/master
>>>>> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
>>>>> Parents: 8b7ede8
>>>>> Author: Daniel Gruno <[email protected]>
>>>>> Authored: Sun Jun 4 15:45:18 2017 +0200
>>>>> Committer: Daniel Gruno <[email protected]>
>>>>> Committed: Sun Jun 4 15:45:18 2017 +0200
>>>>>
>>>>> ----------------------------------------------------------------------
>>>>>  tools/archiver.py   | 17 ++++-------
>>>>>  tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>  2 files changed, 80 insertions(+), 11 deletions(-)
>>>>> ----------------------------------------------------------------------
>>>>>
>>>>>
>>>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
>>>>> ----------------------------------------------------------------------
>>>>> diff --git a/tools/archiver.py b/tools/archiver.py
>>>>> index 41933f7..0966b13 100755
>>>>> --- a/tools/archiver.py
>>>>> +++ b/tools/archiver.py
>>>>> @@ -58,6 +58,7 @@ import io
>>>>>  import logging
>>>>>  import traceback
>>>>>  import sys
>>>>> +import generators
>>>>>
>>>>>  # Fetch config
>>>>>  path = os.path.dirname(os.path.realpath(__file__))
>>>>> @@ -316,20 +317,14 @@ class Archiver(object):
>>>>>          if body is not None or attachments:
>>>>>              pmid = mid
>>>>>              try:
>>>>> -                # Use full message as bytes for mid?
>>>>>                  if archiver_generator == "full":
>>>>> -                    mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>>>> +                    mid = generators.full(msg, body, lid, attachments)
>>>>>                  elif archiver_generator == "medium":
>>>>> -                    xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>>>> -                    xbody += bytes(lid, encoding='ascii')
>>>>> -                    xbody += bytes(mdatestring, encoding='ascii')
>>>>> -                    mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>>>> -                    if attachments:
>>>>> -                        for a in attachments:
>>>>> -                            xbody += bytes(a['hash'], encoding = 'ascii')
>>>>> +                    mid = generators.medium(msg, body, lid, attachments)
>>>>> +                elif archiver_generator == "redundant":
>>>>> +                    mid = generators.redundant(msg, body, lid, attachments)
>>>>>                  else:
>>>>> -                    # Or revert to the old way?
>>>>> -                    mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>>>>> +                    mid = generators.legacy(msg, body, lid, attachments)
>>>>>              except Exception as err:
>>>>>                  if logger:
>>>>>                      logger.warn("Could not generate MID: %s" % err)
>>>>>
>>>>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
>>>>> ----------------------------------------------------------------------
>>>>> diff --git a/tools/generators.py b/tools/generators.py
>>>>> new file mode 100644
>>>>> index 0000000..af566fc
>>>>> --- /dev/null
>>>>> +++ b/tools/generators.py
>>>>> @@ -0,0 +1,74 @@
>>>>> +#!/usr/bin/env/python3
>>>>> +# -*- coding: utf-8 -*-
>>>>> +# Licensed to the Apache Software Foundation (ASF) under one or more
>>>>> +# contributor license agreements.  See the NOTICE file distributed with
>>>>> +# this work for additional information regarding copyright ownership.
>>>>> +# The ASF licenses this file to You under the Apache License, Version 2.0
>>>>> +# (the "License"); you may not use this file except in compliance with
>>>>> +# the License.  You may obtain a copy of the License at
>>>>> +#
>>>>> +#     http://www.apache.org/licenses/LICENSE-2.0
>>>>> +#
>>>>> +# Unless required by applicable law or agreed to in writing, software
>>>>> +# distributed under the License is distributed on an "AS IS" BASIS,
>>>>> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>>>>> +# See the License for the specific language governing permissions and
>>>>> +# limitations under the License.
>>>>> +
>>>>> +"""
>>>>> +This file contains the various ID generators for Pony Mail's archivers.
>>>>> +"""
>>>>> +
>>>>> +import hashlib
>>>>> +import email.utils
>>>>> +
>>>>> +# Full generator: uses the entire email (including server-dependent data)
>>>>> +# This is the recommended generator for single-node setups.
>>>>> +def full(msg, body, lid, attachments):
>>>>> +    mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
>>>>> +    return mid
>>>>> +
>>>>> +# Medium: Standard generator
>>>>> +def medium(msg, body, lid, attachments):
>>>>> +    # Use text body
>>>>> +    xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>>>> +    # Use List ID
>>>>> +    xbody += bytes(lid, encoding='ascii')
>>>>> +    # Use Date header
>>>>> +    xbody += bytes(mdatestring, encoding='ascii')
>>>>
>>>> mdatestring is not defined
>>>>
>>>>> +    mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>>>> +    return mid
>>>>> +
>>>>> +# Redundant: Use data that is guaranteed to be the same across redundant setups
>>>>> +# This is the recommended generator for redundant cluster setups
>>>>> +def redundant(msg, body, lid, attachments):
>>>>> +    # Use text body
>>>>> +    xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
>>>>> +    # Use List ID
>>>>> +    xbody += bytes(lid, encoding='ascii')
>>>>> +    # Use Date header
>>>>> +    xbody += bytes(mdatestring, encoding='ascii')
>>>>
>>>> mdatestring is not defined
>>>>
>>>>> +    # Use sender
>>>>> +    sender = msg.get('from', None)
>>>>> +    if sender:
>>>>> +        xbody += bytes(sender, encoding = 'ascii')
>>>>> +    # Use subject
>>>>> +    if subject:
>>>>> +        xbody += bytes(subject, encoding = 'ascii')
>>>>> +    # Use attachment hashes if present
>>>>> +    if attachments:
>>>>> +        for a in attachments:
>>>>> +            xbody += bytes(a['hash'], encoding = 'ascii')
>>>>> +    mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
>>>>> +    return mid
>>>>> +
>>>>> +
>>>>> +# Old school way of making IDs
>>>>> +def legacy(msg, body, lid, attachments):
>>>>> +    mdate = email.utils.parsedate_tz(msg.get('date'))
>>>>> +    uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid
>>>>> +    mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
>>>>> +    return mid
>>>>> +
>>>>> +
>>>>> +
>>>>
>>>> Have the generators been tested?
>>>>
>>>
>

Reply via email to