On 5 June 2017 at 09:17, Daniel Gruno <[email protected]> wrote:
> I missed a git add in the last commit, sorry. Will add and recommit now.
Have you tested that the change is complete? I'm still getting an error. > On 06/05/2017 01:57 AM, sebb wrote: >> On 4 June 2017 at 15:02, <[email protected]> wrote: >>> split generators into a file of its own >>> >>> Also fix up generators: >>> - medium goes back to the way it was >>> - a new 'redundant' generator for cluster setups >>> >>> >>> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo >>> Commit: >>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036 >>> Tree: >>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036 >>> Diff: >>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036 >>> >>> Branch: refs/heads/master >>> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1 >>> Parents: 8b7ede8 >>> Author: Daniel Gruno <[email protected]> >>> Authored: Sun Jun 4 15:45:18 2017 +0200 >>> Committer: Daniel Gruno <[email protected]> >>> Committed: Sun Jun 4 15:45:18 2017 +0200 >>> >>> ---------------------------------------------------------------------- >>> tools/archiver.py | 17 ++++------- >>> tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++ >>> 2 files changed, 80 insertions(+), 11 deletions(-) >>> ---------------------------------------------------------------------- >>> >>> >>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py >>> ---------------------------------------------------------------------- >>> diff --git a/tools/archiver.py b/tools/archiver.py >>> index 41933f7..0966b13 100755 >>> --- a/tools/archiver.py >>> +++ b/tools/archiver.py >>> @@ -58,6 +58,7 @@ import io >>> import logging >>> import traceback >>> import sys >>> +import generators >>> >>> # Fetch config >>> path = os.path.dirname(os.path.realpath(__file__)) >>> @@ -316,20 +317,14 @@ class Archiver(object): >>> if body is not None or attachments: >>> pmid = mid >>> try: >>> - # Use full message as bytes for mid? 
>>> if archiver_generator == "full": >>> - mid = "%s@%s" % >>> (hashlib.sha224(msg.as_bytes()).hexdigest(), lid) >>> + mid = generators.full(msg, body, lid, attachments) >>> elif archiver_generator == "medium": >>> - xbody = body if type(body) is bytes else >>> body.encode('ascii', 'ignore') >>> - xbody += bytes(lid, encoding='ascii') >>> - xbody += bytes(mdatestring, encoding='ascii') >>> - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), >>> lid) >>> - if attachments: >>> - for a in attachments: >>> - xbody += bytes(a['hash'], encoding = 'ascii') >>> + mid = generators.medium(msg, body, lid, attachments) >>> + elif archiver_generator == "redundant": >>> + mid = generators.redundant(msg, body, lid, attachments) >>> else: >>> - # Or revert to the old way? >>> - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) >>> is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid) >>> + mid = generators.legacy(msg, body, lid, attachments) >>> except Exception as err: >>> if logger: >>> logger.warn("Could not generate MID: %s" % err) >>> >>> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py >>> ---------------------------------------------------------------------- >>> diff --git a/tools/generators.py b/tools/generators.py >>> new file mode 100644 >>> index 0000000..af566fc >>> --- /dev/null >>> +++ b/tools/generators.py >>> @@ -0,0 +1,74 @@ >>> +#!/usr/bin/env/python3 >>> +# -*- coding: utf-8 -*- >>> +# Licensed to the Apache Software Foundation (ASF) under one or more >>> +# contributor license agreements. See the NOTICE file distributed with >>> +# this work for additional information regarding copyright ownership. >>> +# The ASF licenses this file to You under the Apache License, Version 2.0 >>> +# (the "License"); you may not use this file except in compliance with >>> +# the License. 
You may obtain a copy of the License at >>> +# >>> +# http://www.apache.org/licenses/LICENSE-2.0 >>> +# >>> +# Unless required by applicable law or agreed to in writing, software >>> +# distributed under the License is distributed on an "AS IS" BASIS, >>> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >>> +# See the License for the specific language governing permissions and >>> +# limitations under the License. >>> + >>> +""" >>> +This file contains the various ID generators for Pony Mail's archivers. >>> +""" >>> + >>> +import hashlib >>> +import email.utils >>> + >>> +# Full generator: uses the entire email (including sever-depenent data) >>> +# This is the recommended generator for single-node setups. >>> +def full(msg, body, lid, attachments): >>> + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid) >>> + return mid >>> + >>> +# Medium: Standard generator >>> +def medium(msg, body, lid, attachments): >>> + # Use text body >>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore') >>> + # Use List ID >>> + xbody += bytes(lid, encoding='ascii') >>> + # Use Date header >>> + xbody += bytes(mdatestring, encoding='ascii') >> >> mdatestring is not defined >> >>> + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid) >>> + return mid >>> + >>> +# Redundant: Use data that is guaranteed to be the same across redundant >>> setups >>> +# This is the recommended generator for redundant cluster setups >>> +def redundant(msg, body, lid, attachments): >>> + # Use text body >>> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore') >>> + # Use List ID >>> + xbody += bytes(lid, encoding='ascii') >>> + # Use Date header >>> + xbody += bytes(mdatestring, encoding='ascii') >> >> mdatestring is not defined >> >>> + # Use sender >>> + sender = msg.get('from', None) >>> + if sender: >>> + xbody += bytes(sender, encoding = 'ascii') >>> + # Use subject >>> + if subject: >>> + xbody += bytes(subject, 
encoding = 'ascii') >>> + # Use attachment hashes if present >>> + if attachments: >>> + for a in attachments: >>> + xbody += bytes(a['hash'], encoding = 'ascii') >>> + mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid) >>> + return mid >>> + >>> + >>> +# Old school way of making IDs >>> +def legacy(msg, body, lid, attachments): >>> + mdate = email.utils.parsedate_tz(msg.get('date')) >>> + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is >>> valid >>> + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else >>> body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid) >>> + return mid >>> + >>> + >>> + >> >> Have the generators been tested? >> >
