On 06/05/2017 12:07 AM, sebb wrote: > On 4 June 2017 at 15:02, <[email protected]> wrote: >> split generators into a file of its own >> >> Also fix up generators: >> - medium goes back to the way it was > > -1 > > This is a very confusing change. > > The change to the medium generator should be reverted as a separate > commit, and the other changes added separately
How would I go about fixing that? I understand your objection to the commit style, and I agree it should have been two separate commits, but the commit is already on master and I'm not sure how to rework it now. > >> - a new 'redundant' generator for cluster setups >> >> >> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo >> Commit: >> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036 >> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036 >> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036 >> >> Branch: refs/heads/master >> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1 >> Parents: 8b7ede8 >> Author: Daniel Gruno <[email protected]> >> Authored: Sun Jun 4 15:45:18 2017 +0200 >> Committer: Daniel Gruno <[email protected]> >> Committed: Sun Jun 4 15:45:18 2017 +0200 >> >> ---------------------------------------------------------------------- >> tools/archiver.py | 17 ++++------- >> tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 80 insertions(+), 11 deletions(-) >> ---------------------------------------------------------------------- >> >> >> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py >> ---------------------------------------------------------------------- >> diff --git a/tools/archiver.py b/tools/archiver.py >> index 41933f7..0966b13 100755 >> --- a/tools/archiver.py >> +++ b/tools/archiver.py >> @@ -58,6 +58,7 @@ import io >> import logging >> import traceback >> import sys >> +import generators >> >> # Fetch config >> path = os.path.dirname(os.path.realpath(__file__)) >> @@ -316,20 +317,14 @@ class Archiver(object): >> if body is not None or attachments: >> pmid = mid >> try: >> - # Use full message as bytes for mid? 
>> if archiver_generator == "full": >> - mid = "%s@%s" % >> (hashlib.sha224(msg.as_bytes()).hexdigest(), lid) >> + mid = generators.full(msg, body, lid, attachments) >> elif archiver_generator == "medium": >> - xbody = body if type(body) is bytes else >> body.encode('ascii', 'ignore') >> - xbody += bytes(lid, encoding='ascii') >> - xbody += bytes(mdatestring, encoding='ascii') >> - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid) >> - if attachments: >> - for a in attachments: >> - xbody += bytes(a['hash'], encoding = 'ascii') >> + mid = generators.medium(msg, body, lid, attachments) >> + elif archiver_generator == "redundant": >> + mid = generators.redundant(msg, body, lid, attachments) >> else: >> - # Or revert to the old way? >> - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) >> is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid) >> + mid = generators.legacy(msg, body, lid, attachments) >> except Exception as err: >> if logger: >> logger.warn("Could not generate MID: %s" % err) >> >> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py >> ---------------------------------------------------------------------- >> diff --git a/tools/generators.py b/tools/generators.py >> new file mode 100644 >> index 0000000..af566fc >> --- /dev/null >> +++ b/tools/generators.py >> @@ -0,0 +1,74 @@ >> +#!/usr/bin/env/python3 >> +# -*- coding: utf-8 -*- >> +# Licensed to the Apache Software Foundation (ASF) under one or more >> +# contributor license agreements. See the NOTICE file distributed with >> +# this work for additional information regarding copyright ownership. >> +# The ASF licenses this file to You under the Apache License, Version 2.0 >> +# (the "License"); you may not use this file except in compliance with >> +# the License. 
You may obtain a copy of the License at >> +# >> +# http://www.apache.org/licenses/LICENSE-2.0 >> +# >> +# Unless required by applicable law or agreed to in writing, software >> +# distributed under the License is distributed on an "AS IS" BASIS, >> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >> +# See the License for the specific language governing permissions and >> +# limitations under the License. >> + >> +""" >> +This file contains the various ID generators for Pony Mail's archivers. >> +""" >> + >> +import hashlib >> +import email.utils >> + >> +# Full generator: uses the entire email (including sever-depenent data) >> +# This is the recommended generator for single-node setups. >> +def full(msg, body, lid, attachments): >> + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid) >> + return mid >> + >> +# Medium: Standard generator >> +def medium(msg, body, lid, attachments): >> + # Use text body >> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore') >> + # Use List ID >> + xbody += bytes(lid, encoding='ascii') >> + # Use Date header >> + xbody += bytes(mdatestring, encoding='ascii') >> + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid) >> + return mid >> + >> +# Redundant: Use data that is guaranteed to be the same across redundant >> setups >> +# This is the recommended generator for redundant cluster setups >> +def redundant(msg, body, lid, attachments): >> + # Use text body >> + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore') >> + # Use List ID >> + xbody += bytes(lid, encoding='ascii') >> + # Use Date header >> + xbody += bytes(mdatestring, encoding='ascii') >> + # Use sender >> + sender = msg.get('from', None) >> + if sender: >> + xbody += bytes(sender, encoding = 'ascii') >> + # Use subject >> + if subject: >> + xbody += bytes(subject, encoding = 'ascii') >> + # Use attachment hashes if present >> + if attachments: >> + for a in attachments: >> + xbody += 
bytes(a['hash'], encoding = 'ascii') >> + mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid) >> + return mid >> + >> + >> +# Old school way of making IDs >> +def legacy(msg, body, lid, attachments): >> + mdate = email.utils.parsedate_tz(msg.get('date')) >> + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is >> valid >> + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else >> body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid) >> + return mid >> + >> + >> + >>
