On 4 June 2017 at 15:02, <[email protected]> wrote: > split generators into a file of its own > > Also fix up generators: > - medium goes back to the way it was
-1 This is a very confusing change. The change to the medium generator should be reverted as a separate commit, and the other changes should be added separately. > - a new 'redundant' generator for cluster setups > > > Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo > Commit: > http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036 > Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036 > Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036 > > Branch: refs/heads/master > Commit: e2d8103635db012d13fc6af46d336c96be31d4c1 > Parents: 8b7ede8 > Author: Daniel Gruno <[email protected]> > Authored: Sun Jun 4 15:45:18 2017 +0200 > Committer: Daniel Gruno <[email protected]> > Committed: Sun Jun 4 15:45:18 2017 +0200 > > ---------------------------------------------------------------------- > tools/archiver.py | 17 ++++------- > tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 80 insertions(+), 11 deletions(-) > ---------------------------------------------------------------------- > > > http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py > ---------------------------------------------------------------------- > diff --git a/tools/archiver.py b/tools/archiver.py > index 41933f7..0966b13 100755 > --- a/tools/archiver.py > +++ b/tools/archiver.py > @@ -58,6 +58,7 @@ import io > import logging > import traceback > import sys > +import generators > > # Fetch config > path = os.path.dirname(os.path.realpath(__file__)) > @@ -316,20 +317,14 @@ class Archiver(object): > if body is not None or attachments: > pmid = mid > try: > - # Use full message as bytes for mid? 
> if archiver_generator == "full": > - mid = "%s@%s" % > (hashlib.sha224(msg.as_bytes()).hexdigest(), lid) > + mid = generators.full(msg, body, lid, attachments) > elif archiver_generator == "medium": > - xbody = body if type(body) is bytes else > body.encode('ascii', 'ignore') > - xbody += bytes(lid, encoding='ascii') > - xbody += bytes(mdatestring, encoding='ascii') > - mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid) > - if attachments: > - for a in attachments: > - xbody += bytes(a['hash'], encoding = 'ascii') > + mid = generators.medium(msg, body, lid, attachments) > + elif archiver_generator == "redundant": > + mid = generators.redundant(msg, body, lid, attachments) > else: > - # Or revert to the old way? > - mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is > bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid) > + mid = generators.legacy(msg, body, lid, attachments) > except Exception as err: > if logger: > logger.warn("Could not generate MID: %s" % err) > > http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py > ---------------------------------------------------------------------- > diff --git a/tools/generators.py b/tools/generators.py > new file mode 100644 > index 0000000..af566fc > --- /dev/null > +++ b/tools/generators.py > @@ -0,0 +1,74 @@ > +#!/usr/bin/env/python3 > +# -*- coding: utf-8 -*- > +# Licensed to the Apache Software Foundation (ASF) under one or more > +# contributor license agreements. See the NOTICE file distributed with > +# this work for additional information regarding copyright ownership. > +# The ASF licenses this file to You under the Apache License, Version 2.0 > +# (the "License"); you may not use this file except in compliance with > +# the License. 
You may obtain a copy of the License at > +# > +# http://www.apache.org/licenses/LICENSE-2.0 > +# > +# Unless required by applicable law or agreed to in writing, software > +# distributed under the License is distributed on an "AS IS" BASIS, > +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > +# See the License for the specific language governing permissions and > +# limitations under the License. > + > +""" > +This file contains the various ID generators for Pony Mail's archivers. > +""" > + > +import hashlib > +import email.utils > + > +# Full generator: uses the entire email (including sever-depenent data) > +# This is the recommended generator for single-node setups. > +def full(msg, body, lid, attachments): > + mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid) > + return mid > + > +# Medium: Standard generator > +def medium(msg, body, lid, attachments): > + # Use text body > + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore') > + # Use List ID > + xbody += bytes(lid, encoding='ascii') > + # Use Date header > + xbody += bytes(mdatestring, encoding='ascii') > + mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid) > + return mid > + > +# Redundant: Use data that is guaranteed to be the same across redundant > setups > +# This is the recommended generator for redundant cluster setups > +def redundant(msg, body, lid, attachments): > + # Use text body > + xbody = body if type(body) is bytes else body.encode('ascii', 'ignore') > + # Use List ID > + xbody += bytes(lid, encoding='ascii') > + # Use Date header > + xbody += bytes(mdatestring, encoding='ascii') > + # Use sender > + sender = msg.get('from', None) > + if sender: > + xbody += bytes(sender, encoding = 'ascii') > + # Use subject > + if subject: > + xbody += bytes(subject, encoding = 'ascii') > + # Use attachment hashes if present > + if attachments: > + for a in attachments: > + xbody += bytes(a['hash'], encoding = 'ascii') > + mid = 
"r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid) > + return mid > + > + > +# Old school way of making IDs > +def legacy(msg, body, lid, attachments): > + mdate = email.utils.parsedate_tz(msg.get('date')) > + uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is > valid > + mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else > body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid) > + return mid > + > + > + >
