On 4 June 2017 at 15:02,  <[email protected]> wrote:
> split generators into a file of its own
>
> Also fix up generators:
> - medium goes back to the way it was
> - a new 'redundant' generator for cluster setups
>
>
> Project: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/repo
> Commit: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/commit/e2d81036
> Tree: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/tree/e2d81036
> Diff: http://git-wip-us.apache.org/repos/asf/incubator-ponymail/diff/e2d81036
>
> Branch: refs/heads/master
> Commit: e2d8103635db012d13fc6af46d336c96be31d4c1
> Parents: 8b7ede8
> Author: Daniel Gruno <[email protected]>
> Authored: Sun Jun 4 15:45:18 2017 +0200
> Committer: Daniel Gruno <[email protected]>
> Committed: Sun Jun 4 15:45:18 2017 +0200
>
> ----------------------------------------------------------------------
>  tools/archiver.py   | 17 ++++-------
>  tools/generators.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 80 insertions(+), 11 deletions(-)
> ----------------------------------------------------------------------
>
>
> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/archiver.py
> ----------------------------------------------------------------------
> diff --git a/tools/archiver.py b/tools/archiver.py
> index 41933f7..0966b13 100755
> --- a/tools/archiver.py
> +++ b/tools/archiver.py
> @@ -58,6 +58,7 @@ import io
>  import logging
>  import traceback
>  import sys
> +import generators
>
>  # Fetch config
>  path = os.path.dirname(os.path.realpath(__file__))
> @@ -316,20 +317,14 @@ class Archiver(object):
>          if body is not None or attachments:
>              pmid = mid
>              try:
> -                # Use full message as bytes for mid?
>                  if archiver_generator == "full":
> -                    mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
> +                    mid = generators.full(msg, body, lid, attachments)
>                  elif archiver_generator == "medium":
> -                    xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> -                    xbody += bytes(lid, encoding='ascii')
> -                    xbody += bytes(mdatestring, encoding='ascii')
> -                    mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
> -                    if attachments:
> -                        for a in attachments:
> -                            xbody += bytes(a['hash'], encoding = 'ascii')
> +                    mid = generators.medium(msg, body, lid, attachments)
> +                elif archiver_generator == "redundant":
> +                    mid = generators.redundant(msg, body, lid, attachments)
>                  else:
> -                    # Or revert to the old way?
> -                    mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
> +                    mid = generators.legacy(msg, body, lid, attachments)
>              except Exception as err:
>                  if logger:
>                      logger.warn("Could not generate MID: %s" % err)
>
> http://git-wip-us.apache.org/repos/asf/incubator-ponymail/blob/e2d81036/tools/generators.py
> ----------------------------------------------------------------------
> diff --git a/tools/generators.py b/tools/generators.py
> new file mode 100644
> index 0000000..af566fc
> --- /dev/null
> +++ b/tools/generators.py
> @@ -0,0 +1,74 @@
> +#!/usr/bin/env/python3
> +# -*- coding: utf-8 -*-
> +# Licensed to the Apache Software Foundation (ASF) under one or more
> +# contributor license agreements.  See the NOTICE file distributed with
> +# this work for additional information regarding copyright ownership.
> +# The ASF licenses this file to You under the Apache License, Version 2.0
> +# (the "License"); you may not use this file except in compliance with
> +# the License.  You may obtain a copy of the License at
> +#
> +#     http://www.apache.org/licenses/LICENSE-2.0
> +#
> +# Unless required by applicable law or agreed to in writing, software
> +# distributed under the License is distributed on an "AS IS" BASIS,
> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> +# See the License for the specific language governing permissions and
> +# limitations under the License.
> +
> +"""
> +This file contains the various ID generators for Pony Mail's archivers.
> +"""
> +
> +import hashlib
> +import email.utils
> +
> +# Full generator: uses the entire email (including server-dependent data)
> +# This is the recommended generator for single-node setups.
> +def full(msg, body, lid, attachments):
> +    mid = "%s@%s" % (hashlib.sha224(msg.as_bytes()).hexdigest(), lid)
> +    return mid
> +
> +# Medium: Standard generator
> +def medium(msg, body, lid, attachments):
> +    # Use text body
> +    xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> +    # Use List ID
> +    xbody += bytes(lid, encoding='ascii')
> +    # Use Date header
> +    xbody += bytes(mdatestring, encoding='ascii')
> +    mid = "%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
> +    return mid
> +
> +# Redundant: Use data that is guaranteed to be the same across redundant setups
> +# This is the recommended generator for redundant cluster setups
> +def redundant(msg, body, lid, attachments):
> +    # Use text body
> +    xbody = body if type(body) is bytes else body.encode('ascii', 'ignore')
> +    # Use List ID
> +    xbody += bytes(lid, encoding='ascii')
> +    # Use Date header
> +    xbody += bytes(mdatestring, encoding='ascii')
> +    # Use sender
> +    sender = msg.get('from', None)
> +    if sender:
> +        xbody += bytes(sender, encoding = 'ascii')
> +    # Use subject
> +    if subject:
> +        xbody += bytes(subject, encoding = 'ascii')
> +    # Use attachment hashes if present
> +    if attachments:
> +        for a in attachments:
> +            xbody += bytes(a['hash'], encoding = 'ascii')
> +    mid = "r%s@%s" % (hashlib.sha224(xbody).hexdigest(), lid)
> +    return mid
> +
> +
> +# Old school way of making IDs
> +def legacy(msg, body, lid, attachments):

-1

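As far as I can tell, this is not exactly the same as the original code: the removed line in archiver.py used a `uid_mdate` value that was computed elsewhere, whereas this function recomputes it from the Date header.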

> +    mdate = email.utils.parsedate_tz(msg.get('date'))
> +    uid_mdate = email.utils.mktime_tz(mdate) # Only set if Date header is valid

What happens if either of the previous two lines throws an error?

> +    mid = "%s@%s@%s" % (hashlib.sha224(body if type(body) is bytes else body.encode('ascii', 'ignore')).hexdigest(), uid_mdate, lid)
> +    return mid
> +
> +
> +
>

Reply via email to