Ethan Glasser-Camp <ethan.glasser.c...@gmail.com> writes:

> I've modified the script so that it would run by mangling filenames,
> which is irreversible (the original tried to encode/decode filenames
> reversibly). Then I got a little carried away, adding --verbose and
> --dry-run options as well as removing a couple trailing
> semicolons. Here's my version, in case it should interest anyone else.

Hi guys,

There was a bug in the previous version I sent. It didn't handle
unlinking tags correctly. Also, I spotted a bug in syncing to untagged
messages. Maybe I should stop using emails as version control.

---- 8< ----


# Copyright 2013, David Bremner <da...@tethera.net>

# Licensed under the same terms as notmuch.

import notmuch
import re
import os, errno
import sys
from collections import defaultdict
import argparse
import hashlib

# Skip tags that are not worth mirroring to the link farm: the tags notmuch
# adds automatically while indexing (attachment, signed, encrypted) and the
# tags that mirror maildir flags (draft, flagged, passed, replied, unread).
# The automatic tag is spelled "attachment"; the previous spelling
# "attachement" never matched, so that tag was exported by mistake.

skiptags = re.compile(r"^(attachment|signed|encrypted|draft|flagged|passed|replied|unread)$")

# some random person on stack overflow suggests:

def mkdir_p(path):
    """Create directory `path` and any missing parents (like `mkdir -p`).

    An already existing directory is not an error; any other OSError
    (including `path` existing as a non-directory) is re-raised.
    """
    try:
        os.makedirs(path)
    except OSError as exc: # Python >2.5
        already_a_directory = (exc.errno == errno.EEXIST
                               and os.path.isdir(path))
        if not already_a_directory:
            raise

# Module-wide verbosity switch; overwritten from --verbose once the
# command line has been parsed.
VERBOSE = False

def log(msg):
    """Print `msg` to standard output, but only in verbose mode."""
    if not VERBOSE:
        return
    print(msg)

# Characters that may appear unescaped in a file or directory name.
# '-' is kept last so it stays literal inside the character class below.
CHARSET = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
           'abcdefghijklmnopqrstuvwxyz'
           '0123456789'
           '+_@=.,-')

# Matches (and captures) one character that is NOT in CHARSET.
encode_re = '([^' + CHARSET + '])'

# Matches one %XX escape whose first hex digit is 0-7.
decode_re = '[%]([0-7][0-9A-Fa-f])'

def encode_one_char(match):
    """Return the %xx escape (lower-case hex) for the character in group 1."""
    code_point = ord(match.group(1))
    return '%' + format(code_point, '02x')

def encode_for_fs(str):
    """Escape every character of `str` outside CHARSET as %xx."""
    unsafe_char = re.compile(encode_re)
    return unsafe_char.sub(encode_one_char, str)

def mangle_message_id(msg_id):
    """
    Return a mangled version of the message id, suitable for use as a filename.

    The id is %-escaped first.  If the escaped form is too long to leave
    room for a maildir flag suffix, it is shortened to
    prefix + '...' + suffix + the first hex digits of its SHA-256, so the
    mapping is not reversible.
    """
    MAX_LENGTH = 143
    FLAGS_LENGTH = 8    # :2,S...??
    SHA_LENGTH = 8

    encoded = encode_for_fs(msg_id)
    room_for_id = MAX_LENGTH - FLAGS_LENGTH
    if len(encoded) < room_for_id:
        return encoded

    # Split what is left after the digest and the '...' marker evenly
    # between the head and the tail of the escaped id.
    half = (room_for_id - SHA_LENGTH - 3) // 2
    digest = hashlib.sha256(encoded).hexdigest()
    return ''.join([encoded[:half], '...', encoded[-half:], digest[:SHA_LENGTH]])

def decode_one_char(match):
    """Return the character whose hexadecimal code is in group 1 of `match`."""
    hex_digits = match.group(1)
    return chr(int(hex_digits, 16))

def decode_from_fs(str):
    """Replace every %XX escape matched by decode_re with its character."""
    escape = re.compile(decode_re)
    return escape.sub(decode_one_char, str)

def mk_tag_dir(tagdir):
    """Create `tagdir` with its 'cur', 'new' and 'tmp' subdirectories."""
    for subdir in ('cur', 'new', 'tmp'):
        mkdir_p(os.path.join(tagdir, subdir))


# A maildir flag suffix: ':2,' followed by any run of non-colon characters.
flagpart = '(:2,[^:]*)'
# The same suffix, anchored at the end of a file name.
flagre = re.compile('{}$'.format(flagpart))

def path_for_msg (dir, msg):
    """Return the link path for `msg` inside tag directory `dir`.

    The link lives in dir/cur and is named after the mangled message id,
    followed by the maildir flag suffix of the message's own file name
    (if it has one).
    """
    flagsmatch = flagre.search(msg.get_filename())
    flags = flagsmatch.group(1) if flagsmatch is not None else ''

    linkname = mangle_message_id(msg.get_message_id()) + flags
    return os.path.join(dir, 'cur', linkname)


def unlink_message(dir, msg):
    """Remove every link to `msg` from the 'cur' folder of tag directory `dir`.

    A link matches when its name is the mangled message id, optionally
    followed by a maildir flag suffix.  Each removal is logged; with
    --dry-run nothing is actually deleted.
    """
    curdir = os.path.join(dir, 'cur')

    mangled_id = mangle_message_id(msg.get_message_id())
    link_name = re.compile('^' + re.escape(mangled_id)  + flagpart +'?$')

    for entry in os.listdir(curdir):
        if not link_name.match(entry):
            continue
        victim = os.path.join(curdir, entry)
        log("Unlinking {}".format(victim))
        if not opts.dry_run:
            os.unlink(victim)

def dir_for_tag(tag):
    """Return the directory under tagroot that holds the links for `tag`."""
    return os.path.join(tagroot, encode_for_fs(tag))

# mangled message id -> set of (decoded) tags found in the link farm
disk_tags = defaultdict(set)
# every mangled message id that has at least one link on disk
disk_ids = set()

def read_tags_from_disk(rootdir):
    """Scan the link farm under `rootdir` into disk_tags and disk_ids.

    For every file found, the part of its name before the first ':' is
    taken as the mangled message id, and the second-to-last component of
    the containing directory path is taken as the encoded tag name.
    """
    for dirpath, _subdirs, names in os.walk(rootdir):
        for name in names:
            mangled_id = name.partition(':')[0]
            encoded_tag = dirpath.split('/')[-2]
            disk_ids.add(mangled_id)
            disk_tags[mangled_id].add(decode_from_fs(encoded_tag))

# Main program

# Command-line interface.  The threshold default is written as a plain
# integer: the former `50000L` long literal is a syntax error on Python 3
# and is not needed on Python 2 either.
parser = argparse.ArgumentParser(description='Sync notmuch tag database to/from link farm')
parser.add_argument('-l','--link-style',choices=['hard','symbolic', 'adaptive'],
                    default='adaptive',
                    help='kind of link to create; "adaptive" uses a symbolic '
                         'link for files larger than --threshold bytes and a '
                         'hard link otherwise')
parser.add_argument('-d','--destination',choices=['disk','notmuch'], default='disk',
                    help='which side is updated: the link farm on disk or '
                         'the notmuch database')
parser.add_argument('-t','--threshold', default=50000, type=int,
                    help='file size in bytes above which "adaptive" uses '
                         'symbolic links')
parser.add_argument('-n','--dry-run', default=False, action='store_true',
                    help='log what would be done without changing anything')
parser.add_argument('-v','--verbose', default=False, action='store_true',
                    help='print each action')

parser.add_argument('tagroot', help='root directory of the link farm')

opts = parser.parse_args()
VERBOSE = opts.verbose

tagroot = opts.tagroot

# True when the link farm is the source and notmuch is being updated.
sync_from_links = opts.destination == 'notmuch'

read_tags_from_disk(tagroot)

# The database only needs to be writable when tags flow into notmuch.
db_mode = (notmuch.Database.MODE.READ_WRITE if sync_from_links
           else notmuch.Database.MODE.READ_ONLY)
db = notmuch.Database(mode=db_mode)

dbtags = [tag for tag in db.get_all_tags() if not skiptags.match(tag)]

if sync_from_links:
    # have to iterate over even untagged messages
    querystr = '*'
else:
    querystr = ' OR '.join('tag:' + tag for tag in dbtags)

q_new = notmuch.Query(db, querystr)
q_new.set_sort(notmuch.Query.SORT.UNSORTED)
# Reconcile every message returned by the query with the link farm.
# Direction depends on sync_from_links: either the database is changed to
# match the links, or links are created/removed to match the database.
for msg in q_new.search_messages():

    # silently ignore empty tags
    db_tags = set(filter (lambda tag: tag != '' and not skiptags.match(tag),
                          msg.get_tags()))

    message_id = msg.get_message_id()

    mangled_id = mangle_message_id(message_id)

    # This id is known to the database, so it is not a leftover on disk.
    disk_ids.discard(mangled_id)

    # Tags present on only one of the two sides.
    missing_on_disk = db_tags.difference(disk_tags[mangled_id])
    missing_in_db = disk_tags[mangled_id].difference(db_tags)

    # Tag changes below happen between freeze() and thaw().
    if sync_from_links:
        msg.freeze()

    filename = msg.get_filename()

    # Decide once per message whether new links are symbolic or hard.
    if len(missing_on_disk) > 0:
        if opts.link_style == 'adaptive':
            # adaptive: symlink files larger than the threshold
            statinfo = os.stat (filename)
            symlink = (statinfo.st_size > opts.threshold)
        else:
            symlink = opts.link_style == 'symbolic'

    for tag in missing_on_disk:

        if sync_from_links:
            # The disk is authoritative: a tag without a link is dropped.
            log("Removing tag {} from {}".format(tag, message_id))
            if not opts.dry_run:
                msg.remove_tag(tag,sync_maildir_flags=False)
        else:
            # The database is authoritative: create the missing link.
            tagdir = dir_for_tag (tag)

            if not opts.dry_run:
                mk_tag_dir (tagdir)

            newlink = path_for_msg (tagdir, msg)

            log("Linking {} to {}".format(filename, newlink))
            if not opts.dry_run:
                if symlink:
                    os.symlink(filename, newlink)
                else:
                    os.link(filename, newlink)


    for tag in missing_in_db:
        if sync_from_links:
            # The disk is authoritative: a link without a tag adds the tag.
            log("Adding {} to message {}".format(tag, message_id))
            if not opts.dry_run:
                msg.add_tag(tag,sync_maildir_flags=False)
        else:
            # The database is authoritative: remove the extra link
            # (unlink_message does its own logging and dry-run check).
            tagdir = dir_for_tag (tag)
            unlink_message(tagdir,msg)

    if sync_from_links:
        msg.thaw()

# everything remaining in disk_ids is a deleted message
# unless we are syncing back to the database, in which case
# it just might not currently have any non maildir tags.

if not sync_from_links:
    for root, subFolders, files in os.walk(tagroot):
        for filename in files:
            mangled_id = filename.split(':')[0]
            if mangled_id in disk_ids:
                stale_link = os.path.join(root, filename)
                # Log and honour --dry-run like every other destructive
                # step; previously these links were removed silently even
                # during a dry run.
                log("Unlinking {}".format(stale_link))
                if not opts.dry_run:
                    os.unlink(stale_link)


db.close()

# currently empty directories are not pruned.
---- 8< ----

Of course, the next step is to sync using this mechanism. Rsync doesn't
really have a concept of history, which basically makes it unusable for
this purpose [1]. Unison doesn't really understand renames, so it gets
confused when you mark a message as read (which might move it from new
to cur, and definitely changes its tags). Bremner suggested
syncmaildir. Syncmaildir doesn't understand links at all. Bremner
suggested that we could use some parts of syncmaildir to implement the
tag syncing we need.

I didn't have anything else going on this weekend so I tried to
prototype the approach. It turns out to be possible to leverage some
parts of syncmaildir. I translated a bunch of smd-client into a new
program, tagsync-client, that links to messages in an existing notmuch
DB. It seems like it's possible to use it in place of the existing
smd-client by putting lines like this in your config:

SMDCLIENT=~/src/tagsync.git/tagsync-client.py
REMOTESMDCLIENT=~/src/tagsync.git/tagsync-client.py

The sequence of commands I ran:

- linksync.py to dump tags to ~/Mail/.notmuch/exported-tags
- smd-pull mail to sync ~/Mail but excluding .notmuch
- notmuch new
- smd-pull tagsync (using the above client) to sync 
~/Mail/.notmuch/exported-tags
- linksync.py to pull tags from ~/Mail/.notmuch/exported-tags

syncmaildir doesn't cope well with drafts, so it might choke on those,
and it doesn't like symlinks (it assumes they always point to directories),
so be sure to run linksync with -l hard.

Here's the script. It's a work in progress; I have only tested it once in one 
direction.

---- 8< ----

#! /usr/bin/env python
import sys
from sys import stdout, stdin, stderr
import stat
import urllib
import hashlib
import re
import os.path
import argparse
import subprocess
import traceback
import notmuch
import time
# Protocol version announced in handshake(); the peer must report the same.
PROTOCOL_VERSION = "1.1"

# Not reproducing the autoconf logic
# (the helper programs are simply invoked by these bare names)
XDELTA = 'xdelta'
MDDIFF = 'mddiff'

# Module-wide switch for informational logging.
VERBOSE = False

def log(msg):
    """Write `msg` to stderr with an "INFO: " prefix when VERBOSE is set."""
    if not VERBOSE:
        return
    stderr.write("INFO: " + msg + "\n")

def __error(msg):
    """Abort the current operation by raising ValueError(msg)."""
    failure = ValueError(msg)
    raise failure

def log_tags_and_fail(msg, *args):
    """Emit a TAGS line built from `args`, then fail with `msg`.

    `args` are forwarded unchanged to log_tags (context, cause, human,
    suggested actions...).
    """
    tag_arguments = args
    log_tags(*tag_arguments)
    __error(msg)

def log_internal_error_and_fail(msg, *args):
    """Emit an internal-error TAGS line, then fail with `msg`.

    `msg` and `args` are forwarded to log_internal_error_tags, which takes
    the failing context as its second argument.
    """
    tag_arguments = (msg,) + args
    log_internal_error_tags(*tag_arguments)
    __error(msg)

def log_error(msg):
    """Write `msg` to stderr as an "ERROR: " line; return what write() returns."""
    line = "ERROR: {}\n".format(msg)
    return stderr.write(line)

def log_tag(tag):
    """Write `tag` to stderr as a "TAGS: " line; return what write() returns."""
    line = "TAGS: {}\n".format(tag)
    return stderr.write(line)

def log_progress(msg):
    """Progress reporting hook; intentionally does nothing here."""
    return None

def log_tags(context='unknown', cause='unknown', human=False, *args):
    """Emit a machine-readable error tag line through log_tag.

    context -- where the error happened
    cause   -- the probable cause
    human   -- truthy if human intervention is "necessary", otherwise it
               is reported as "avoidable"
    args    -- optional suggested actions, joined with spaces

    Returns whatever log_tag returns.
    """
    intervention = "necessary" if human else "avoidable"

    suggestions_string = ""
    if args:
        suggestions_string = ' suggested-actions({})'.format(' '.join(args))

    tag = "error::context({}) probable-cause({}) human-intervention({})".format(
            context, cause, intervention)
    return log_tag(tag + suggestions_string)

def mkdir_p(filename):
    """Maildir-aware mkdir.

    Creates the directory that `filename` lives in, plus all parents.  A
    path without a trailing '/' is treated as a file and its last
    component is dropped.  Relative paths are run through translate() and
    resolved against the home directory.

    If the created directory is named 'tmp', 'cur' or 'new', its two
    maildir siblings are created as well.
    """

    # The Lua function throws away the last path component if it
    # doesn't end in /. This allows you to just call mkdir_p on any
    # file and a directory for it to live will be created.
    if not filename.endswith('/'):
        filename = os.path.split(filename)[0]

    if not filename.startswith('/'):
        # This path is relative to HOME, and needs to be translated
        # too.
        filename = os.path.expanduser('~/' + translate(filename))

    parent, leaf = os.path.split(filename)
    try:
        os.makedirs(filename)
    except OSError:
        pass   # probably "File exists"

    maildir_subdirs = ['tmp', 'cur', 'new']
    if leaf not in maildir_subdirs:
        return
    for sibling_name in maildir_subdirs:
        sibling = os.path.join(parent, sibling_name)
        if not os.path.exists(sibling):
            os.mkdir(sibling)


class FakeSubprocess(object):
    """Lazy, file-like front end to a helper process fed through a pipe.

    Nothing is started at construction time.  The first write() calls
    `init_function(self.filter)`, which must fill the dict with 'inf'
    (stream to read replies from), 'outf' (stream to write requests to)
    and 'pipe' (path of the pipe).  The pipe path is unlinked right after
    the first successful readline() that follows a write.
    """

    def __init__(self, init_function):
        self.init_function = init_function
        self.filter = {}
        # Filled in from self.filter by the first write().
        self.input = None
        self.output = None
        self.pipe_name = None
        # Bookkeeping for the one-time removal of the pipe.
        self.removed = None
        self.did_write = None

    def readline(self):
        """Read one line from the helper; fails if nothing was written yet."""
        if not self.input:
            log_internal_error_and_fail("read called before write",
                                        "make_slave_filter_process")

        if self.removed or not self.did_write:
            return self.input.readline()

        # First read after a write: the pipe name is no longer needed.
        self.removed = True
        line = self.input.readline()
        os.unlink(self.pipe_name)
        return line

    def write(self, *args):
        """Write to the helper, starting it first if necessary."""
        if not self.output:
            self.init_function(self.filter)
            streams = self.filter
            self.input = streams['inf']
            self.output = streams['outf']
            self.pipe_name = streams['pipe']

        self.did_write = True
        self.output.write(*args)

    def flush(self):
        """Flush the stream that feeds the helper."""
        self.output.flush()

    def lines(self):
        """Return all remaining output lines of the helper."""
        return self.input.readlines()

def make_slave_filter_process(cmd, seed="no seed"):
    """Return a FakeSubprocess whose helper is started on first write.

    cmd  -- callable taking the fifo path and returning a process object
            whose `stdout` is used to read the helper's replies
    seed -- text mixed into the fifo name

    The fifo is created with `mddiff --mkfifo` under ~/.smd/fifo/ (or
    /tmp/ when HOME is unset); up to 11 different names are tried.
    """
    def init(filter):
        if 'inf' in filter:
            return      # already initialised

        home = os.getenv('HOME')
        user = os.getenv('USER') or 'nobody'
        mangled_name = re.sub('[ %./]', '-', seed)
        base_dir = home + '/.smd/fifo/' if home else '/tmp/'

        if subprocess.call([MDDIFF, '--mkdir-p', base_dir]) != 0:
            log_internal_error_and_fail('unable to create directory',
                                        'make_slave_filter_process')

        for attempt in range(11):
            pipe_name = ''.join([base_dir, 'smd-', user, str(int(time.time())),
                                 mangled_name, str(attempt)])
            rc = subprocess.call([MDDIFF, '--mkfifo', pipe_name])
            if rc == 0:
                break
        if rc != 0:
            log_internal_error_and_fail('unable to create fifo',
                                        "make_slave_filter_process")

        # Start the reader before opening the fifo for writing.
        inferior = cmd(pipe_name)
        filter['inf'] = inferior.stdout
        filter['outf'] = open(pipe_name, 'w')
        filter['pipe'] = pipe_name

    return FakeSubprocess(init)

# Function used by translate() to map mailbox names; None means identity.
_translator = None
def set_translator(p):
    """Install the mailbox-name translator.

    p -- 'cat' for the identity translation, otherwise a program that is
         started lazily, fed one name per line and expected to answer with
         one translated name per line (or the line "ERROR").

    The installed translator returns the translated name without its
    trailing newline and fails (via log_tags_and_fail) when the program
    reports an error, dies, or returns a path containing '..'.
    """
    global _translator
    if p == 'cat':
        _translator = lambda x: x
        return

    translator_filter = make_slave_filter_process(
        lambda pipe: subprocess.Popen(p, stdin=open(pipe), stdout=subprocess.PIPE),
        "translate")

    def translator_fn(x):
        translator_filter.write(x + '\n')
        translator_filter.flush()
        line = translator_filter.readline()
        if not line or line.strip() == 'ERROR':
            log_error("Translator {} on input {} gave an error".format(
                    p, x))
            # FakeSubprocess exposes the remaining output as lines();
            # it has no readlines() method.
            for l in translator_filter.lines():
                log_error(l)
            log_tags_and_fail("Unable to translate mailbox",
                              'translate', 'bad-translator', True)
        if '..' in line:
            log_error("Translator {} on input {} returned a path containing ..".format(
                    p, x))
            log_tags_and_fail('Translator returned a path containing ..',
                              'translate', 'bad-translator', True)

        # readline() keeps the line terminator; callers build paths from
        # the result, so it must not end in a newline.
        return line.rstrip('\n')

    _translator = translator_fn

def translate(x):
    """Return `x` mapped through the installed translator, if there is one."""
    if not _translator:
        return x
    return _translator(x)


# Long-running mddiff helper: file names go in, hash lines come out.
mddiff_sha_handler = make_slave_filter_process(
            lambda pipe: subprocess.Popen([MDDIFF, pipe], stdout=subprocess.PIPE),
            "sha_file")

def sha_file(name):
    """Return (header_sha, body_sha) of file `name` as reported by mddiff.

    Fails through log_tags_and_fail when mddiff answers with an ERROR
    line, and through log_internal_error_and_fail when the answer does not
    consist of exactly two fields.
    """
    mddiff_sha_handler.write(name+'\n')
    mddiff_sha_handler.flush()

    data = mddiff_sha_handler.readline()
    if data.startswith('ERROR'):
        log_tags_and_fail("Failed to sha1 message: " + (name or "nil"),
                          'sha_file', 'modify-while-update', False, 'retry')

    # Validate before unpacking: unpacking a malformed answer directly
    # raised a bare ValueError and skipped the intended error report.
    fields = data.split()
    if len(fields) != 2:
        log_internal_error_and_fail('mddiff incorrect behavior', 'mddiff')

    hsha, bsha = fields
    return hsha, bsha

def exists_and_sha(name):
    """Return (True, header_sha, body_sha) if `name` exists, else three Falses."""
    if not os.path.exists(name):
        return False, False, False

    header_sha, body_sha = sha_file(name)
    return True, header_sha, body_sha


def touch(f):
    """Make sure file `f` exists, creating it empty if necessary.

    An existing readable file is left untouched.  If the file can neither
    be read nor created, the problem is logged and ValueError is raised
    (via __error).
    """
    try:
        with open(f, 'r'):
            return      # already there and readable
    except IOError:
        pass

    try:
        with open(f, 'w'):
            pass
    except IOError:
        log_error('Unable to touch ' + quote(f))
        log_tags("touch", "bad-permissions", True,
                 "display-permissions(" + quote(f) + ")")
        # The module's failure helper is __error; a bare `error` is not
        # defined anywhere and would raise NameError instead.
        __error("Unable to touch a file")

def quote(s):
    """Return `s` in its repr() form, for use in log messages."""
    return '{!r}'.format(s)


def assert_exists(name):
    """Assert that path `name` exists (AssertionError names the path)."""
    # os.exists does not exist; the check lives in os.path.
    assert os.path.exists(name), "Not found: "+repr(name)

def url_quote(txt):
    """Percent-encode `txt`, escaping every reserved character including '/'."""
    # urllib.quote only exists on Python 2; Python 3 moved it to
    # urllib.parse.  Importing locally keeps this function self-contained.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote
    return quote(txt, safe='')

def url_decode(s):
    """Decode the percent-escapes in `s`."""
    # urllib.unquote only exists on Python 2; Python 3 moved it to
    # urllib.parse.  Importing locally keeps this function self-contained.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote
    return unquote(s)

def log_internal_error_tags(msg, ctx):
    """Emit the TAGS line for an internal error that happened in `ctx`.

    `msg` is accepted for call compatibility but is not part of the tag.
    """
    # Blob of "run gnome-open" junk not copied
    log_tags('internal-error', ctx, True)

def receive(inf, outfile):
    """Read one "chunk N" payload from `inf` and store it in `outfile`.

    inf     -- stream positioned at the "chunk <length>" header line
    outfile -- path of the file to (over)write with the payload

    Returns the announced payload length.  Fails (ValueError through the
    logging helpers) if the file cannot be opened, the peer sent ABORT,
    the header is malformed, or the stream ends before the whole payload
    has arrived.
    """
    try:
        outf = open(outfile, 'w')
    except IOError:
        log_error("Unable to open " + outfile + " for writing.")
        log_error('It may be caused by bad directory permissions, '+
                  'please check.')
        log_tags("receive", "non-writeable-file", True,
                 "display-permissions(" + quote(outfile) +")")
        # `error` is not defined in this module; __error is the helper.
        __error("Unable to write incoming data")

    # The with-block closes the file on every exit path, errors included.
    with outf:
        line = inf.readline()
        if not line or line.strip() == "ABORT":
            log_error("Data transmission failed.")
            log_error("This problem is transient, please retry.")
            log_tags_and_fail('server sent ABORT or connection died',
                              "receive", "network", False, "retry")

        header = re.match(r'^chunk (\d+)', line)
        if header is None:
            log_error("Data transmission failed.")
            log_error("Unexpected chunk header: " + quote(line))
            log_tags_and_fail('malformed chunk header',
                              "receive", "network", False, "retry")

        # In the Lua version, this is called "len", but that's a builtin
        # in Python
        chunk_len = int(header.group(1))
        total = chunk_len
        while chunk_len:
            data = inf.read(min(chunk_len, 16384))
            if not data:
                # End of stream before the announced length was reached;
                # without this check the loop would never terminate.
                log_error("Data transmission failed.")
                log_error("This problem is transient, please retry.")
                log_tags_and_fail('connection died',
                                  "receive", "network", False, "retry")
            chunk_len -= len(data)
            outf.write(data)

    return total

def handshake(dbfile):
    """Exchange protocol version and db-file hash with the peer.

    Writes "protocol <version>" and "dbfile <sha>" to stdout, then reads
    the peer's two corresponding lines from stdin.  `dbfile` is created
    first if it does not exist.  Any network problem or mismatch is logged
    and reported through log_tags_and_fail.
    """
    stdout.write("protocol {}\n".format(PROTOCOL_VERSION))
    touch(dbfile)
    # Hash of the local db file, computed by the external mddiff helper.
    sha_output = subprocess.check_output([MDDIFF, '--sha1sum', dbfile])
    db_sha = sha_output.split()[0]
    # NOTE(review): err_msg is computed but not used below.
    err_msg = sha_output[sha_output.index(' ')+1:]

    if db_sha == 'ERROR':
        log_internal_error_and_fail('unreadable db file: '+quote(dbfile), 'handshake')

    stdout.write("dbfile {}\n".format(db_sha))
    stdout.flush()

    # First reply line: the peer's protocol version.
    line = stdin.readline()
    if not line:
        log_error("Network error.")
        log_error("Unable to get any data from the other endpoint.")
        log_error("This problem may be transient, please retry.")
        log_error("Hint: did you correctly setup the SERVERNAME variable")
        log_error("on your client? Did you add an entry for it in your ssh")
        log_error("configuration file?")
        log_tags_and_fail('Network error', "handshake", "network", False, "retry")

    protocol = re.compile('^protocol (.+)$').match(line)
    if not protocol or protocol.group(1) != PROTOCOL_VERSION:
        log_error('Wrong protocol version.')
        log_error('The same version of syncmaildir must be used on '+
                  'both endpoints')
        log_tags_and_fail('Protocol version mismatch', "handshake", "protocol-mismatch", True)

    # Second reply line: the hash of the peer's db file.
    line = stdin.readline()
    if not line:
        log_error("The client disconnected during handshake")
        log_tags_and_fail('Network error', "handshake", "network", False, "retry")

    sha = re.compile(r'^dbfile (\S+)$').match(line)
    if not sha or sha.group(1) != db_sha:
        log_error('Local dbfile and remote db file differ.')
        log_error('Remove both files and push/pull again.')
        log_tags_and_fail('Database mismatch', "handshake", "db-mismatch", True, "run(rm "+
                 quote(dbfile)+")")

def dbfile_name(endpoint, mailboxes):
    """Return the path of the db file for this endpoint/mailboxes pair.

    The name is ~/.smd/<endpoint>__<mailboxes>.db.txt, where trailing
    slashes are dropped, '/' becomes '_' and, in the mailboxes part only,
    '%' becomes '_' too.  ~/.smd/ is created through mddiff.
    """
    endpoint_part = endpoint.rstrip('/').replace('/', '_')
    mailboxes_part = mailboxes.rstrip('/').replace('/', '_').replace('%', '_')

    smd_dir = os.path.expanduser('~/.smd/')
    subprocess.check_call([MDDIFF, '--mkdir-p', smd_dir])

    db_path = '~/.smd/{}__{}.db.txt'.format(endpoint_part, mailboxes_part)
    return os.path.expanduser(db_path)

def receive_delta(inf):
    """Read command lines from `inf` up to the terminating "END" line.

    Returns the list of command lines (line endings preserved, "END"
    excluded).  If the stream ends before "END" is seen, the problem is
    logged and ValueError is raised (via __error).
    """
    cmds = []
    while True:
        line = inf.readline()
        if not line:
            log_error('Unable to receive a complete diff')
            log_tags("receive-delta", "network", False, "retry")
            # `error` is not defined in this module; __error is the helper.
            __error("network error while receiving delta")
        if line.strip() == "END":
            return cmds
        cmds.append(line)

def homefy(filename):
    """Return `filename` resolved relative to the user's home directory."""
    home_relative = "~/" + filename
    return os.path.expanduser(home_relative)

def execute_add(name, hsha, bsha):
    """Carry out an ADD command by hard-linking to an existing message.

    name -- home-relative path of the link to create
    hsha -- expected header hash
    bsha -- expected body hash

    Returns True if the target already exists with matching hashes or a
    link was created, False if it exists with different content.  If none
    of the message's files known to notmuch matches, the failure is logged
    and raised through log_tags_and_fail.
    """
    basename = os.path.basename(name)
    # The real smd creates symlinks from workarea to the target
    # directory, I dunno why.
    dest = homefy(name)
    ex, hsha_1, bsha_1 = exists_and_sha(dest)
    if ex:
        if hsha_1 != hsha or bsha_1 != bsha:
            log_error("Failed to add {} since a file with the same name".format(
                    dest))
            log_error('exists but its content is different.')
            log_error("Current hash {}/{}, requested hash {}/{}".format(
                    hsha_1, bsha_1, hsha, bsha))
            log_error('To fix this problem you should rename '+dest)
            log_error('Executing `cd; mv -n '+quote(name)+' '+
                      'FIXME: tmp_for' +'` should work.')
            log_tags("mail-addition", "concurrent-mailbox-edit", True,
                     )
                     #mk_act("mv", name))
            return False

        return True   # already there

    # The link name is the mangled message id plus an optional flag suffix.
    if ':2,' in basename:
        basename = basename[:basename.index(':2,')]
    filenames = original_message_filenames(basename)

    # Bound up front so the error report below also works when notmuch
    # knows no file at all for this message.
    filename = None
    for filename in filenames:
        orig_exists, hsha_orig, bsha_orig = exists_and_sha(filename)
        assert orig_exists
        if hsha_orig == hsha or bsha_orig == bsha:
            # FIXME: How do we decide whether to use symlinks or not?
            # Seems like syncmaildir can't cope with symlinks, so let's just
            # always use hard links
            os.link(filename, dest)
            return True

    log_error("Something seriously wrong here: we tried to link {}".format(
            filename))
    log_error("to {} but the hashes were wrong. We wanted {}/{}".format(
            dest, hsha, bsha))
    log_error("but we didn't see that in {}".format(filenames))
    log_tags_and_fail('Mail corpus wrong')
    # Not reached: log_tags_and_fail raises.
    return False

def execute_delete(name, hsha, bsha):
    """Carry out a DELETE command.

    The home-relative file `name` must exist and have exactly the given
    header and body hashes (checked with assertions) before it is
    unlinked.  Returns True.
    """
    target = homefy(name)
    present, current_hsha, current_bsha = exists_and_sha(target)
    assert present
    assert current_hsha == hsha
    assert current_bsha == bsha

    os.unlink(target)
    return True

def execute_copy(name_src, hsha, bsha, name_tgt):
    """Carry out a COPY command by linking `name_tgt` to `name_src`.

    Both names are home-relative.  The source must exist with the given
    hashes and the target must not exist (checked with assertions).  A
    symbolic-link source is reproduced as a symbolic link with the same
    target; anything else is hard-linked.  Returns True.
    """
    name_src = homefy(name_src)
    name_tgt = homefy(name_tgt)
    ex_src, hsha_src, bsha_src = exists_and_sha(name_src)
    ex_tgt, hsha_tgt, bsha_tgt = exists_and_sha(name_tgt)

    # Not reproducing all logic
    assert ex_src
    assert not ex_tgt

    assert hsha == hsha_src
    assert bsha == bsha_src
    # lstat, not stat: stat follows symbolic links, so its mode never
    # satisfies S_ISLNK and the symlink branch could not be taken.
    if stat.S_ISLNK(os.lstat(name_src).st_mode):
        link_tgt = os.readlink(name_src)
        os.symlink(link_tgt, name_tgt)
    else:
        os.link(name_src, name_tgt)
    return True

def execute_error(msg):
    """Handle an ERROR command: log the mddiff failure and return False.

    A message starting with "Unable to open directory" is tagged as a
    disappeared directory (no human intervention needed); anything else
    is tagged as unknown and needing intervention.
    """
    log_error('mddiff failed: '+msg)
    # Python spells the booleans True/False; the lower-case Lua names
    # raised NameError here.
    if msg.startswith("Unable to open directory"):
        log_tags("mddiff", "directory-disappeared", False)
    else:
        log_tags("mddiff", "unknown", True)

    # return (trace(false))
    return False


def execute(cmd):
    """The main switch, dispatching actions.

    cmd -- one command line of the received delta (ADD, DELETE, COPY or
           ERROR; REPLACEHEADER, COPYBODY and REPLACE are not supported)

    Returns the handler's result (True on success, False on failure).
    Unsupported and unknown opcodes raise ValueError through the logging
    helpers.
    """
    opcode = cmd.split()[0]

    if opcode == "ADD":
        name, hsha, bsha = re.compile(r'^ADD (\S+) (\S+) (\S+)$').match(cmd).groups()
        name = url_decode(name)
        mkdir_p(name)
        return execute_add(name, hsha, bsha)

    elif opcode == "DELETE":
        name, hsha, bsha = re.compile(r'^DELETE (\S+) (\S+) (\S+)$').match(cmd).groups()
        name = url_decode(name)
        mkdir_p(name)
        return execute_delete(name, hsha, bsha)

    elif opcode == "COPY":
        name_src, hsha, bsha, name_tgt = re.compile(
            r'COPY (\S+) (\S+) (\S+) TO (\S+)$').match(cmd).groups()
        name_src = url_decode(name_src)
        name_tgt = url_decode(name_tgt)
        mkdir_p(name_src)
        mkdir_p(name_tgt)
        return execute_copy(name_src, hsha, bsha, name_tgt)

    elif opcode in ['REPLACEHEADER', 'COPYBODY', 'REPLACE']:
        # The helper needs the failing context as its second argument;
        # without it the call itself died with a TypeError.
        log_internal_error_and_fail(opcode + ' was called: ' + cmd, 'execute')
        return False

    elif opcode == "ERROR":
        msg = cmd[cmd.index(' ')+1:]
        return execute_error(msg)

    else:
        # `error` is not defined in this module; __error is the helper.
        __error("Unknown opcode " + opcode)
        return False

def main():
    """Run one client session over stdin/stdout.

    Parses the command line, performs the handshake, executes every
    command of the received delta and finally patches the local db file
    with the xdelta sent by the peer.  Exits with status 3, 4 or 5 after
    sending ABORT when a command, the patch or the rename fails.
    """
    global VERBOSE

    parser = argparse.ArgumentParser(description="")
    parser.add_argument('-v', '--verbose', action='store_true', default=False)
    # NOTE(review): --dry-run is accepted but nothing below consults it.
    parser.add_argument('-d', '--dry-run', action='store_true', default=False)
    parser.add_argument('-t', '--translator', type=str, default='cat')
    parser.add_argument('endpoint')
    parser.add_argument('mailboxes')

    opts = parser.parse_args()

    # Make --verbose effective: log() consults the module-level flag,
    # which was never updated from the parsed options.
    VERBOSE = opts.verbose

    set_translator(opts.translator)
    read_message_ids()

    dbfile = dbfile_name(opts.endpoint, opts.mailboxes)
    xdelta = dbfile + '.xdelta'
    newdb = dbfile + '.new'

    # startswith also copes with an empty mailboxes argument, where
    # indexing the first character would raise IndexError.
    if opts.mailboxes.startswith('/'):
        log_error("Absolute paths are not supported: " + opts.mailboxes)
        log_tags_and_fail("Absolute path detected", "main", "mailbox-has--absolute-path", True)

    handshake(dbfile)
    commands = receive_delta(stdin)
    for cmd in commands:
        try:
            rc = execute(cmd)
            # Just wrap the whole thing in try-except to abort "cleanly"
        except Exception as e:
            log_error("Got an exception when processing {}: {}".format(cmd.strip(), str(e)))
            log_error(traceback.format_exc())
            rc = False
        if not rc:
            stdout.write('ABORT\n')
            stdout.flush()
            sys.exit(3)

        # if len(get_full_email_queue) > queue_max_len:
        #    process_pending_queue()

    # some commands may still be in the queue, we fire them now
    # process_pending_queue()

    # we commit and update the dbfile
    stdout.write('COMMIT\n')
    stdout.flush()
    receive(stdin, xdelta)

    rc = subprocess.call([XDELTA, 'patch', xdelta, dbfile, newdb])
    if rc != 0 and rc != 256:
        log_error('Unable to apply delta to dbfile.')
        stdout.write('ABORT\n')
        stdout.flush()
        sys.exit(4)

    try:
        os.rename(newdb, dbfile)
    except OSError:
        log_error('Unable to rename ' + newdb + ' to ' + dbfile)
        stdout.write('ABORT\n')
        stdout.flush()
        sys.exit(5)

    os.unlink(xdelta)
    stdout.write('DONE\n')
    stdout.flush()

    #log_tag('stats::new-mails(' + statistics.added +
    #'), del-mails(' + statistics.removed + ')')

# Characters that may appear unescaped in a mangled file name.
# '-' is kept last so it stays literal inside the character class below.
CHARSET = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
           'abcdefghijklmnopqrstuvwxyz'
           '0123456789'
           '+_@=.,-')

# Matches (and captures) one character that is NOT in CHARSET.
encode_re = '([^' + CHARSET + '])'

def encode_one_char(match):
    """Return the %xx escape (lower-case hex) for the character in group 1."""
    code_point = ord(match.group(1))
    return '%' + format(code_point, '02x')

def encode_for_fs(str):
    """Escape every character of `str` outside CHARSET as %xx."""
    unsafe_char = re.compile(encode_re)
    return unsafe_char.sub(encode_one_char, str)

def mangle_message_id(msg_id):
    """
    Return a mangled version of the message id, suitable for use as a filename.

    The id is %-escaped first.  If the escaped form is too long to leave
    room for a maildir flag suffix, it is shortened to
    prefix + '...' + suffix + the first hex digits of its SHA-256, so the
    mapping is not reversible.
    """
    MAX_LENGTH = 143
    FLAGS_LENGTH = 8    # :2,S...??
    SHA_LENGTH = 8

    encoded = encode_for_fs(msg_id)
    room_for_id = MAX_LENGTH - FLAGS_LENGTH
    if len(encoded) < room_for_id:
        return encoded

    # Split what is left after the digest and the '...' marker evenly
    # between the head and the tail of the escaped id.
    half = (room_for_id - SHA_LENGTH - 3) // 2
    digest = hashlib.sha256(encoded).hexdigest()
    return ''.join([encoded[:half], '...', encoded[-half:], digest[:SHA_LENGTH]])

# mangled message id -> list of that message's file names from notmuch;
# filled by read_message_ids().
MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES = {}
# Read-only handle on the notmuch database, opened at import time.
DB = notmuch.Database(mode=notmuch.Database.MODE.READ_ONLY)
def read_message_ids():
    """Record the file names of every message in the notmuch database.

    Fills MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES, keyed by the
    mangled message id.
    """
    # We can't base this on tags at all because tags aren't applied yet
    querystr = '*'

    q_new = notmuch.Query(DB, querystr)
    q_new.set_sort(notmuch.Query.SORT.UNSORTED)
    for msg in q_new.search_messages():
        mangled_id = mangle_message_id(msg.get_message_id())
        fiter = msg.get_filenames()
        # list(fiter) gives me a NotInitializedException????
        # so the iterator is drained by hand with next() instead.
        filenames = []
        while True:
            try:
                filename = next(fiter)
                filenames.append(filename)
            except StopIteration:
                break

        MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES[mangled_id] = filenames

def original_message_filenames(mangled_filename):
    """Return the notmuch file names recorded for `mangled_filename`.

    An unknown name is logged as an error first; the lookup then raises
    KeyError.
    """
    known = MESSAGE_MANGLED_FILENAMES_TO_ORIGINAL_FILENAMES
    if mangled_filename not in known:
        log_error("{} not in notmuch. Trying to tag nonexistant message?".format(
                mangled_filename))
    return known[mangled_filename]

# Script entry point: run main() and turn any uncaught exception into a
# logged error (message plus traceback) and exit status 6.
if __name__ == '__main__':
    try:
        main()
    except Exception as e:
        log_error(str(e))
        log_error(traceback.format_exc())
        sys.exit(6)
_______________________________________________
notmuch mailing list
notmuch@notmuchmail.org
http://notmuchmail.org/mailman/listinfo/notmuch

Reply via email to