David Bremner <da...@tethera.net> writes: > It's still a prototype, and there is not much error checking, and there > are certain issues not dealt with at all (the ones I thought about are > commented).
Hi everyone, I'm very interested in running notmuch on all my laptops and having my mail and its tags be synchronized for me, so at Bremner's direction on IRC, I played around with this script a little. At first it wouldn't run on my computer; the script uses message IDs as filenames, which can be quite long, whereas I keep my mail in my $HOME, which is on an ecryptfs filesystem, which has a filename limit of 143 characters. I've modified the script so that it would run by mangling filenames, which is irreversible (the original tried to encode/decode filenames reversibly). Then I got a little carried away, adding --verbose and --dry-run options as well as removing a couple of trailing semicolons. Here's my version, in case it should interest anyone else.
# Copyright 2013, David Bremner <da...@tethera.net>
# Licensed under the same terms as notmuch.
"""Sync the notmuch tag database to/from a link farm of per-tag maildirs.

Each tag gets a maildir-shaped directory (``cur``/``new``/``tmp``) below the
tag root; a message carrying that tag is represented by a hard or symbolic
link in ``<tagroot>/<encoded tag>/cur`` whose name is derived from the
message id (see ``mangle_message_id``).
"""

import notmuch
import re
import os
import errno
import sys
from collections import defaultdict
import argparse
import hashlib

# Skip automatic and maildir tags.  notmuch spells its automatic tag
# "attachment"; the historical misspelling "attachement" is still accepted so
# that behaviour for anyone relying on it does not change.
skiptags = re.compile(
    r"^(attachment|attachement|signed|encrypted"
    r"|draft|flagged|passed|replied|unread)$")


# some random person on stack overflow suggests:
def mkdir_p(path):
    """Create ``path`` and any missing parents; an existing directory is OK.

    Raises OSError for any failure other than "already exists as a
    directory".
    """
    try:
        os.makedirs(path)
    except OSError as exc:  # Python >2.5
        if exc.errno == errno.EEXIST and os.path.isdir(path):
            pass
        else:
            raise


VERBOSE = False


def log(msg):
    """Print ``msg`` when --verbose was given."""
    if VERBOSE:
        print(msg)


CHARSET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+_@=.,-'
encode_re = '([^{0}])'.format(CHARSET)
# NOTE: only escapes in the range %00-%7f are decoded; an escape produced for
# a character with ord() >= 0x80 is left untouched by decode_from_fs().
decode_re = '[%]([0-7][0-9A-Fa-f])'


def encode_one_char(match):
    """Return the %xx escape for the single character captured by ``match``."""
    return '%{:02x}'.format(ord(match.group(1)))


def encode_for_fs(text):
    """Percent-escape every character of ``text`` that is not in CHARSET."""
    return re.sub(encode_re, encode_one_char, text, 0)


def mangle_message_id(msg_id):
    """
    Return a mangled version of the message id, suitable for use as a
    filename.

    Short ids are simply percent-escaped.  Ids whose escaped form would not
    leave room for the maildir flags within MAX_LENGTH are shortened to
    ``<prefix>...<suffix><hash>``, where ``<hash>`` is the first SHA_LENGTH
    hex digits of the SHA-256 of the escaped id.  The shortening is not
    reversible.
    """
    MAX_LENGTH = 143
    FLAGS_LENGTH = 8    # :2,S...??

    encoded = encode_for_fs(msg_id)
    if len(encoded) < MAX_LENGTH - FLAGS_LENGTH:
        return encoded

    SHA_LENGTH = 8
    TRUNCATED_ID_LENGTH = MAX_LENGTH - SHA_LENGTH - FLAGS_LENGTH
    PREFIX_LENGTH = SUFFIX_LENGTH = (TRUNCATED_ID_LENGTH - 3) // 2
    prefix = encoded[:PREFIX_LENGTH]
    suffix = encoded[-SUFFIX_LENGTH:]

    sha = hashlib.sha256()
    # hashlib wants bytes on Python 3.  The escaped id only contains ASCII
    # characters, so encoding it does not change the digest on Python 2.
    sha.update(encoded.encode('utf-8'))
    return prefix + '...' + suffix + sha.hexdigest()[:SHA_LENGTH]


def decode_one_char(match):
    """Return the character for the two hex digits captured by ``match``."""
    return chr(int(match.group(1), 16))


def decode_from_fs(text):
    """Undo encode_for_fs() for the escapes matched by ``decode_re``."""
    return re.sub(decode_re, decode_one_char, text, 0)


def mk_tag_dir(tagdir):
    """Create the cur/new/tmp maildir skeleton below ``tagdir``."""
    mkdir_p(os.path.join(tagdir, 'cur'))
    mkdir_p(os.path.join(tagdir, 'new'))
    mkdir_p(os.path.join(tagdir, 'tmp'))


flagpart = '(:2,[^:]*)'
flagre = re.compile(flagpart + '$')


def path_for_msg(tagdir, msg):
    """Return the link path for ``msg`` inside the maildir ``tagdir``.

    The maildir flag suffix (":2,...") of the message's own filename, if it
    has one, is carried over to the link name.
    """
    filename = msg.get_filename()
    flagsmatch = flagre.search(filename)
    if flagsmatch is None:
        flags = ''
    else:
        flags = flagsmatch.group(1)

    return os.path.join(tagdir, 'cur',
                        mangle_message_id(msg.get_message_id()) + flags)


def unlink_message(tagdir, msg):
    """Remove every link for ``msg`` from ``tagdir``/cur, whatever its flags.

    Honours --dry-run: matching files are logged but not removed.
    """
    curdir = os.path.join(tagdir, 'cur')

    # The mangled id may contain regex metacharacters ('+', '.'), so it has
    # to be escaped before it becomes part of a pattern.
    filepattern = re.escape(mangle_message_id(msg.get_message_id())) \
        + flagpart + '?$'
    filere = re.compile(filepattern)

    for entry in os.listdir(curdir):
        if filere.match(entry):
            log("Unlinking {}".format(os.path.join(curdir, entry)))
            if not opts.dry_run:
                os.unlink(os.path.join(curdir, entry))


def dir_for_tag(tag):
    """Return the directory below the tag root that represents ``tag``."""
    enc_tag = encode_for_fs(tag)
    return os.path.join(tagroot, enc_tag)


disk_tags = defaultdict(set)    # mangled message id -> tags found on disk
disk_ids = set()                # every mangled message id found on disk


def read_tags_from_disk(rootdir):
    """Populate ``disk_tags`` and ``disk_ids`` from the files below ``rootdir``.

    The tag of a file is taken from the name of the directory two levels
    above it (<tag>/cur/<file>).
    """
    for root, subFolders, files in os.walk(rootdir):
        for filename in files:
            mangled_id = filename.split(':')[0]
            tag = root.split('/')[-2]
            disk_ids.add(mangled_id)
            disk_tags[mangled_id].add(decode_from_fs(tag))


# Main program

parser = argparse.ArgumentParser(
    description='Sync notmuch tag database to/from link farm')
parser.add_argument('-l', '--link-style',
                    choices=['hard', 'symbolic', 'adaptive'],
                    default='adaptive')
parser.add_argument('-d', '--destination', choices=['disk', 'notmuch'],
                    default='disk')
# A plain int literal: the "L" long suffix is a syntax error on Python 3.
parser.add_argument('-t', '--threshold', default=50000, type=int)
parser.add_argument('-n', '--dry-run', default=False, action='store_true')
parser.add_argument('-v', '--verbose', default=False, action='store_true')
parser.add_argument('tagroot')

opts = parser.parse_args()
VERBOSE = opts.verbose
tagroot = opts.tagroot

sync_from_links = (opts.destination == 'notmuch')

read_tags_from_disk(tagroot)

if sync_from_links:
    db = notmuch.Database(mode=notmuch.Database.MODE.READ_WRITE)
else:
    db = notmuch.Database(mode=notmuch.Database.MODE.READ_ONLY)

dbtags = [tag for tag in db.get_all_tags() if not skiptags.match(tag)]

querystr = ' OR '.join('tag:' + tag for tag in dbtags)

q_new = notmuch.Query(db, querystr)
q_new.set_sort(notmuch.Query.SORT.UNSORTED)
for msg in q_new.search_messages():

    # silently ignore empty tags
    db_tags = set(tag for tag in msg.get_tags()
                  if tag != '' and not skiptags.match(tag))

    message_id = msg.get_message_id()

    mangled_id = mangle_message_id(message_id)

    disk_ids.discard(mangled_id)

    missing_on_disk = db_tags.difference(disk_tags[mangled_id])
    missing_in_db = disk_tags[mangled_id].difference(db_tags)

    if sync_from_links:
        msg.freeze()

    filename = msg.get_filename()

    if missing_on_disk:
        if opts.link_style == 'adaptive':
            # Files larger than the threshold are symlinked, others
            # hard-linked.
            statinfo = os.stat(filename)
            symlink = (statinfo.st_size > opts.threshold)
        else:
            symlink = opts.link_style == 'symbolic'

    for tag in missing_on_disk:

        if sync_from_links:
            log("Removing tag {} from {}".format(tag, message_id))
            if not opts.dry_run:
                msg.remove_tag(tag, sync_maildir_flags=False)
        else:
            tagdir = dir_for_tag(tag)

            if not opts.dry_run:
                mk_tag_dir(tagdir)

            newlink = path_for_msg(tagdir, msg)

            log("Linking {} to {}".format(filename, newlink))
            if not opts.dry_run:
                if symlink:
                    os.symlink(filename, newlink)
                else:
                    os.link(filename, newlink)

    for tag in missing_in_db:
        if sync_from_links:
            log("Adding {} to message {}".format(tag, message_id))
            if not opts.dry_run:
                msg.add_tag(tag, sync_maildir_flags=False)
        else:
            tagdir = dir_for_tag(tag)
            unlink_message(tagdir, msg)

    if sync_from_links:
        msg.thaw()

# everything remaining in disk_ids is a deleted message
# unless we are syncing back to the database, in which case
# it just might not currently have any non maildir tags.

if not sync_from_links:
    for root, subFolders, files in os.walk(tagroot):
        for filename in files:
            mangled_id = filename.split(':')[0]
            if mangled_id in disk_ids:
                stale = os.path.join(root, filename)
                log("Unlinking {}".format(stale))
                # --dry-run must never touch the link farm.
                if not opts.dry_run:
                    os.unlink(stale)

db.close()

# currently empty directories are not pruned.
_______________________________________________ notmuch mailing list notmuch@notmuchmail.org http://notmuchmail.org/mailman/listinfo/notmuch