Hi, I did a quick review.
This feature would obviously require documentation in offlineimap.conf, and it must be marked EXPERIMENTAL. On Mon, Dec 19, 2016 at 09:29:29AM +0000, lkcl wrote: > diff --git a/offlineimap/folder/LocalStatusLMDB.py > b/offlineimap/folder/LocalStatusLMDB.py > index e69de29..3108595 100644 > --- a/offlineimap/folder/LocalStatusLMDB.py > +++ b/offlineimap/folder/LocalStatusLMDB.py > @@ -0,0 +1,299 @@ > +# Local status cache virtual folder: LMDB backend > +# Copyright (C) 2009-2016 Stewart Smith and contributors. > +# Copyright (C) 2016 Luke Kenneth Casson Leighton <[email protected]> We decided to use "and contributors" to avoid adding new copyright lines. If this new file is yours, I'd suggest: # Copyright (C) 2016 Luke Kenneth Casson Leighton and contributors. to keep things simple. If you really want to keep Stewart's copyright, # Copyright (C) 2009-2016 Stewart Smith and contributors. is good, too. > +# > +# This program is free software; you can redistribute it and/or modify > +# it under the terms of the GNU General Public License as published by > +# the Free Software Foundation; either version 2 of the License, or > +# (at your option) any later version. > +# > +# This program is distributed in the hope that it will be useful, > +# but WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > +# GNU General Public License for more details. > +# > +# You should have received a copy of the GNU General Public License > +# along with this program; if not, write to the Free Software > +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 > USA > + > +import os > +import lmdb Python 2.7.10 (default, Nov 9 2016, 23:16:09) [GCC 4.9.3] on linux2 Type "help", "copyright", "credits" or "license" for more information. >>> import lmdb Traceback (most recent call last): File "<stdin>", line 1, in <module> ImportError: No module named lmdb >>> We need to make this dependency optional. 
Note that this is likely easier to achieve when importing/using LocalStatusLMDB in repository/LocalStatus.py. > +from sys import exc_info > + > +import six > +try: > + # Use ultra-fast json library if available (much faster, > + # see https://blog.hartleybrody.com/python-serialize/) > + import ujson as json > +except ImportError: > + # not available, fall back to standard python json library (slower) > + import json > + > +from .Base import BaseFolder > + > + > +class LocalStatusLMDBFolder(BaseFolder): > + """ LocalStatus backend implemented with an LMDB database > + """ """LocalStatus backend implemented with an LMDB database.""" (style) > + > + # Current version of our db format. > + cur_version = 1 > + > + def __init__(self, name, repository): > + self.sep = '.' # Needs to be set before super().__init__(). > + super(LocalStatusLMDBFolder, self).__init__(name, repository) > + self.root = repository.root > + self.filename = os.path.join(self.getroot(), > self.getfolderbasename()) > + > + self._newfolder = False # Flag if the folder is new. > + > + dirname = os.path.dirname(self.filename) > + if not os.path.exists(dirname): > + os.makedirs(dirname) > + if not os.path.isdir(dirname): > + raise UserWarning("LMDB database path '%s' is not a directory."% > + dirname) Race condition. Not significant, though. > + > + self._env = None > + > + def openfiles(self): > + """ Open database, check it, upgrade if needed > + """ > + > + # Try to establish connection > + try: > + self._env = lmdb.open(self.filename, max_dbs=10) > + except lmdb.Error as e: > + # Operation had failed. > + six.reraise(UserWarning, > + UserWarning( > + "cannot open database file '%s': %s.\nYou might" > + " want to check the rights to that file and if " > + "it cleanly opens with the 'lmdb<3>' command"% > + (self.filename, e)), > + exc_info()[2]) > + > + with self._env.begin() as txn: I wonder whether locks are missing here. This class will be *instantiated* and used more than once in different threads. 
In order to avoid repeating the same code/patterns in the backends, I guess the best would be to add a new layer (class) to serialize the I/O on top of them. Not sure about that, though. > + # Test if db version is current enough and if db is readable. > + try: > + db = self.env.open_db('metadata') > + with self._env.begin(db=db) as txn: > + cursor = txn.cursor() > + version = int(cursor.get('db_version')) > + except: > + # db file missing or corrupt, recreate it. > + self.__create_db() > + else: > + # Fetch db version and upgrade if needed. > + if version < LocalStatusLMDBFolder.cur_version: > + self.__upgrade_db(version) > + > + def purge(self): > + """ Remove any pre-existing database. Do not call in dry-run mode. > + """ > + > + try: > + os.unlink(self.filename) > + except OSError as e: > + self.ui.debug('', "could not remove file %s: %s"% > + (self.filename, e)) > + > + def storesmessages(self): > + return False > + > + def getfullname(self): > + return self.filename > + > + # Interface from LocalStatusFolder > + def isnewfolder(self): > + return self._newfolder > + > + def __upgrade_db(self, from_ver): > + """ Upgrade the lmdb format from version 'from_ver' to current > + """ > + > + # Future version upgrades come here... > + # if from_ver <= 1: ... #upgrade from 1 to 2 > + # if from_ver <= 2: ... #upgrade from 2 to 3 > + # if from_ver <= 3: ... #upgrade from 3 to 4 > + > + > + def __create_db(self): > + """Create a new db file. 
> + """ > + self.ui._msg('Creating new Local Status db for %s:%s'% > + (self.repository, self)) > + self._metadata_db = self._env.open_db('metadata') > + self._status_db = self._env.open_db('status') > + with self._env.begin(write=True) as txn: > + txn.put('db_version', str(LocalStatusLMDBFolder.cur_version), > + db=self._metadata_db) > + self._newfolder = True > + > + # Interface from BaseFolder > + def msglist_item_initializer(self, uid): > + # XXX not used (there's no point) > + return { 'uid': uid, > + 'flags': set(), > + 'labels': set(), Trailing spaces for above two lines. > + 'time': 0, > + 'mtime': 0 > + } > + > + # Interface from BaseFolder > + def cachemessagelist(self): > + """ caches in memory all messages in the lmdb status_db > + """ > + self.dropmessagelistcache() > + with self._env.begin(db=self._status_db) as txn: > + for key, val in txn.cursor(): > + uid = int(key) > + print "cachemsglist", uid, val > + # if flags or labels are empty they're stored as null > + # so subst an empty tuple, converts to empty set > + (flags, labels, _time, mtime) = json.loads(val) > + msg = { 'time': _time, > + 'mtime': mtime, > + 'flags': set(flags or () ), ^ > + 'labels': set(labels or () ) ^ 'labels': set(labels or ()) > + } > + self.messagelist[uid] = msg > + > + def closefiles(self): > + self._env.close() > + self._env = None > + self._metadata_db = None > + self._status_db = None > + > + # Interface from LocalStatusFolder > + def save(self): > + pass > + # Noop. every transaction commits to database! > + > + def _save_msg(self, txn, uid, msg): > + # take relevant stuff from msg, convert to tuple. > + # empty sets are saved as null > + msg = (msg['flags'] or None, # empty set evaluates True => save space > + msg['labels'] or None, # ditto > + msg['time'], > + msg['mtime']) > + txn.put(str(uid), json.dumps(msg), db=self._status_db) > + > + def saveall(self): > + """ Saves the entire messagelist to the database. 
> + """ > + with self._env.begin(write=True) as txn: > + for uid in self.messagelist: > + self._save_msg(txn, uid, self.messagelist[uid]) > + > + # Interface from BaseFolder > + def savemessage(self, uid, content, flags, rtime, mtime=0, labels=None): > + """ Writes a new message, with the specified uid. > + > + See folder/Base for detail. Note that savemessage() does not > + check against dryrun settings, so you need to ensure that > + savemessage is never called in a dryrun mode. > + """ > + > + if uid < 0: > + # We cannot assign a uid. > + return uid > + > + if self.uidexists(uid): # Already have it. > + self.savemessageflags(uid, flags) > + return uid > + > + msg = {'uid': uid, > + 'flags': flags, > + 'time': rtime, > + 'mtime': mtime, Trailing spaces. > + 'labels': labels or set() > + } > + self.messagelist[uid] = msg > + > + with self._env.begin(write=True) as txn: > + self._save_msg(txn, uid, msg) > + > + return uid > + > + # Interface from BaseFolder > + def savemessageflags(self, uid, flags): > + assert self.uidexists(uid) > + msg = self.messagelist[uid] > + msg['flags'] = flags > + > + with self._env.begin(write=True) as txn: > + self._save_msg(txn, uid, msg) > + > + def getmessageflags(self, uid): > + return self.messagelist[uid]['flags'] > + > + def savemessagelabels(self, uid, labels, mtime=None): > + msg = self.messagelist[uid] > + msg['labels'] = labels > + if mtime: > + msg['mtime'] = mtime > + > + with self._env.begin(write=True) as txn: > + self._save_msg(txn, uid, msg) > + > + def _save_by_uids(self, uids): > + with self._env.begin(write=True) as txn: > + for uid in uids: # dict iterates keys, list iterates members > + self._save_msg(txn, uid, self.messagelist[uid]) > + > + def savemessageslabelsbulk(self, labels): > + """ Saves labels from a dictionary in a single database operation. 
> + """ > + for uid in labels: > + self.messagelist[uid]['labels'] = l > + self._save_by_uids(labels) # use for on dict to get uids as keys > + > + def addmessageslabels(self, uids, labels): > + for uid in uids: > + self.messagelist[uid]['labels'].update(labels) > + self._save_by_uids(uids) > + > + def deletemessageslabels(self, uids, labels): > + for uid in uids: > + self.messagelist[uid]['labels'] -= labels > + self._save_by_uids(uids) # use for on dict to get uids as keys > + > + def getmessagelabels(self, uid): > + return self.messagelist[uid]['labels'] > + > + def savemessagesmtimebulk(self, mtimes): > + """ Saves mtimes from a dictionary in a single database operation. > + """ > + > + for uid in mtimes: > + mt = mtimes[uid] > + self.messagelist[uid]['mtime'] = mt > + self._save_by_uids(mtimes) # use for on dict to get uids as keys > + > + def getmessagemtime(self, uid): > + return self.messagelist[uid]['mtime'] > + > + # Interface from BaseFolder > + def deletemessage(self, uid): > + if not uid in self.messagelist: > + return > + with self._env.begin(write=True) as txn: > + txn.drop(self._status_db, str(uid)) > + del self.messagelist[uid] > + > + # Interface from BaseFolder > + def deletemessages(self, uidlist): > + """ Delete list of UIDs from status cache > + """ > + > + # Weed out ones not in self.messagelist > + uidlist = [uid for uid in uidlist if uid in self.messagelist] > + if len(uidlist) == 0: > + return > + with self._env.begin(write=True) as txn: > + for uid in uidlist: > + txn.drop(self._status_db, str(uid)) > + for uid in uidlist: > + del self.messagelist[uid] > + > diff --git a/offlineimap/repository/LocalStatus.py > b/offlineimap/repository/LocalStatus.py > index f23020f..b21f07b 100644 > --- a/offlineimap/repository/LocalStatus.py > +++ b/offlineimap/repository/LocalStatus.py > @@ -19,6 +19,7 @@ import os > > from offlineimap.folder.LocalStatus import LocalStatusFolder > from offlineimap.folder.LocalStatusSQLite import LocalStatusSQLiteFolder 
> +from offlineimap.folder.LocalStatusLMDB import LocalStatusLMDBFolder > from offlineimap.repository.Base import BaseRepository > > class LocalStatusRepository(BaseRepository): > @@ -32,6 +33,11 @@ class LocalStatusRepository(BaseRepository): > 'root': os.path.join(account.getaccountmeta(), > 'LocalStatus-sqlite') > } > > + self.backends['lmdb'] = { > + 'class': LocalStatusLMDBFolder, > + 'root': os.path.join(account.getaccountmeta(), > 'LocalStatus-lmdb') > + } > + > self.backends['plain'] = { > 'class': LocalStatusFolder, > 'root': os.path.join(account.getaccountmeta(), 'LocalStatus') > -- Nicolas Sebrecht _______________________________________________ OfflineIMAP-project mailing list: [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/offlineimap-project OfflineIMAP homepages: - https://github.com/OfflineIMAP - http://offlineimap.org
