Hi,

we are doing backups of our IMAP mailboxes using
simple Unix tools like find + afio. Sometimes, when a mailbox
is still locked, deleted files are not expunged immediately
and therefore end up in the backup.

During testing we found an additional problem with re-creating
a deleted folder and fixed this one in the same run, too.

Attached you'll find a few tools for cyrus imapd 2.4(.17):

1. cyr_getdeleted: Lists deleted IMAP messages for a given mboxspec.
    (+ additional patch to add it to the build system)

2. A patch for cyrus to create a ".deleted" file when a folder
  is marked for deletion. If the folder is re-created before it
  was removed from disk and we find the .deleted file,
  we clear out all *stale* messages.

3. find_wrapper.py: A wrapper around find(1) to automatically
  exclude deleted messages and folders. Useful for backup shell scripts.
  Makes use of the ".deleted" file, too.
 
I'm putting this out here in case these additions might come in handy
for other people running cyrus imapd 2.4. Most of those tools and patches
were developed by my colleague Samir Aguiar.

As written in the "Mailbox deletion race: folders and files are never deleted"
email[1], we'll especially revisit issue 2 when testing cyrus 2.5.x.

Have a nice day,
Thomas

[1] http://lists.andrew.cmu.edu/pipermail/cyrus-devel/2015-February/003133.html
/* Same license as the cyrus imapd 
   (c) 2015 Intra2net AG - Samir Aguiar
*/

#include <config.h>

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "global.h"
#include "cyr_lock.h"
#include "exitcodes.h"
#include "mboxlist.h"
#include "mailbox.h"

/* config.c stuff */
const int config_need_data = 0;

/* current namespace */
static struct namespace deleted_namespace;

/* Nonzero (the default) silences log_msg(); main() clears it for -v. */
static int quiet = 1;

/* printf-style message to stdout, flushed immediately; no-op while quiet. */
#define log_msg(fmt, ...) \
        do { \
            if (!quiet) \
            { \
                fprintf (stdout, fmt, __VA_ARGS__); \
                fflush(stdout); \
            } \
        } while(0)

/* printf-style message to stdout, flushed immediately; ignores quiet. */
#define log_msg_v(fmt, ...) \
        do { \
            fprintf (stdout, fmt, __VA_ARGS__); \
            fflush(stdout); \
        } while(0)

/*
 * Write msg verbatim to stderr and terminate with EXIT_FAILURE.
 *
 * fputs() is used instead of fprintf() so that a '%' inside msg (e.g. in a
 * library-supplied error text) is never interpreted as a conversion, and
 * the do/while(0) wrapper makes the macro a single statement so it behaves
 * correctly as the body of an unbraced if/else.
 */
#define fatal_quit(msg) \
        do { \
            fputs((msg), stderr); \
            exit(EXIT_FAILURE); \
        } while(0)

/*
 * Callback function that gets called for each mailbox found.
 *
 * Returns the name of the deleted and expunged files set to be
 * unlinked when the last lock of the mailbox is released.
 *
 * If the mailbox itself was deleted, it returns only its name
 * because the files within are also going to be unlinked later.
 *
 * We can't return a nonzero value because in that case mboxlist_findall()
 * stops searching.
 */
int get_expunged_data(const char *mailbox_name, int matchlen __attribute__((unused)),
                      int maycreate __attribute__((unused)))
{
    struct mailbox *mailbox = NULL;

    /* Load with a shared lock */
    if (mailbox_open_irl(mailbox_name, &mailbox) != 0)
    {
        log_msg("Failed to open mailbox %s\n", mailbox_name);
        return 0;
    }

    if (mailbox->mbtype & MBTYPE_REMOTE)
    {
        log_msg("Skipping remote mailbox: %s", mailbox_name);
        return 0;
    }

    struct index_record *records = NULL;
    struct index_record *record;
    uint32_t recno;
    int alloc = 0;
    int num = 0;

    for (recno = 1; recno <= mailbox->i.num_records; recno++) {
        /* Pre-allocate more space */
        if (alloc <= num) {
            alloc += 64;
            records = xrealloc(records, sizeof(struct index_record) * alloc);
        }
        record = &records[num];

        if (mailbox_read_index_record(mailbox, recno, record))
            continue;

        if (record->system_flags & FLAG_UNLINKED)
        {
            const char *record_path = mailbox_message_fname(mailbox, record->uid);

            // some files stay listed even after unlinked
            if (access(record_path, R_OK) == 0)
                log_msg_v("%s\n", record_path);
        }

        num++;
    }

    const char *path;
    path = mboxname_datapath(mailbox->part, mailbox->name, 0);
    if (path)
        log_msg("Done with mailbox %s\n", path);

    free(records);
    mailbox_close(&mailbox);

    return 0;

}

/*
 * Write the command-line help to stdout and terminate the process with
 * EXIT_SUCCESS. name is the program name shown in the usage line.
 * This function does not return.
 */
void usage(const char *name)
{
    static const char help_text[] =
        "Print all files to be unlinked from the mailboxes in the given path.\n"
        "Those files correspond to messages that were deleted and expunged\n"
        "and will be unlinked when all connections to the mailbox are closed.\n\n"
        "<mailbox path> has to be a relative path, e.g. user/admin\n\n"
        "Valid arguments:\n"
        "-v\t\tverbose, log every action and not only filenames\n"
        "-h\t\thelp, print this message\n";

    fprintf(stdout, "usage: %s [-v] <mailbox path>\n", name);
    fputs(help_text, stdout);
    fflush(stdout);

    exit(EXIT_SUCCESS);
}

/*
 * Entry point: cyr_getdeleted [-v] <mailbox path>
 *
 * Converts the given mailbox path to its internal name and calls
 * get_expunged_data() for every mailbox matching it.
 *
 * Returns EXIT_SUCCESS when the search completed, EXIT_FAILURE when the
 * mailbox name could not be converted or the search reported an error.
 * Help (-h), an unknown option or a missing path print the usage text and
 * exit via usage().
 */
int main(int argc, char **argv)
{
    int rc;
    int option;
    char buf[MAX_MAILBOX_PATH];

    if ((geteuid()) == 0 && (become_cyrus() != 0))
    {
        fatal_quit("must run as the Cyrus user");
    }

    /* getopt() signals the end of the options with -1 */
    while ((option = getopt(argc, argv, "hv")) != -1)
    {
        switch (option)
        {
            case 'v':
                quiet = 0;
                break;
            case 'h':
            default:
                usage(argv[0]);
                return 0;
        }
    }

    if (optind >= argc)
    {
        usage(argv[0]);
        return 0;
    }

    /* No alt_config and no flags */
    cyrus_init(NULL, "cyr_getdeleted", 0);

    mboxlist_init(0);
    mboxlist_open(NULL);

    /* Set namespace -- force standard (internal) */
    if ((rc = mboxname_init_namespace(&deleted_namespace, 1)) != 0)
    {
        /* never use a library-supplied text as the format string */
        fprintf(stderr, "%s\n", error_message(rc));
        exit(EXIT_FAILURE);
    }

    /*
     * The mailbox path has to include the folder name
     * and may not be an absolute path, but relative:
     * e.g. user/admin
     */
    rc = (*deleted_namespace.mboxname_tointernal)(&deleted_namespace,
                                                  argv[optind], NULL, buf);

    /*
     * Search for mailboxes whose names start with the string contained
     * in buf and for each result call get_expunged_data().
     * Skipped when the name conversion above failed, since buf would not
     * hold a usable pattern.
     */
    if (rc == 0)
    {
        rc = (*deleted_namespace.mboxlist_findall)(&deleted_namespace, buf,
                                                   1, NULL, NULL,
                                                   get_expunged_data, NULL);
    }

    if (rc != 0)
    {
        fprintf(stderr, "%s\n", error_message(rc));
    }

    /* Wrap it up */
    mboxlist_close();
    mboxlist_done();
    cyrus_done();

    return rc ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff -ur a/imap/Makefile.in b/imap/Makefile.in
--- a/imap/Makefile.in	2015-02-02 13:57:28.945536670 +0100
+++ b/imap/Makefile.in	2015-02-02 13:57:11.629536081 +0100
@@ -114,7 +114,7 @@
 SERVICE=../master/service.o
 SERVICETHREAD=../master/service-thread.o
 
-PROGS = imapd lmtpd pop3d \
+PROGS = imapd lmtpd pop3d cyr_getdeleted \
 	fud smmapd reconstruct quota mbpath ipurge cyr_dbtool cyr_synclog \
 	cyrdump chk_cyrus cvt_cyrusdb deliver ctl_mboxlist annotate_restore \
 	ctl_deliver ctl_cyrusdb squatter mbexamine cyr_expire arbitron \
@@ -303,6 +303,10 @@
 	$(CC) $(LDFLAGS) -o $@ cyr_expire.o $(CLIOBJS) \
 	libimap.a $(DEPLIBS) $(LIBS)
 
+cyr_getdeleted: cyr_getdeleted.o $(CLIOBJS) libimap.a $(DEPLIBS)
+	$(CC) $(LDFLAGS) -o $@ cyr_getdeleted.o $(CLIOBJS) \
+	libimap.a $(DEPLIBS) $(LIBS)
+
 fetchnews: fetchnews.o $(CLIOBJS) libimap.a $(DEPLIBS)
 	$(CC) $(LDFLAGS) -o \
 	 $@ fetchnews.o $(CLIOBJS) libimap.a $(DEPLIBS) $(LIBS)
diff -u -r -p cyrus-imapd-2.4.17/imap/mailbox.c cyrus-imapd.remove_on_create/imap/mailbox.c
--- cyrus-imapd-2.4.17/imap/mailbox.c	2015-02-26 14:58:42.620755306 +0100
+++ cyrus-imapd.remove_on_create/imap/mailbox.c	2015-02-26 11:54:12.302851042 +0100
@@ -111,6 +111,7 @@ static struct mailboxlist *open_mailboxe
 
 static int mailbox_index_unlink(struct mailbox *mailbox);
 static int mailbox_index_repack(struct mailbox *mailbox);
+static void mailbox_delete_files(char *path);
 
 static struct mailboxlist *create_listitem(const char *name)
 {
@@ -2801,6 +2802,39 @@ int mailbox_create(const char *name,
 	goto done;
     }
 
+    /* in case we recreate a removed folder that still exists on disk,
+     * make sure we clear the .deleted flag */
+    char *mailbox_dir = strdup(fname);
+    char *p;
+    if (mailbox_dir && (p = strrchr(mailbox_dir, '/')) != NULL && p > mailbox_dir)
+    {
+        *p = '\0';
+
+        const char *flag_filename = "/.deleted";
+        char *del_flag_file = malloc(sizeof(char) *
+                                    (strlen(mailbox_dir) + strlen(flag_filename) + 1));
+
+        if (del_flag_file)
+        {
+            strcpy(del_flag_file, mailbox_dir);
+            strcat(del_flag_file, flag_filename);
+
+            if (unlink(del_flag_file) == 0)
+            {
+                syslog(LOG_INFO, "Found .deleted flag. Deleting any stale messages in %s", mailbox_dir);
+                mailbox_delete_files(mailbox_dir);
+            }
+
+            free(del_flag_file);
+        }
+        else
+        {
+            syslog(LOG_INFO, "Not checking if mailbox created (%s) "
+                             "had been previously deleted (malloc failed)", mailbox_dir);
+        }
+    }
+    free(mailbox_dir);
+
     fname = mailbox_meta_fname(mailbox, META_INDEX);
     if (!fname) {
 	syslog(LOG_ERR, "IOERROR: Mailbox name too long (%s)", mailbox->name);
@@ -2964,6 +2998,42 @@ int mailbox_delete(struct mailbox **mail
     mailbox_index_dirty(mailbox);
     mailbox->i.options |= OPT_MAILBOX_DELETED;
 
+    /*
+     * Creates a dummy file in the directory to indicate that this folder
+     * is going to be removed later.
+     */
+    const char *flag_filename = "/.deleted";
+    const char *mbpath = mboxname_datapath(mailbox->part, mailbox->name, 0);
+
+    if (mbpath)
+    {
+        char *del_flag_file = calloc(strlen(mbpath) + strlen(flag_filename) + 1, sizeof(char));
+
+        if (del_flag_file != NULL)
+        {
+            strcpy(del_flag_file, mbpath);
+            strcat(del_flag_file, flag_filename);
+
+            FILE *flag_file = fopen(del_flag_file, "w");
+            if(flag_file)
+            {
+                syslog(LOG_INFO, "Dummy file %s created before mailbox deletion.", del_flag_file);
+                fclose(flag_file);
+            }
+
+            free(del_flag_file);
+            del_flag_file = NULL;
+        }
+        else
+        {
+            syslog(LOG_INFO, "Dummy file could not be created: memory allocation failed.");
+        }
+    }
+    else
+    {
+        syslog(LOG_INFO, "Dummy file could not be created: mboxname_datapath() returned NULL.");
+    }
+
     /* mark the quota removed */
     mailbox_quota_dirty(mailbox);
     mailbox->i.quota_mailbox_used = 0;
#!/usr/bin/env python

'''
This module is a wrapper for the find program. It must be called as if
calling find itself. The behavior is the same for most files, except those
that are inside the imap folders. For them the module checks if the files
are valid and are not marked to be unlinked later by cyrus when the last
connection holding the lock to the corresponding mailbox is closed.
'''

import os
import re
import syslog
import subprocess
import sys


class MailFileFilter(object):
    '''
    Helper class to store the filter code, permanent values and data
    related to the directory of the filename being tested.

    Attributes:
        sudo_cyrus      argv prefix used to run a shell command as "cyrus"
        cyr_getdeleted  command prefix (path plus trailing space) of the
                        cyr_getdeleted tool
        regex           pattern extracting "user/<name>" from a file path;
                        it is applied with match(), i.e. anchored at the
                        start of the path
        current_dir     directory the skip_dir flag was computed for
        current_user    user the deleted_files list was loaded for
        skip_dir        True if current_dir contains a ".deleted" file
        deleted_files   paths reported by cyr_getdeleted for current_user
    '''
    def __init__(self):
        self.sudo_cyrus = ['su', 'cyrus', '-s', '/bin/sh', '-c']
        self.cyr_getdeleted = '/usr/cyrus/bin/cyr_getdeleted '

        self.regex = re.compile('imap-mails/(user/.*?)/')

        self.current_dir = ''
        self.current_user = ''
        self.skip_dir = False
        self.deleted_files = []

    def __get_deleted_files(self):
        '''
        Get a list of deleted files in every mailbox belonging to the user.
        Basically cyr_getdeleted does a pattern matching and outputs all the
        files that are set to be unlinked in every mailbox whose name begins
        with the string passed.
        e.g.: passing user/admin will return files from user/admin/sent,
        user/admin/trash, etc.

        On any failure the list is left empty (so every file is kept) and
        an error is written to syslog.
        '''

        # shlex.quote on Python 3, pipes.quote on Python 2
        try:
            from shlex import quote
        except ImportError:
            from pipes import quote

        # The command is interpreted by "sh -c", and the user name comes from
        # a path on disk: quote it so that whitespace or shell metacharacters
        # cannot split or extend the command.
        command = self.cyr_getdeleted + quote(self.current_user)
        prog = self.sudo_cyrus + [command]
        try:
            self.deleted_files = subprocess.check_output(prog).split('\n')
        except Exception:
            # Keep all files on error. Exception (rather than a bare except)
            # lets KeyboardInterrupt and SystemExit propagate.
            self.deleted_files = []

            syslog.openlog('backup: find_wrapper.py error')
            syslog.syslog(syslog.LOG_ERR, "Can't get deleted messages for " + self.current_user)
            syslog.closelog()

        # remove empty strings
        self.deleted_files = [df for df in self.deleted_files if df]

    def __is_file_deleted(self, filename, username):
        '''
        Check if the file is present in the list of files to be deleted.
        The list contains only files to be deleted in the current user's
        mailboxes, so if the filename passed is relative to another user
        mailbox, we need to reload the deleted_files.
        '''

        # if the user changed, we need to update our file list
        if username != self.current_user:
            self.current_user = username
            self.__get_deleted_files()

        for deleted_file in self.deleted_files:
            try:
                if os.path.samefile(filename, deleted_file):
                    return True
            except OSError:
                # make sure that it was not a missing file in our list
                # that raised the exception (if so we can't return yet)
                if os.path.isfile(deleted_file):
                    return False

        return False

    def __is_dir_deleted(self, filename):
        '''
        Check if the file is in a dir marked to be deleted by cyrus
        (this is the case when the dir contains a .deleted file).
        To avoid unnecessary checks, we only look again for this flag file
        when we get filenames from a different directory.

        Note that the existence of the .deleted file only means that
        the current folder is marked to be deleted one day and that all the
        messages files it contains are invalid. However, the subfolders
        should not be ignored as their messages are perfectly valid as long
        as they don't have the .deleted file themselves (and the files are
        not marked to be unlinked).
        '''

        # we add the slash to directories so dirname() will return
        # the directory itself, and not its parent directory
        if os.path.isdir(filename) and filename[-1] != '/':
            filename += '/'

        # only check again if we changed dirs
        directory = os.path.dirname(filename)
        if directory != self.current_dir:
            self.current_dir = directory
            self.skip_dir = os.path.isfile(os.path.join(directory, '.deleted'))

        return self.skip_dir

    def keep_file(self, filename):
        '''
        Main code: do all the checks to see if the file should be kept.

        Returns True if filename should be reported, False if it lies in a
        folder flagged with ".deleted" or is listed as deleted for its user.
        '''

        # the checks are only for imap message files,
        # so we keep everything not matched by the regex
        matched = self.regex.match(filename)
        if matched is None:
            return True

        username = matched.group(1)

        if (self.__is_dir_deleted(filename) or
                self.__is_file_deleted(filename, username)):
            return False

        return True


def main(argv):
    '''
    This is where the call to find is done. Because we need to filter
    the files found, we almost surely will be left behind by find and
    cannot expect to echo its output in realtime. So we go by reading
    line by line from the output buffer and echoing the results after
    we make sure the file is to be kept.

    argv is the list of arguments handed to /usr/bin/find unchanged.
    Returns the exit status of find.
    '''

    find_call = ['/usr/bin/find'] + argv

    # Only stdout is captured: find's diagnostics stay on our stderr so they
    # are never mistaken for file names by whoever consumes our output.
    find_cmd = subprocess.Popen(find_call, stdout=subprocess.PIPE)

    mail_file_filter = MailFileFilter()

    # readline blocks execution until we have a whole line
    # and returns an empty string on EOF
    line = find_cmd.stdout.readline()
    while line:
        # drop only the line terminator; leading or trailing blanks
        # are part of the file name
        filename = line.rstrip('\n')
        if filename and mail_file_filter.keep_file(filename):
            print(filename)

        line = find_cmd.stdout.readline()

    find_cmd.stdout.close()
    return find_cmd.wait()

# Script entry point: require at least one argument for find, then exit with
# whatever main() returns (None is treated by sys.exit() as success).
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: %s <find params>" % (sys.argv[0]))
        sys.exit(1)

    sys.exit(main(sys.argv[1:]))

Reply via email to