-----BEGIN PGP SIGNED MESSAGE----- Hash: SHA512 Dear list,
I made a patch which extends dovecot-antispam-plugin with support for using the addition and removal of a specific set of flags as a trigger for learning. This allows for very tight integration with Thunderbird and KMail, which both set flags for Junk messages (Thunderbird sets "Junk", KMail sets "JUNK"). With this patch and the corresponding configuration, dovecot-antispam-plugin can trigger on the addition (learn as spam) and removal (learn as ham) of these flags. Combined with a sieve script which sets these flags for incoming mail which has been classified as spam, this allows for a tight and intuitive feedback loop between users and the server-side spam classification system. I tested the patch with dovecot 2.2 from Debian stretch and so far it works pretty well. I realise that dovecot-antispam-plugin has not been updated since 2013. It also has not been migrated to GitHub yet. If it is to be migrated there, I would be happy to submit a pull request there for discussion and merging. Otherwise, someone would have to indicate how to go forward with applying this patch to the official version, if that is desired. The patch is based on the current tip, and feedback is appreciated. I took some inspiration from dovecot-core/src/plugins/notify/notify-storage.c -- this requires the use of struct mail_private to be able to override the vfunc for mail_update_keywords, which I actually do not like. However, when I tried to base this patch on the notify plugin (via notify_register), I ran into the issue that the transaction is partially freed before the transaction_commit event from notify is emitted, which is unfortunate and breaks at least the mailtrain backend (which I use). This is in theory fixable, but I wanted to keep the patch as small as possible, as I have no idea whether this will eventually end up upstream or whether I’ll have to maintain it myself. If upstream is not interested in this patch, please let me know! 
In that case, I would consider making a separate plugin out of this, with fewer backends (possibly only a mailtrain-like one), but frankly, I would consider that a waste of resources. In the hope that this is useful for others and hoping for feedback, Jonas Wielicki -----BEGIN PGP SIGNATURE----- iQIzBAEBCgAdFiEEG/EPV+Xzd5wEoQQIwGIDJZdiWIoFAlg4Fd0ACgkQwGIDJZdi WIpM3Q//TWVHDFx5LK14y0jOnxzE7kGthc6hakILnWqYgl7B/wlljD1HLrec//hg 9LEancjURnv3sPU/kEEpl6RRDX5kDMbXXKXMq7/1R7bNdmeMz5NEebAo7ktlCii7 ikk7MPL0F8UViY7IoW7y5NxfqMUldYseejn0GwKiVkEyqlyGLRbP7ijyzMWjpIWp eqOg/b5bNX6ARMC1O/hKhvHw4AhvSO2j73da60Xchu39gvDa2kC+MTU/kX4wb6po kPOEgFxGhpxG0+UMzUEBPLYwY3Q9x+R/PzlpUF6xEjYxgE7leVRVfCHiZLsMUGH1 itozwSzoAyDYV+Rbzk8OEBL6EGBJQHqYmNybULZsJkxhUenNeC4GRf7NcJ0jAisG rSfwr/+kD9+ErKNgtNJzfceLYGNWti44whV6kXxRxqGlIfJsJB7ZMdrj8+wSdn0p slXUoyHMUkRkN92tIPqZF5cb+5Ke+O+5hHE07Wohxw3eNoBSqtQWo81aigjaCraC ivcZX3HswTapyKtpgWMzjoWqp982crr+wtAI+AlEUKoCEqF54EnxM1kB1qoe0HcH nMgMdIkXI4VzPaUNUK206ffEtlnKNIqF3SkOkBX263L0ZJgXyCFRRj6W3z78VDdP He62tN/FPPBtmeVrmCj2ZDE+6gqtMQN52FsN4mpaq4BrCYlYB7g= =3+CU -----END PGP SIGNATURE-----
>From 6589757e6f97028c66a20357f30205d58f00a0ff Mon Sep 17 00:00:00 2001 From: Jonas Wielicki <[email protected]> Date: Fri, 25 Nov 2016 11:30:20 +0100 Subject: [PATCH] Trigger learning on change of configurable IMAP Flags This allows tight integration with Thunderbird and KMail. --- src/antispam-plugin.c | 2 + src/mailbox.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/mailbox.h | 1 + src/user.c | 25 ++++++++++-- src/user.h | 2 + 5 files changed, 133 insertions(+), 3 deletions(-) diff --git a/src/antispam-plugin.c b/src/antispam-plugin.c index 52694a3..e9cb260 100644 --- a/src/antispam-plugin.c +++ b/src/antispam-plugin.c @@ -1,5 +1,6 @@ #include "lib.h" #include "mail-storage-hooks.h" +#include "notify-plugin.h" #include "antispam-plugin.h" #include "user.h" @@ -8,6 +9,7 @@ static struct mail_storage_hooks antispam_plugin_hooks = { .mail_user_created = antispam_user_created, + .mail_allocated = antispam_mail_allocated, .mailbox_allocated = antispam_mailbox_allocated }; diff --git a/src/mailbox.c b/src/mailbox.c index 5a0a413..3334275 100644 --- a/src/mailbox.c +++ b/src/mailbox.c @@ -4,12 +4,17 @@ #include "mailbox.h" #include "backends.h" +#include <stdbool.h> + static MODULE_CONTEXT_DEFINE_INIT(antispam_storage_module, &mail_storage_module_register); static MODULE_CONTEXT_DEFINE_INIT(antispam_transaction_module, &mail_storage_module_register); +static MODULE_CONTEXT_DEFINE_INIT(antispam_mail_module, + &mail_module_register); #define TRANSACTION_CONTEXT(obj) MODULE_CONTEXT(obj, antispam_transaction_module) +#define MAIL_CONTEXT(obj) MODULE_CONTEXT(obj, antispam_mail_module) struct antispam_transaction { @@ -25,6 +30,35 @@ enum mailbox_copy_type MCT_DENY }; + +static bool in_flags(const char *kwd, char *const *flags) +{ + char *const *curr_flag = flags; + for (; *curr_flag; ++curr_flag) { + if (strcmp(kwd, *curr_flag) == 0) { + return true; + } + } + return false; +} + + +static void find_relevant_flags(struct antispam_user *asu, + const char *const 
*kwds, + bool *has_spam) +{ + const char *const *curr_kwd = kwds; + for (; *curr_kwd; ++curr_kwd) { + *has_spam = *has_spam || in_flags(*curr_kwd, asu->flags_spam); + if (*has_spam) { + // found, no need to continue + break; + } + } +} + + + static enum mailbox_class antispam_mailbox_classify(struct mailbox *box) { const char *name = mailbox_get_name(box); @@ -234,6 +268,78 @@ static void antispam_transaction_rollback(struct mailbox_transaction_context i_free(ast); } +static void antispam_mail_update_keywords( + struct mail *_mail, enum modify_type modify_type, + struct mail_keywords *keywords) +{ + struct mail_private *mail = (struct mail_private *)_mail; + union mail_module_context *lmail = MAIL_CONTEXT(mail); + struct antispam_user *asu = USER_CONTEXT(_mail->box->storage->user); + struct antispam_transaction *ast = TRANSACTION_CONTEXT(_mail->transaction); + + const char *const *old_keywords = NULL; + const char *const *new_keywords = NULL; + + old_keywords = mail_get_keywords(_mail); + lmail->super.update_keywords(_mail, modify_type, keywords); + new_keywords = mail_get_keywords(_mail); + + bool old_has_spam = false; + bool new_has_spam = false; + + find_relevant_flags(asu, old_keywords, + &old_has_spam); + find_relevant_flags(asu, mail_get_keywords(_mail), + &new_has_spam); + + const bool learn_as_ham = old_has_spam && !new_has_spam; + const bool learn_as_spam = !old_has_spam && new_has_spam; + + i_debug("antispam: keywords changed: old_spam = %d, new_spam = %d\n", + old_has_spam, new_has_spam); + + if (learn_as_ham && learn_as_spam) { + // wat. + i_debug("antispam: wat. both learn as ham and as spam? 
no way."); + return; + } + + if (learn_as_spam) { + i_debug("antispam: learning as spam"); + asu->backend->handle_mail( + _mail->transaction, + ast->data, + _mail, + true + ); + } + + if (learn_as_ham) { + i_debug("antispam: learning as ham"); + asu->backend->handle_mail( + _mail->transaction, + ast->data, + _mail, + false + ); + } +} + +void antispam_mail_allocated(struct mail *_mail) +{ + // XXX: I feel bad about that one + struct mail_private *mail = (struct mail_private*)_mail; + struct mail_vfuncs *v = mail->vlast; + union mail_module_context *lmail; + + lmail = p_new(mail->pool, union mail_module_context, 1); + lmail->super = *v; + mail->vlast = &lmail->super; + + v->update_keywords = antispam_mail_update_keywords; + MODULE_CONTEXT_SET_SELF(mail, antispam_mail_module, lmail); +} + void antispam_mailbox_allocated(struct mailbox *box) { struct antispam_mailbox *asmb; diff --git a/src/mailbox.h b/src/mailbox.h index a2c13e6..7800487 100644 --- a/src/mailbox.h +++ b/src/mailbox.h @@ -23,5 +23,6 @@ struct antispam_mailbox }; void antispam_mailbox_allocated(struct mailbox *box); +void antispam_mail_allocated(struct mail *_mail); #endif diff --git a/src/user.c b/src/user.c index dc90e20..276fa77 100644 --- a/src/user.c +++ b/src/user.c @@ -25,6 +25,22 @@ static void parse_folders(struct mail_user *user, const char *infix, T_END; } +static void parse_flags(struct mail_user *user, const char *suffix, + char ***result) +{ + const char *tmp; + + T_BEGIN + { + tmp = config(user, suffix); + if (tmp) + { + *result = p_strsplit(user->pool, tmp, ";"); + } + } + T_END; +} + static bool check_folders(char ***folders) { int i; @@ -86,11 +102,14 @@ void antispam_user_created(struct mail_user *user) parse_folders(user, "trash", asu->folders_trash); parse_folders(user, "unsure", asu->folders_unsure); + asu->flags_spam = NULL; + parse_flags(user, "spam_flags", &asu->flags_spam); + if (!(check_folders(asu->folders_spam) || check_folders(asu->folders_trash) - || 
check_folders(asu->folders_unsure))) + || check_folders(asu->folders_unsure) || asu->flags_spam)) { - i_error("antispam plugin folders are not configured for this user"); - goto bailout; + i_error("antispam plugin folders and flags are not configured for this user"); + goto bailout; } MODULE_CONTEXT_SET(user, antispam_user_module, asu); diff --git a/src/user.h b/src/user.h index d643d06..663cd60 100644 --- a/src/user.h +++ b/src/user.h @@ -61,6 +61,8 @@ struct antispam_user char **folders_trash[NUM_MT]; char **folders_unsure[NUM_MT]; + char **flags_spam; + // backend config vars pointer struct antispam_backend *backend; void *backend_config; -- 2.10.2
