[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
First of all, apologies for taking so long to get back to this. On Fri, 27 Nov 2009, Carl Worth wrote: > The auto-detection is just three additional stats (at most) for each > directory, right? That seems cheap enough to me. If that's cheap enough, then I won't disagree with auto-detection. Jan Janak's patch seems to take most of the disk access cost out of it, in any case. > That seems orthogonal to me. Would the dovecot index files be easy to > skip with a pattern-based blacklist? Yes, and that's a much more elegant solution. > > I'll be happy to implement them, although I'd like for others to > > chime in on the configure-as-Maildir vs. autodetect-Maildir issue. > > And thanks for your patience in working through my patch. I didn't mean to call a vote--rather to solicit the opinions of others with possibly even more exotic mail storage configurations. A new patch is attached. Apologies for the rather verbose Maildir handling logic, but I couldn't find a way to minimize the calls to is_maildir that was both neat and readable. -- Michiel --- notmuch-client.h |1 + notmuch-new.c| 93 +++-- 2 files changed, 90 insertions(+), 4 deletions(-) diff --git a/notmuch-client.h b/notmuch-client.h index 50a30fe..7bc84a1 100644 --- a/notmuch-client.h +++ b/notmuch-client.h @@ -77,6 +77,7 @@ typedef struct { int saw_read_only_directory; int output_is_a_tty; int verbose; +int tag_maildir; int total_files; int processed_files; diff --git a/notmuch-new.c b/notmuch-new.c index 9d20616..8742ab4 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -109,6 +109,60 @@ is_maildir (struct dirent **entries, int count) return 0; } +/* Tag new mail according to its Maildir attribute flags. + * + * Test if the mail file's filename contains any of the + * standard Maildir attributes, and translate these to + * the corresponding standard notmuch tags. + * + * If the message is not marked as 'seen', or if no + * flags are present, tag as 'inbox, unread'. + */ +static void +derive_tags_from_maildir_flags (notmuch_message_t *message, + const char * path) +{ +int seen = FALSE; +int end_of_flags = FALSE; +size_t l = strlen(path); + +/* Non-experimental message flags start with this */ +char * i = strstr(path, ":2,"); +i = (i) ? i : strstr(path, "!2,"); /* This format is used on VFAT */ +if (i != NULL) { + i += 3; + for (; i < (path + l) && !end_of_flags; i++) { + switch (*i) { + case 'F' : + notmuch_message_add_tag (message, "flagged"); + break; + case 'R': /* replied */ + notmuch_message_add_tag (message, "answered"); + break; + case 'D': + notmuch_message_add_tag (message, "draft"); + break; + case 'S': /* seen */ + seen = TRUE; + break; + case 'T': /* trashed */ + notmuch_message_add_tag (message, "deleted"); + break; + case 'P': /* passed */ + notmuch_message_add_tag (message, "forwarded"); + break; + default: + end_of_flags = TRUE; + break; + } + } +} + +if (i == NULL || !seen) { + tag_inbox_and_unread (message); +} +} + /* Examine 'path' recursively as follows: * * o Ask the filesystem for the mtime of 'path' (path_mtime) @@ -142,6 +196,7 @@ add_files_recursive (notmuch_database_t *notmuch, notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS; notmuch_message_t *message = NULL; struct dirent **namelist = NULL; +int maildir_detected = -1; /* -1 = unset */ int num_entries; /* If we're told to, we bail out on encountering a read-only @@ -189,13 +244,37 @@ add_files_recursive (notmuch_database_t *notmuch, if (strcmp (entry->d_name, ".") == 0 || strcmp (entry->d_name, "..") == 0 || (entry->d_type == DT_DIR && -(strcmp (entry->d_name, "tmp") == 0) && -is_maildir (namelist, num_entries)) || - strcmp (entry->d_name, ".notmuch") ==0) +strcmp (entry->d_name, ".notmuch") == 0)) { continue; } + + /* If this directory is a Maildir folder, we need to +* ignore any subdirectories marked tmp/, and scan for +* Maildir attributes on messages contained in the sub- +* directories 'new' and 'cur'. */ + if (maildir_detected != 0 && + entry->d_type == DT_DIR && + ((strcmp (entry->d_name, "tmp") == 0) || +(strcmp (entry->d_name, "new") == 0) || +(strcmp (entry->d_name, "cur") == 0))) { + + /* is_maildir scans the entire directory. No need to + do this more than once, if at all */ + if (maildir_detected == -1) { +
Re: [notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
First of all, apologies for taking so long to get back to this. On Fri, 27 Nov 2009, Carl Worth wrote: > The auto-detection is just three additional stats (at most) for each > directory, right? That seems cheap enough to me. If that's cheap enough, then I won't disagree with auto-detection. Jan Janak's patch seems to take most of the disk access cost out of it, in any case. > That seems orthogonal to me. Would the dovecot index files be easy to > skip with a pattern-based blacklist? Yes, and that's a much more elegant solution. > > I'll be happy to implement them, although I'd like for others to > > chime in on the configure-as-Maildir vs. autodetect-Maildir issue. > > And thanks for your patience in working through my patch. I didn't mean to call a vote--rather to solicit the opinions of others with possibly even more exotic mail storage configurations. A new patch is attached. Apologies for the rather verbose Maildir handling logic, but I couldn't find a way to minimize the calls to is_maildir that was both neat and readable. -- Michiel --- notmuch-client.h |1 + notmuch-new.c| 93 +++-- 2 files changed, 90 insertions(+), 4 deletions(-) diff --git a/notmuch-client.h b/notmuch-client.h index 50a30fe..7bc84a1 100644 --- a/notmuch-client.h +++ b/notmuch-client.h @@ -77,6 +77,7 @@ typedef struct { int saw_read_only_directory; int output_is_a_tty; int verbose; +int tag_maildir; int total_files; int processed_files; diff --git a/notmuch-new.c b/notmuch-new.c index 9d20616..8742ab4 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -109,6 +109,60 @@ is_maildir (struct dirent **entries, int count) return 0; } +/* Tag new mail according to its Maildir attribute flags. + * + * Test if the mail file's filename contains any of the + * standard Maildir attributes, and translate these to + * the corresponding standard notmuch tags. + * + * If the message is not marked as 'seen', or if no + * flags are present, tag as 'inbox, unread'. + */ +static void +derive_tags_from_maildir_flags (notmuch_message_t *message, + const char * path) +{ +int seen = FALSE; +int end_of_flags = FALSE; +size_t l = strlen(path); + +/* Non-experimental message flags start with this */ +char * i = strstr(path, ":2,"); +i = (i) ? i : strstr(path, "!2,"); /* This format is used on VFAT */ +if (i != NULL) { + i += 3; + for (; i < (path + l) && !end_of_flags; i++) { + switch (*i) { + case 'F' : + notmuch_message_add_tag (message, "flagged"); + break; + case 'R': /* replied */ + notmuch_message_add_tag (message, "answered"); + break; + case 'D': + notmuch_message_add_tag (message, "draft"); + break; + case 'S': /* seen */ + seen = TRUE; + break; + case 'T': /* trashed */ + notmuch_message_add_tag (message, "deleted"); + break; + case 'P': /* passed */ + notmuch_message_add_tag (message, "forwarded"); + break; + default: + end_of_flags = TRUE; + break; + } + } +} + +if (i == NULL || !seen) { + tag_inbox_and_unread (message); +} +} + /* Examine 'path' recursively as follows: * * o Ask the filesystem for the mtime of 'path' (path_mtime) @@ -142,6 +196,7 @@ add_files_recursive (notmuch_database_t *notmuch, notmuch_status_t status, ret = NOTMUCH_STATUS_SUCCESS; notmuch_message_t *message = NULL; struct dirent **namelist = NULL; +int maildir_detected = -1; /* -1 = unset */ int num_entries; /* If we're told to, we bail out on encountering a read-only @@ -189,13 +244,37 @@ add_files_recursive (notmuch_database_t *notmuch, if (strcmp (entry->d_name, ".") == 0 || strcmp (entry->d_name, "..") == 0 || (entry->d_type == DT_DIR && -(strcmp (entry->d_name, "tmp") == 0) && -is_maildir (namelist, num_entries)) || - strcmp (entry->d_name, ".notmuch") ==0) +strcmp (entry->d_name, ".notmuch") == 0)) { continue; } + + /* If this directory is a Maildir folder, we need to +* ignore any subdirectories marked tmp/, and scan for +* Maildir attributes on messages contained in the sub- +* directories 'new' and 'cur'. */ + if (maildir_detected != 0 && + entry->d_type == DT_DIR && + ((strcmp (entry->d_name, "tmp") == 0) || +(strcmp (entry->d_name, "new") == 0) || +(strcmp (entry->d_name, "cur") == 0))) { + + /* is_maildir scans the entire directory. No need to + do this more than once, if at all */ + if (maildir_detected == -1) { +
Re: [notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
On Thu, 26 Nov 2009 22:12:02 +0100, Michiel Buddingh' wrote: > Carl Worth wrote: > I considered that approach; ideally, we could test for the presence of > all three of cur, tmp and new--but this is rather messy to do in the > current treewalk structure. Taking any one of them as proof positive of > a Maildir might lead to unpleasant surprises--it's not all that incon- > ceivable for someone to name a mail folder 'tmp'. The auto-detection is just three additional stats (at most) for each directory, right? That seems cheap enough to me. > There's another matter; Some mail stores will place (large) index files > in folder roots, i.e. one level above cur/, tmp/ and new/. Looking > at the ones dovecot (an IMAP server) uses, I can make out a from header, > a subject header, and a message-id, as plaintext in the first 100k or > so. It's not all that inconceivable that notmuch might register it as > a 'real' email, with unpleasant consequences for the index. That seems orthogonal to me. Would the dovecot index files be easy to skip with a pattern-based blacklist? > I've seen some patches fly by that add support for multiple mail > stores. Turning on Maildir support on a per-directory basis might > resolve that problem while still supporting heterogenous mail archives > to some degree. I am not convinced we can do the right thing > automatically without causing some grief to a subset of users. With sup, I had the opposite extreme compared to current notmuch. Every maildir in the hierarchy had to be configured independently. That was a lot of pain, and is precisely why notmuch started out by simply taking a single top-level directory. > Haven't tested it, but it seems you can put > > [core] > whitespace = trailing-space,space-before-tab Yes. According to the documentation of git-config, those two values are the default. But the documentation also only says that these will make "git diff" display the undesired whitespace in red, and "git apply --whitespace=error" refuse to apply. I can't find a builtin way to make "git commit" complain, or I would recommend that. > I'll be happy to implement them, although I'd like for others to chime > in on the configure-as-Maildir vs. autodetect-Maildir issue. And thanks > for your patience in working through my patch. No problem at all. I'll look forward to the next version of the patch. Consider mine as a vote for autodetection of maildir rather than configuration. -Carl ___ notmuch mailing list notmuch@notmuchmail.org http://notmuchmail.org/mailman/listinfo/notmuch
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
On Thu, 26 Nov 2009 22:12:02 +0100, Michiel Buddingh' wrote: > Carl Worth wrote: > I considered that approach; ideally, we could test for the presence of > all three of cur, tmp and new--but this is rather messy to do in the > current treewalk structure. Taking any one of them as proof positive of > a Maildir might lead to unpleasant surprises--it's not all that incon- > ceivable for someone to name a mail folder 'tmp'. The auto-detection is just three additional stats (at most) for each directory, right? That seems cheap enough to me. > There's another matter; Some mail stores will place (large) index files > in folder roots, i.e. one level above cur/, tmp/ and new/. Looking > at the ones dovecot (an IMAP server) uses, I can make out a from header, > a subject header, and a message-id, as plaintext in the first 100k or > so. It's not all that inconceivable that notmuch might register it as > a 'real' email, with unpleasant consequences for the index. That seems orthogonal to me. Would the dovecot index files be easy to skip with a pattern-based blacklist? > I've seen some patches fly by that add support for multiple mail > stores. Turning on Maildir support on a per-directory basis might > resolve that problem while still supporting heterogenous mail archives > to some degree. I am not convinced we can do the right thing > automatically without causing some grief to a subset of users. With sup, I had the opposite extreme compared to current notmuch. Every maildir in the hierarchy had to be configured independently. That was a lot of pain, and is precisely why notmuch started out by simply taking a single top-level directory. > Haven't tested it, but it seems you can put > > [core] > whitespace = trailing-space,space-before-tab Yes. According to the documentation of git-config, those two values are the default. But the documentation also only says that these will make "git diff" display the undesired whitespace in red, and "git apply --whitespace=error" refuse to apply. I can't find a builtin way to make "git commit" complain, or I would recommend that. > I'll be happy to implement them, although I'd like for others to chime > in on the configure-as-Maildir vs. autodetect-Maildir issue. And thanks > for your patience in working through my patch. No problem at all. I'll look forward to the next version of the patch. Consider mine as a vote for autodetection of maildir rather than configuration. -Carl
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
On 26-11 22:53, Ingmar Vanhassel wrote: > Excerpts from Michiel Buddingh''s message of Thu Nov 26 22:12:02 +0100 2009: > > Haven't tested it, but it seems you can put > > > > [core] > > whitespace = trailing-space,space-before-tab > > > > into your ~/.gitconfig now. I've also set emacs to mark trailing > > whitespace with big red markers. > > I think it should be in notmuch/.gitattributes, which doesn't relies on > new contributors to set that up. It also doesn't force this on for every > git repo one has. This enforces the restrictions also on people's local commits, even those they may never want to submit for inclusion into the official repository. Asking people to follow certain rules when submitting patches for inclusion is sensible, but I don't think notmuch should enforce this on all local commits with notmuch/.gitattributes. -- Jan
Re: [notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
On 26-11 22:53, Ingmar Vanhassel wrote: > Excerpts from Michiel Buddingh''s message of Thu Nov 26 22:12:02 +0100 2009: > > Haven't tested it, but it seems you can put > > > > [core] > > whitespace = trailing-space,space-before-tab > > > > into your ~/.gitconfig now. I've also set emacs to mark trailing > > whitespace with big red markers. > > I think it should be in notmuch/.gitattributes, which doesn't relies on > new contributors to set that up. It also doesn't force this on for every > git repo one has. This enforces the restrictions also on people's local commits, even those they may never want to submit for inclusion into the official repository. Asking people to follow certain rules when submitting patches for inclusion is sensible, but I don't think notmuch should enforce this on all local commits with notmuch/.gitattributes. -- Jan ___ notmuch mailing list notmuch@notmuchmail.org http://notmuchmail.org/mailman/listinfo/notmuch
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
Excerpts from Michiel Buddingh''s message of Thu Nov 26 22:12:02 +0100 2009: > Haven't tested it, but it seems you can put > > [core] > whitespace = trailing-space,space-before-tab > > into your ~/.gitconfig now. I've also set emacs to mark trailing > whitespace with big red markers. I think it should be in notmuch/.gitattributes, which doesn't relies on new contributors to set that up. It also doesn't force this on for every git repo one has. -- Exherbo KDE, X.org maintainer
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
Carl Worth wrote: > > +" The other value is 'storage_type', which can currently be set to\n" > > +" 'maildir' or 'none'.\n"; > > This part of the patch I don't like. I've got a mail collection spanning > over a decade, and it's seen a lot of strange things. Most of my mail is > in maildir format, but not quite all of it. And I actually like the > ability to just shove random new messages into the mail store manually > without having to create a maildir name for it. > > So I don't think a global configuration makes sense here. Meanwhile, > it's really easy to detect the presence of a maildir. Whenever we see > child directories of "cur", "new", and "tmp" then we should turn on the > processing of maildir flags for when processing mail in "cur" and "new". I considered that approach; ideally, we could test for the presence of all three of cur, tmp and new--but this is rather messy to do in the current treewalk structure. Taking any one of them as proof positive of a Maildir might lead to unpleasant surprises--it's not all that incon- ceivable for someone to name a mail folder 'tmp'. There's another matter; Some mail stores will place (large) index files in folder roots, i.e. one level above cur/, tmp/ and new/. Looking at the ones dovecot (an IMAP server) uses, I can make out a from header, a subject header, and a message-id, as plaintext in the first 100k or so. It's not all that inconceivable that notmuch might register it as a 'real' email, with unpleasant consequences for the index. I've seen some patches fly by that add support for multiple mail stores. Turning on Maildir support on a per-directory basis might resolve that problem while still supporting heterogenous mail archives to some degree. I am not convinced we can do the right thing automatically without causing some grief to a subset of users. > > @@ -257,7 +262,7 @@ notmuch_config_open (void *ctx, > > talloc_free (email); > > } > > } > > - > > + > > /* When we create a new configuration file here, we add some > > * comments to help the user understand what can be done. */ > > if (is_new) { > > [nit] Trailing whitespace inserted there as well. > Hmm... I was going to say that git ships with a pre-commit hook you can > turn on that checks for trailing whitespace and aborts the commit if > it's present. But it looks like the currently shipping pre-commit.sample > hook doesn't do this anymore. Haven't tested it, but it seems you can put [core] whitespace = trailing-space,space-before-tab into your ~/.gitconfig now. I've also set emacs to mark trailing whitespace with big red markers. > OK, now we're into the meat of things. Clearly, you're directly > supporting the documented flags of maildir. But we need to do a few > things differently here. Most importantly, notmuch is already using an > "unread" tag, so maildir's S flag should map that *that* rather than > adding new "unseen" and "seen" flags. So messages with the S flag would > not get the "unread" tag and messages without S would get the "unread" > tag. When writing the patch, I assumed there might be a minor (but important) distinction between marking a mail 'seen' (i.e. the MUA storing the fact that the file has been visited) and 'read' (i.e. the user marking the contents of a mail as being read and understood). As I found out later, notmuch's interpretation of 'read' and 'unread' is the former, so there is no distinction. > The "flagged" and "replied" tags seem reasonable enough. But for > "trashed" and "passed" I think I'd rather see the tag names as "deleted" > and "forwarded". (Since I can imagine adding commands to notmuch for > "delete" and "forward" but not for "trash" nor "pass"). Fair enough. > Oh, and setting the "inbox" tag correctly here based on the maildir tags > is the final and most important thing. It looks like that's missing from > the above. So, a missing "S" flag should map to adding both the "inbox" > and "unread" tags. Makes sense, will do. > > + if (state->storage_type == MAILDIR) { > > + char * leaf = basename(next); > > You could save the basename call by examining the leaf name when it is > available as a standalone string up in the caller. Which would require testing with S_ISDIR twice, which is uglier, but essentially free, so I'll grant it's the better thing to do. > So this patch is close, but needs a few fixes. I'll be happy to implement them, although I'd like for others to chime in on the configure-as-Maildir vs. autodetect-Maildir issue. And thanks for your patience in working through my patch. -- Michiel Buddingh'
Re: [notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
Excerpts from Michiel Buddingh''s message of Thu Nov 26 22:12:02 +0100 2009: > Haven't tested it, but it seems you can put > > [core] > whitespace = trailing-space,space-before-tab > > into your ~/.gitconfig now. I've also set emacs to mark trailing > whitespace with big red markers. I think it should be in notmuch/.gitattributes, which doesn't relies on new contributors to set that up. It also doesn't force this on for every git repo one has. -- Exherbo KDE, X.org maintainer ___ notmuch mailing list notmuch@notmuchmail.org http://notmuchmail.org/mailman/listinfo/notmuch
Re: [notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
Carl Worth wrote: > > +" The other value is 'storage_type', which can currently be set to\n" > > +" 'maildir' or 'none'.\n"; > > This part of the patch I don't like. I've got a mail collection spanning > over a decade, and it's seen a lot of strange things. Most of my mail is > in maildir format, but not quite all of it. And I actually like the > ability to just shove random new messages into the mail store manually > without having to create a maildir name for it. > > So I don't think a global configuration makes sense here. Meanwhile, > it's really easy to detect the presence of a maildir. Whenever we see > child directories of "cur", "new", and "tmp" then we should turn on the > processing of maildir flags for when processing mail in "cur" and "new". I considered that approach; ideally, we could test for the presence of all three of cur, tmp and new--but this is rather messy to do in the current treewalk structure. Taking any one of them as proof positive of a Maildir might lead to unpleasant surprises--it's not all that incon- ceivable for someone to name a mail folder 'tmp'. There's another matter; Some mail stores will place (large) index files in folder roots, i.e. one level above cur/, tmp/ and new/. Looking at the ones dovecot (an IMAP server) uses, I can make out a from header, a subject header, and a message-id, as plaintext in the first 100k or so. It's not all that inconceivable that notmuch might register it as a 'real' email, with unpleasant consequences for the index. I've seen some patches fly by that add support for multiple mail stores. Turning on Maildir support on a per-directory basis might resolve that problem while still supporting heterogenous mail archives to some degree. I am not convinced we can do the right thing automatically without causing some grief to a subset of users. > > @@ -257,7 +262,7 @@ notmuch_config_open (void *ctx, > > talloc_free (email); > > } > > } > > - > > + > > /* When we create a new configuration file here, we add some > > * comments to help the user understand what can be done. */ > > if (is_new) { > > [nit] Trailing whitespace inserted there as well. > Hmm... I was going to say that git ships with a pre-commit hook you can > turn on that checks for trailing whitespace and aborts the commit if > it's present. But it looks like the currently shipping pre-commit.sample > hook doesn't do this anymore. Haven't tested it, but it seems you can put [core] whitespace = trailing-space,space-before-tab into your ~/.gitconfig now. I've also set emacs to mark trailing whitespace with big red markers. > OK, now we're into the meat of things. Clearly, you're directly > supporting the documented flags of maildir. But we need to do a few > things differently here. Most importantly, notmuch is already using an > "unread" tag, so maildir's S flag should map that *that* rather than > adding new "unseen" and "seen" flags. So messages with the S flag would > not get the "unread" tag and messages without S would get the "unread" > tag. When writing the patch, I assumed there might be a minor (but important) distinction between marking a mail 'seen' (i.e. the MUA storing the fact that the file has been visited) and 'read' (i.e. the user marking the contents of a mail as being read and understood). As I found out later, notmuch's interpretation of 'read' and 'unread' is the former, so there is no distinction. > The "flagged" and "replied" tags seem reasonable enough. But for > "trashed" and "passed" I think I'd rather see the tag names as "deleted" > and "forwarded". (Since I can imagine adding commands to notmuch for > "delete" and "forward" but not for "trash" nor "pass"). Fair enough. > Oh, and setting the "inbox" tag correctly here based on the maildir tags > is the final and most important thing. It looks like that's missing from > the above. So, a missing "S" flag should map to adding both the "inbox" > and "unread" tags. Makes sense, will do. > > + if (state->storage_type == MAILDIR) { > > + char * leaf = basename(next); > > You could save the basename call by examining the leaf name when it is > available as a standalone string up in the caller. Which would require testing with S_ISDIR twice, which is uglier, but essentially free, so I'll grant it's the better thing to do. > So this patch is close, but needs a few fixes. I'll be happy to implement them, although I'd like for others to chime in on the configure-as-Maildir vs. autodetect-Maildir issue. And thanks for your patience in working through my patch. -- Michiel Buddingh' ___ notmuch mailing list notmuch@notmuchmail.org http://notmuchmail.org/mailman/listinfo/notmuch
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
On Sun, 22 Nov 2009 10:33:53 +0100, Michiel Buddingh' wrote: > On the positive side, this allows us to map these flags onto tags, > at least for indexing (the patch at the bottom implements this), and, > if I can pry you away from your principles, later for modification > as well. Hi Michiel, I'm finally getting around to reviewing this patch. I think the functionality of respecting these maildir flags for initial import is going to be really important. So thanks for looking at this! Some comments on the patches below. > +enum storage_type > +notmuch_config_get_storage_type (notmuch_config_t *config); > + > notmuch_bool_t > debugger_is_active (void); > > + > + > + > #endif [nit] Try to look out for introducing excess whitespace like that. > +" The other value is 'storage_type', which can currently be set to\n" > +" 'maildir' or 'none'.\n"; This part of the patch I don't like. I've got a mail collection spanning over a decade, and it's seen a lot of strange things. Most of my mail is in maildir format, but not quite all of it. And I actually like the ability to just shove random new messages into the mail store manually without having to create a maildir name for it. So I don't think a global configuration makes sense here. Meanwhile, it's really easy to detect the presence of a maildir. Whenever we see child directories of "cur", "new", and "tmp" then we should turn on the processing of maildir flags for when processing mail in "cur" and "new". > @@ -257,7 +262,7 @@ notmuch_config_open (void *ctx, > talloc_free (email); > } > } > - > + > /* When we create a new configuration file here, we add some > * comments to help the user understand what can be done. */ > if (is_new) { [nit] Trailing whitespace inserted there as well. Hmm... I was going to say that git ships with a pre-commit hook you can turn on that checks for trailing whitespace and aborts the commit if it's present. But it looks like the currently shipping pre-commit.sample hook doesn't do this anymore. So I'll have to find out what the current recommended way is to get this behavior. Does anybody here know? > + i += 3; > + for (; i < (path + l) && !end_of_flags; i++) { > + switch (*i) { > + case 'F': /* flagged */ > + notmuch_message_add_tag (message, "flagged"); > + break; > + case 'R': /* the message has been replied to */ > + notmuch_message_add_tag (message, "replied"); > + break; > + case 'D': > + notmuch_message_add_tag (message, "draft"); > + break; > + case 'S': /* Indicate a message has been read */ > + notmuch_message_add_tag (message, "seen"); > + seen = TRUE; > + break; > + case 'T': /* Indicates a message has been marked as trash */ > + notmuch_message_add_tag (message, "trashed"); > + break; > + case 'P': /* indicates a message has been forwarded */ > + notmuch_message_add_tag (message, "passed"); > + break; > + default: > + end_of_flags = TRUE; > + break; > + } > + } > +} > + > +if (i == NULL || !seen) { > + /* mark messages without any flags, or the seen flag as 'unseen' */ > + notmuch_message_add_tag (message, "unseen"); > +} OK, now we're into the meat of things. Clearly, you're directly supporting the documented flags of maildir. But we need to do a few things differently here. Most importantly, notmuch is already using an "unread" tag, so maildir's S flag should map that *that* rather than adding new "unseen" and "seen" flags. So messages with the S flag would not get the "unread" tag and messages without S would get the "unread" tag. The "flagged" and "replied" tags seem reasonable enough. But for "trashed" and "passed" I think I'd rather see the tag names as "deleted" and "forwarded". (Since I can imagine adding commands to notmuch for "delete" and "forward" but not for "trash" nor "pass"). Finally, all of the new behavior here needs to be documented in both notmuch.1 and notmuch.c where the current behavior with respect to "inbox" and "unread" is already documented. Oh, and setting the "inbox" tag correctly here based on the maildir tags is the final and most important thing. It looks like that's missing from the above. So, a missing "S" flag should map to adding both the "inbox" and "unread" tags. > (state->ignore_read_only_directories)) { > state->saw_read_only_directory = TRUE; > - goto DONE; > + goto DONE; > } Trailing whitespace strikes again! :-) > + if (state->storage_type == MAILDIR) { > + char * leaf = basename(next); You could save the basename call by examining the leaf name when it is available as a standalone string up in the caller. So this patch is close, but needs a few fixes. Thanks again, -Carl
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
Hi Michiel, > "MB" == Michiel Buddingh' writes: MB> Dirk-Jan C. Binnema wrote: Michiel> + Michiel> +static void Michiel> +derive_tags_from_maildir_flags (notmuch_message_t Michiel> *message, const char * Michiel> path) >> >> I see you don't handle the "N" -- is that deliberate? Also, a >> minor addition may to also allow for '!' instead of ':' as a >> separator, as that's the semi-official way to use Maildirs on >> (V)FAT filesystems (which don't allow for colons in filenames). MB> Not deliberate. Simply unaware of the "N" flag, nor aware of MB> practices for storing Maildirs on (V)FAT. Ah, you are right. In my code, http://gitorious.org/mu/mainline/blobs/master/msg/mu-msg-flags.c I handled it because apparently some programs where using that. But I guess it's better to stick to DJB's Maildir spec for now... Regarding the VFAT-limitations, see eg. the Note in: http://docs.python.org/lib/mailbox-maildir.html anyway, a minor point. Best wishes, Dirk. -- Dirk-Jan C. Binnema Helsinki, Finland e:djcb at djcbsoftware.nl w:www.djcbsoftware.nl pgp: D09C E664 897D 7D39 5047 A178 E96A C7A1 017D DA3C
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
Dirk-Jan C. Binnema wrote: > Michiel> + > Michiel> +static void > Michiel> +derive_tags_from_maildir_flags (notmuch_message_t > Michiel> *message, const char * > Michiel> path) > > I see you don't handle the "N" -- is that deliberate? Also, a > minor addition may to also allow for '!' instead of ':' as a > separator, as that's the semi-official way to use Maildirs on > (V)FAT filesystems (which don't allow for colons in filenames). Not deliberate. Simply unaware of the "N" flag, nor aware of practices for storing Maildirs on (V)FAT. I've used only this file as a reference. http://cr.yp.to/proto/maildir.html mvg, Michiel Buddingh'
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
Carl Worth wrote: > > A Maildir-aware notmuch could incorporate this to be far more > > resistant to bulk mail moves done by other clients, by using > > filename lookups to avoid accessing and parsing the mail > > files themselves. > I don't think opening a file to read out a message ID will ever be > a bottleneck. But yes, we could take advantage of the unique name > if we insisted that the storage have it. I'm not so sure. On traditional unix-like filesystems, every file access is another potential disk seek. People use a lot of different strategies for keeping their mail accessible and performant; you yourself, I gather, use non-semantic directories mainly intended to keep the number of files per directory low; others store their mail per month, or per year. I might choose to move all my mail from 2008 from my INBOX to the folder Archive.2008; such moves may change the paths of thousands of messages at a time. > Personally, I still regularly end up indexing stuff that's not > in proper maildir format, (like just manually copying a mail > file "foo" down into a directory that I know isn't being delivered > to by any MDA but that I want notmuch to index.) Maybe that's just > me, because I'm always bringing up little things for debugging, > etc. But it is convenient at least. Oh true. And it occurs to me that notmuch is a quite sensible companion tool to MH users (if they're still around in any numbers) > Actually, I don't think that's true at all. Notmuch is definitely > intended to become a lot more than it is right now. And if it's not > making it easy for you to deal with mail the way you'd like to, then > we definitely do want to look into expanding notmuch to be able to > address that. Thanks for your consideration; on the other hand, I do think that it is a good idea not to make matters more complex than they need to be, so I can certainly sympathise with the principles you've set for this project. regards, Michiel Buddingh'
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
[quick reply to the text without looking at the patch yet] On Sun, 22 Nov 2009 10:33:53 +0100, Michiel Buddingh' wrote: > Thanks, and apologies for the accidental base64 encoding. Thanks for noticing. That would have been a tricky patch to reply to to comment on. > On the positive side, this allows us to map these flags onto tags, > at least for indexing (the patch at the bottom implements this), and, > if I can pry you away from your principles, later for modification > as well. Yes, we might end up doing this. We'll see. > I respect your desire to stick to that principle. But I also know > that purity and simplicity, generally speaking, are unattainable > luxuries for most applications that handle mail. Hehe. Thanks for the entertainment. You sound like someone who's already got some of the battle scars from trying to write mail-handling software. One is certainly forced to see an awful lot of awfully strange things. > Another thing; notmuch currently indexes by message-id (or SHA-1 hash > if that is not present). Maildir, on the other hand, uses file names > as stable (when stripped of the parts trailing the colon) unique > (knock on wood) identifiers. A Maildir-aware notmuch could incorporate > this to be far more resistant to bulk mail moves done by other clients, > by using filename lookups to avoid accessing and parsing the mail > files themselves. I don't think opening a file to read out a message ID will ever be a bottleneck. But yes, we could take advantage of the unique name if we insisted that the storage have it. Personally, I still regularly end up indexing stuff that's not in proper maildir format, (like just manually copying a mail file "foo" down into a directory that I know isn't being delivered to by any MDA but that I want notmuch to index.) Maybe that's just me, because I'm always bringing up little things for debugging, etc. But it is convenient at least. So I think I'd like to be careful to avoid notmuch ever *requiring* maildir storage. (But yes, it should obviously support maildir well since it's so common.) > I should re-iterate that I'm new to notmuch, and it's obvious that I'm > trying to beat it into becoming something it was never intended to be; Actually, I don't think that's true at all. Notmuch is definitely intended to become a lot more than it is right now. And if it's not making it easy for you to deal with mail the way you'd like to, then we definitely do want to look into expanding notmuch to be able to address that. > on the other hand, I'd like people to chime in on this. I've got the patch tagged to give a closer look at it later. Thanks again, -Carl
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
Hi Michiel, > "Michiel" == Michiel Buddingh' writes: Michiel> Another thing; notmuch currently indexes by message-id (or SHA-1 Michiel> hash if that is not present). Maildir, on the other hand, uses Michiel> file names as stable (when stripped of the parts trailing the Michiel> colon) unique (knock on wood) identifiers. A Maildir-aware Michiel> notmuch could incorporate this to be far more resistant to bulk Michiel> mail moves done by other clients, by using filename lookups to Michiel> avoid accessing and parsing the mail files themselves. I think that is a good idea. Michiel> I should re-iterate that I'm new to notmuch, and it's obvious Michiel> that I'm trying to beat it into becoming something it was never Michiel> intended to be; on the other hand, I'd like people to chime in on Michiel> this. I'm new here too, so please be gentle as I learn how things work Michiel> + Michiel> +static void Michiel> +derive_tags_from_maildir_flags (notmuch_message_t *message, const char * Michiel> path) I see you don't handle the "N" -- is that deliberate? Also, a minor addition may to also allow for '!' instead of ':' as a separator, as that's the semi-official way to use Maildirs on (V)FAT filesystems (which don't allow for colons in filenames). Best wishes, Dirk. -- Dirk-Jan C. Binnema Helsinki, Finland e:djcb at djcbsoftware.nl w:www.djcbsoftware.nl pgp: D09C E664 897D 7D39 5047 A178 E96A C7A1 017D DA3C
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
> "Carl" == Carl Worth writes: Carl> On Sun, 22 Nov 2009 00:25:43 +0100, Michiel Buddingh' Carl> wrote: Carl> Hi Michel, welcome to Notmuch! >> Any attempt to match tags up to directories will eventually have >> to deal with with the fact that tags can't be neatly mapped onto >> them. If I remove a directory-tag from a message, does this >> mean the message is removed from that directory? What if a >> message has two directory-tags, does it mean it's present in both >> directories? Carl> Right. We definitely don't want a strong mapping here. I think one Carl> answer could be that the initial import is different and can take the Carl> directory names as a "hint" for initializing tag values. After that, Carl> they are just tags in the database and the user can do whatever they Carl> want. In 'mu' I took a slightly different approach, by simply allowing for searches on the path to the message; ie., by adding the path as just another property of the message, just like subject: or from:. Now, the trouble with that the path of a message is not very stable -- people move messages around. In 'mu' this was handled by simply considering the moved message as a new one, and removing message from the database whose paths do not point to message anymore. This makes things quite robust against people moving things around or deleting them. Tags could be connected to the hopefully unique Maildir file name then. Carl> notmuch search tag:foo and not tag:foo-search Carl> notmuch search tag:foo-search and not tag:foo Carl> So even if in the end the user comes up with something that could Carl> replace the original tag, it's probably still beneficial to have it Carl> there when starting out. So, an alternative would be something like: notmuch search path:inbox But this requires notmuch to be able to update things when paths change. Best wishes, Dirk. -- Dirk-Jan C. Binnema Helsinki, Finland e:djcb at djcbsoftware.nl w:www.djcbsoftware.nl pgp: D09C E664 897D 7D39 5047 A178 E96A C7A1 017D DA3C
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
On Sun, 22 Nov 2009 05:04:56 +0100, Carl Worth wrote: > Hi Michel, welcome to Notmuch! Thanks, and apologies for the accidental base64 encoding. First things first: >> In the mean time, I've made a smaller, hopefully more harmless >> patch to let 'notmuch new' mark messages stored in a Maildir 'cur' >> folder as 'read' rather than 'unread'. > > Can others who have more experience weigh in here? Will this do the > right thing for you? Do mail clients wait to move things into "cur" > after the user has actually read them, or is that just a place where > they are moved when the MUA _receives_ them? You're absolutely right, and I'm a fool (because I _knew_ this, but forgot). Maildir stores flags (seen, replied, flagged, trashed, passed) in file names. On the positive side, this allows us to map these flags onto tags, at least for indexing (the patch at the bottom implements this), and, if I can pry you away from your principles, later for modification as well. >> Any attempt to match tags up to directories will eventually have >> to deal with with the fact that tags can't be neatly mapped onto >> them. If I remove a directory-tag from a message, does this >> mean the message is removed from that directory? What if a >> message has two directory-tags, does it mean it's present in both >> directories? > > Right. We definitely don't want a strong mapping here. I propose that the maildir 'storage_type' could make an exception for standard Maildir flags. It'll take relatively little effort to special-case the abovementioned flags, and it'd be a huge boon to interoperability. >> At the same time, this kind of interoperability would be highly >> desirable to those of us who access their mail using other >> clients (webmail, mobile phones, etc.) that expect hierarchical >> ordering. > > That kind of thing is going to be "harder". > > So far we're trying to stick with the principle that notmuch itself > doesn't mess with the data store. I respect your desire to stick to that principle. But I also know that purity and simplicity, generally speaking, are unattainable luxuries for most applications that handle mail. > But then, we also want notmuch to be > very scriptable, so someone might write a tool that uses notmuch search > to export a set of hierarchical maildirs based on the tag names. (These > could even just be populated with symlinks like mairix does.) So > something like that could be really useful for integrating. That is a very interesting idea. On the other hand, interoperability with Maildir mail stores is unlikely to be a corner case. The MTA is probably going to deliver new mail to a Maildir, procmail understands it, etc. I'd feel more comfortable relegating this integration to a scripted glue layer if I knew for certain such a glue layer would be up to the task. > I'm very much of the opinion that the user shouldn't care at all about > the storage of the actual mail files that notmuch indexes. The user certainly shouldn't, but I'm not sure that notmuch can remain as agnostic about the actual storage of messages as planned. Another thing; notmuch currently indexes by message-id (or SHA-1 hash if that is not present). Maildir, on the other hand, uses file names as stable (when stripped of the parts trailing the colon) unique (knock on wood) identifiers. A Maildir-aware notmuch could incorporate this to be far more resistant to bulk mail moves done by other clients, by using filename lookups to avoid accessing and parsing the mail files themselves. I should re-iterate that I'm new to notmuch, and it's obvious that I'm trying to beat it into becoming something it was never intended to be; on the other hand, I'd like people to chime in on this. again via webmail, Michiel Buddingh' --- notmuch-client.h | 10 ++ notmuch-config.c | 33 +-- notmuch-new.c| 95 +++-- 3 files changed, 124 insertions(+), 14 deletions(-) diff --git a/notmuch-client.h b/notmuch-client.h index ea77686..c39be06 100644 --- a/notmuch-client.h +++ b/notmuch-client.h @@ -69,12 +69,16 @@ #define STRNCMP_LITERAL(var, literal) \ strncmp ((var), (literal), sizeof (literal) - 1) +enum storage_type { UNSET, NONE, MAILDIR }; + typedef void (*add_files_callback_t) (notmuch_message_t *message); typedef struct { int ignore_read_only_directories; int saw_read_only_directory; +enum storage_type storage_type; + int total_files; int processed_files; int added_messages; @@ -179,7 +183,13 @@ notmuch_config_set_user_other_email (notmuch_config_t *config, const char *other_email[], size_t length); +enum storage_type +notmuch_config_get_storage_type (notmuch_config_t *config); + notmuch_bool_t debugger_is_active (void); + + + #endif diff --git a/notmuch-config.c b/notmuch-config.c index aaa0372..13bd341 100644 --- a/notmu
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
On Sun, 22 Nov 2009 00:25:43 +0100, Michiel Buddingh' wrote: > (hi, new here, just subscribed today. Wanted to reply to Carl's > earlier message I read in the archives, but since I don't have that, > I'm replying to Bart's reply to that message) Hi Michel, welcome to Notmuch! > Any attempt to match tags up to directories will eventually have > to deal with with the fact that tags can't be neatly mapped onto > them. If I remove a directory-tag from a message, does this > mean the message is removed from that directory? What if a > message has two directory-tags, does it mean it's present in both > directories? Right. We definitely don't want a strong mapping here. I think one answer could be that the initial import is different and can take the directory names as a "hint" for initializing tag values. After that, they are just tags in the database and the user can do whatever they want. This would mean that the user will need some other way to apply tags to future messages that might be delivered to those same directories. And we've got search-based tagging for that, (as well as manual tagging for the case where a user was manually filing messages into folders before). And while developing these search-based tags the user can use a different tag name. Say there was a directory named "foo", then the user might experiment with a search-based tag named "foo-search" and explore the results of things like: notmuch search tag:foo and not tag:foo-search notmuch search tag:foo-search and not tag:foo So even if in the end the user comes up with something that could replace the original tag, it's probably still beneficial to have it there when starting out. > At the same time, this kind of interoperability would be highly > desirable to those of us who access their mail using other > clients (webmail, mobile phones, etc.) that expect hierarchical > ordering. That kind of thing is going to be "harder". So far we're trying to stick with the principle that notmuch itself doesn't mess with the data store. But then, we also want notmuch to be very scriptable, so someone might write a tool that uses notmuch search to export a set of hierarchical maildirs based on the tag names. (These could even just be populated with symlinks like mairix does.) So something like that could be really useful for integrating. I'm very much of the opinion that the user shouldn't care at all about the storage of the actual mail files that notmuch indexes. > In the mean time, I've made a smaller, hopefully more harmless > patch to let 'notmuch new' mark messages stored in a Maildir 'cur' > folder as 'read' rather than 'unread'. Can others who have more experience weigh in here? Will this do the right thing for you? Do mail clients wait to move things into "cur" after the user has actually read them, or is that just a place where they are moved when the MUA _receives_ them? -Carl
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
On Sat, 21 Nov 2009 19:39:22 +0100, Carl Worth wrote: > On Wed, 18 Nov 2009 21:25:28 +0530, aneesh.kumar at linux.vnet.ibm.com > (Aneesh Kumar K.V) wrote: > A fantastic patch, Aneesh! I think this will be handy for a lot of > people importing mail from all kinds of systems. This is pushed now. And sadly, I just pulled it out again. I realized that I actually don't want my mail tagged based on the maildir directories I'm using, (they are arbitrarily-named directories used only to keep the per-directory number of files below about 10 thousand). So we'll probably need to make this an opt-in feature from the configuration file. -Carl
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
On Wed, 18 Nov 2009 21:25:28 +0530, aneesh.kumar at linux.vnet.ibm.com (Aneesh Kumar K.V) wrote: > This patch adds maildir directory name as the tag name for > messages. This helps in adding tags using filtering already > provided by procmail. A fantastic patch, Aneesh! I think this will be handy for a lot of people importing mail from all kinds of systems. This is pushed now. Thanks again, -Carl
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
* Carl Worth [091121 15:28]: > And sadly, I just pulled it out again. > > I realized that I actually don't want my mail tagged based on the > maildir directories I'm using, (they are arbitrarily-named directories > used only to keep the per-directory number of files below about 10 > thousand). > > So we'll probably need to make this an opt-in feature from the > configuration file. I think notmuch needs something that will add tags based on the attributes of a message (headers or body), as it imports data from a maildir. I am currently considering having procmail deliver to date based (-MM) folders and have notmuch determine what tags they should get. -Bart -- WebSig: http://www.jukie.net/~bart/sig/
[notmuch] [PATCH] notmuch: Add Maildir directory name as tag name for messages
>From 24711481dfe770417aa0a13308a9bb842dfb03f4 Mon Sep 17 00:00:00 2001 From: Aneesh Kumar K.V Date: Wed, 18 Nov 2009 21:20:12 +0530 Subject: [PATCH] notmuch: Add Maildir directory name as tag name for messages This patch adds maildir directory name as the tag name for messages. This helps in adding tags using filtering already provided by procmail. Signed-off-by: Aneesh Kumar K.V --- notmuch-new.c | 12 ++-- 1 files changed, 10 insertions(+), 2 deletions(-) diff --git a/notmuch-new.c b/notmuch-new.c index 5405a9f..50d0a5a 100644 --- a/notmuch-new.c +++ b/notmuch-new.c @@ -96,6 +96,7 @@ add_files_print_progress (add_files_state_t *state) static notmuch_status_t add_files_recursive (notmuch_database_t *notmuch, const char *path, +const char *tag, struct stat *st, add_files_state_t *state) { @@ -186,6 +187,7 @@ add_files_recursive (notmuch_database_t *notmuch, case NOTMUCH_STATUS_SUCCESS: state->added_messages++; tag_inbox_and_unread (message); + notmuch_message_add_tag (message, tag); break; /* Non-fatal issues (go on to next file) */ case NOTMUCH_STATUS_DUPLICATE_MESSAGE_ID: @@ -223,7 +225,13 @@ add_files_recursive (notmuch_database_t *notmuch, } } } else if (S_ISDIR (st->st_mode)) { - status = add_files_recursive (notmuch, next, st, state); + if ((strcmp (entry->d_name, "cur") == 0) || + (strcmp (entry->d_name, "new") == 0) || + (strcmp (entry->d_name, "tmp") == 0)) { + status = add_files_recursive (notmuch, next, tag, st, state); + } else { + status = add_files_recursive (notmuch, next, entry->d_name, st, state); + } if (status && ret == NOTMUCH_STATUS_SUCCESS) ret = status; } @@ -285,7 +293,7 @@ add_files (notmuch_database_t *notmuch, timerval.it_value.tv_usec = 0; setitimer (ITIMER_REAL, &timerval, NULL); -status = add_files_recursive (notmuch, path, &st, state); +status = add_files_recursive (notmuch, path, basename(path), &st, state); /* Now stop the timer. */ timerval.it_interval.tv_sec = 0; -- 1.6.5.2.74.g610f9