[PATCH 5/5] lib: Add "lastmod:" queries for filtering by last modification
From: Austin Clements
The implementation is essentially the same as the date range search prior to Jani's fancy date parser. --- doc/man7/notmuch-search-terms.rst | 8 lib/database-private.h| 1 + lib/database.cc | 4 test/T570-revision-tracking.sh| 17 + 4 files changed, 30 insertions(+) diff --git a/doc/man7/notmuch-search-terms.rst b/doc/man7/notmuch-search-terms.rst index 1d27ac1..e71a525 100644 --- a/doc/man7/notmuch-search-terms.rst +++ b/doc/man7/notmuch-search-terms.rst @@ -54,6 +54,8 @@ indicate user-supplied values): - date:<since>..<until> +- lastmod:<since>..<until> + The **from:** prefix is used to match the name or address of the sender of an email message. @@ -124,6 +126,12 @@ The time range can also be specified using timestamps with a syntax of: Each timestamp is a number representing the number of seconds since 1970-01-01 00:00:00 UTC. +The **lastmod:** prefix can be used to restrict the result by the +database revision number of when messages were last modified (tags +were added/removed or filenames changed). This is usually used in +conjunction with the **--uuid** argument to **notmuch search** +to find messages that have changed since an earlier query. 
+ Operators - diff --git a/lib/database-private.h b/lib/database-private.h index 4e93257..3fb10f7 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -176,6 +176,7 @@ struct _notmuch_database { Xapian::TermGenerator *term_gen; Xapian::ValueRangeProcessor *value_range_processor; Xapian::ValueRangeProcessor *date_range_processor; +Xapian::ValueRangeProcessor *last_mod_range_processor; }; /* Prior to database version 3, features were implied by the database diff --git a/lib/database.cc b/lib/database.cc index fc78769..bab3334 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -1000,6 +1000,7 @@ notmuch_database_open_verbose (const char *path, notmuch->term_gen->set_stemmer (Xapian::Stem ("english")); notmuch->value_range_processor = new Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_TIMESTAMP); notmuch->date_range_processor = new ParseTimeValueRangeProcessor (NOTMUCH_VALUE_TIMESTAMP); + notmuch->last_mod_range_processor = new Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_LAST_MOD, "lastmod:"); notmuch->query_parser->set_default_op (Xapian::Query::OP_AND); notmuch->query_parser->set_database (*notmuch->xapian_db); @@ -1007,6 +1008,7 @@ notmuch_database_open_verbose (const char *path, notmuch->query_parser->set_stemming_strategy (Xapian::QueryParser::STEM_SOME); notmuch->query_parser->add_valuerangeprocessor (notmuch->value_range_processor); notmuch->query_parser->add_valuerangeprocessor (notmuch->date_range_processor); + notmuch->query_parser->add_valuerangeprocessor (notmuch->last_mod_range_processor); for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) { prefix_t *prefix = _PREFIX_EXTERNAL[i]; @@ -1085,6 +1087,8 @@ notmuch_database_close (notmuch_database_t *notmuch) notmuch->value_range_processor = NULL; delete notmuch->date_range_processor; notmuch->date_range_processor = NULL; +delete notmuch->last_mod_range_processor; +notmuch->last_mod_range_processor = NULL; return status; } diff --git a/test/T570-revision-tracking.sh 
b/test/T570-revision-tracking.sh index 20b44cb..0936011 100755 --- a/test/T570-revision-tracking.sh +++ b/test/T570-revision-tracking.sh @@ -73,4 +73,21 @@ test_expect_success 'tag succeeds with correct uuid' \ test_expect_code 1 'tag fails with incorrect uuid' \ "notmuch tag --uuid=this-is-no-uuid '*' +test2" +test_begin_subtest 'lastmod:0.. matches everything' +total=$(notmuch count '*') +modtotal=$(notmuch count lastmod:0..) +test_expect_equal "$total" "$modtotal" + +test_begin_subtest 'lastmod:100.. matches nothing' +modtotal=$(notmuch count lastmod:100..) +test_expect_equal 0 "$modtotal" + +test_begin_subtest 'exclude one message using lastmod' +lastmod=$(notmuch count --lastmod '*' | cut -f3) +total=$(notmuch count '*') +notmuch tag +4EFC743A.3060609 at april.org id:4EFC743A.3060609 at april.org +subtotal=$(notmuch count lastmod:..$lastmod) +result=$(($subtotal == $total-1)) +test_expect_equal 1 "$result" + test_done -- 2.5.0
[PATCH 4/5] cli: add global option "--uuid"
The function notmuch_exit_if_unmatched_db_uuid is split from notmuch_process_shared_options because it needs an open notmuch database. There are two exceptional cases in uuid handling. 1) notmuch config and notmuch setup don't currently open the database, so it doesn't make sense to check the UUID. 2) notmuch compact opens the database inside the library, so we either need to open the database just to check uuid, or change the API. --- doc/man1/notmuch.rst | 11 +-- notmuch-client.h | 4 notmuch-compact.c | 5 + notmuch-config.c | 4 notmuch-count.c| 2 ++ notmuch-dump.c | 2 ++ notmuch-insert.c | 2 ++ notmuch-new.c | 3 ++- notmuch-reply.c| 2 ++ notmuch-restore.c | 2 ++ notmuch-search.c | 2 ++ notmuch-setup.c| 4 notmuch-show.c | 2 ++ notmuch-tag.c | 2 ++ notmuch.c | 18 ++ test/T570-revision-tracking.sh | 27 +++ test/random-corpus.c | 2 ++ 17 files changed, 91 insertions(+), 3 deletions(-) diff --git a/doc/man1/notmuch.rst b/doc/man1/notmuch.rst index 0401c91..3acfbdb 100644 --- a/doc/man1/notmuch.rst +++ b/doc/man1/notmuch.rst @@ -51,9 +51,16 @@ Supported global options for ``notmuch`` include Specify the configuration file to use. This overrides any configuration file specified by ${NOTMUCH\_CONFIG}. +``--uuid=HEX`` + Enforce that the database UUID (a unique identifier which + persists until e.g. the database is compacted) + is HEX; exit with an error if it is not. This is useful to + detect rollover in modification counts on messages. You can + find this UUID using e.g. ``notmuch count --lastmod`` + All global options except ``--config`` can also be specified after the -command. For example, ``notmuch subcommand --version`` is equivalent to -``notmuch --version subcommand``. +command. For example, ``notmuch subcommand --uuid=HEX`` is +equivalent to ``notmuch --uuid=HEX subcommand``. 
COMMANDS diff --git a/notmuch-client.h b/notmuch-client.h index 78680aa..4a4f86c 100644 --- a/notmuch-client.h +++ b/notmuch-client.h @@ -466,7 +466,11 @@ notmuch_database_dump (notmuch_database_t *notmuch, notmuch_bool_t gzip_output); #include "command-line-arguments.h" + +extern char *notmuch_requested_db_uuid; extern const notmuch_opt_desc_t notmuch_shared_options []; +void notmuch_exit_if_unmatched_db_uuid (notmuch_database_t *notmuch); + void notmuch_process_shared_options (const char* subcommand_name); int notmuch_minimal_options (const char* subcommand_name, int argc, char **argv); diff --git a/notmuch-compact.c b/notmuch-compact.c index 5be551d..9373721 100644 --- a/notmuch-compact.c +++ b/notmuch-compact.c @@ -46,6 +46,11 @@ notmuch_compact_command (notmuch_config_t *config, int argc, char *argv[]) if (opt_index < 0) return EXIT_FAILURE; +if (notmuch_requested_db_uuid) { + fprintf (stderr, "Error: --uuid not implemented for compact\n"); + return EXIT_FAILURE; +} + notmuch_process_shared_options (argv[0]); if (! 
quiet) diff --git a/notmuch-config.c b/notmuch-config.c index 9348278..d252bb2 100644 --- a/notmuch-config.c +++ b/notmuch-config.c @@ -878,6 +878,10 @@ notmuch_config_command (notmuch_config_t *config, int argc, char *argv[]) if (opt_index < 0) return EXIT_FAILURE; +if (notmuch_requested_db_uuid) + fprintf (stderr, "Warning: ignoring --uuid=%s\n", +notmuch_requested_db_uuid); + /* skip at least subcommand argument */ argc-= opt_index; argv+= opt_index; diff --git a/notmuch-count.c b/notmuch-count.c index 182710a..f26e726 100644 --- a/notmuch-count.c +++ b/notmuch-count.c @@ -189,6 +189,8 @@ notmuch_count_command (notmuch_config_t *config, int argc, char *argv[]) NOTMUCH_DATABASE_MODE_READ_ONLY, )) return EXIT_FAILURE; +notmuch_exit_if_unmatched_db_uuid (notmuch); + query_str = query_string_from_args (config, argc-opt_index, argv+opt_index); if (query_str == NULL) { fprintf (stderr, "Out of memory.\n"); diff --git a/notmuch-dump.c b/notmuch-dump.c index fab22bd..24fc2f2 100644 --- a/notmuch-dump.c +++ b/notmuch-dump.c @@ -215,6 +215,8 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[]) NOTMUCH_DATABASE_MODE_READ_WRITE, )) return EXIT_FAILURE; +notmuch_exit_if_unmatched_db_uuid (notmuch); + char *output_file_name = NULL; int opt_index; diff --git a/notmuch-insert.c b/notmuch-insert.c index c277d62..5205c17 100644 --- a/notmuch-insert.c +++ b/notmuch-insert.c @@ -536,6 +536,8 @@ notmuch_insert_command
[PATCH 3/5] cli/count: add --lastmod
In the short term we need a way to get lastmod information e.g. for the test suite. In the long term we probably want to add lastmod information to at least the structured output for several other clients (e.g. show, search). --- doc/man1/notmuch-count.rst | 5 + notmuch-count.c| 32 +++- test/T570-revision-tracking.sh | 12 3 files changed, 40 insertions(+), 9 deletions(-) diff --git a/doc/man1/notmuch-count.rst b/doc/man1/notmuch-count.rst index ca78c18..99de13a 100644 --- a/doc/man1/notmuch-count.rst +++ b/doc/man1/notmuch-count.rst @@ -47,6 +47,11 @@ Supported options for **count** include (or threads) in the database will be output. This option is not compatible with specifying search terms on the command line. +``--lastmod`` + Append lastmod (counter for number of database updates) and UUID + to the output. lastmod values are only comparable between databases + with the same UUID. + ``--input=``\ Read input from given file, instead of from stdin. Implies ``--batch``. diff --git a/notmuch-count.c b/notmuch-count.c index 57a88a8..182710a 100644 --- a/notmuch-count.c +++ b/notmuch-count.c @@ -25,6 +25,7 @@ enum { OUTPUT_THREADS, OUTPUT_MESSAGES, OUTPUT_FILES, +OUTPUT_LASTMOD, }; /* The following is to allow future options to be added more easily */ @@ -67,10 +68,13 @@ count_files (notmuch_query_t *query) static int print_count (notmuch_database_t *notmuch, const char *query_str, -const char **exclude_tags, size_t exclude_tags_length, int output) +const char **exclude_tags, size_t exclude_tags_length, int output, int print_lastmod) { notmuch_query_t *query; size_t i; +unsigned long revision; +const char *uuid; +int ret = 0; query = notmuch_query_create (notmuch, query_str); if (query == NULL) { @@ -83,24 +87,31 @@ print_count (notmuch_database_t *notmuch, const char *query_str, switch (output) { case OUTPUT_MESSAGES: - printf ("%u\n", notmuch_query_count_messages (query)); + printf ("%u", notmuch_query_count_messages (query)); break; case OUTPUT_THREADS: - printf 
("%u\n", notmuch_query_count_threads (query)); + printf ("%u", notmuch_query_count_threads (query)); break; case OUTPUT_FILES: - printf ("%u\n", count_files (query)); + printf ("%u", count_files (query)); break; } +if (print_lastmod) { + revision = notmuch_database_get_revision (notmuch, ); + printf ("\t%s\t%lu\n", uuid, revision); +} else { + fputs ("\n", stdout); +} + notmuch_query_destroy (query); -return 0; +return ret; } static int count_file (notmuch_database_t *notmuch, FILE *input, const char **exclude_tags, - size_t exclude_tags_length, int output) + size_t exclude_tags_length, int output, int print_lastmod) { char *line = NULL; ssize_t line_len; @@ -110,7 +121,7 @@ count_file (notmuch_database_t *notmuch, FILE *input, const char **exclude_tags, while (!ret && (line_len = getline (, _size, input)) != -1) { chomp_newline (line); ret = print_count (notmuch, line, exclude_tags, exclude_tags_length, - output); + output, print_lastmod); } if (line) @@ -130,6 +141,7 @@ notmuch_count_command (notmuch_config_t *config, int argc, char *argv[]) const char **search_exclude_tags = NULL; size_t search_exclude_tags_length = 0; notmuch_bool_t batch = FALSE; +notmuch_bool_t print_lastmod = FALSE; FILE *input = stdin; char *input_file_name = NULL; int ret; @@ -139,11 +151,13 @@ notmuch_count_command (notmuch_config_t *config, int argc, char *argv[]) (notmuch_keyword_t []){ { "threads", OUTPUT_THREADS }, { "messages", OUTPUT_MESSAGES }, { "files", OUTPUT_FILES }, + { "modifications", OUTPUT_LASTMOD }, { 0, 0 } } }, { NOTMUCH_OPT_KEYWORD, , "exclude", 'x', (notmuch_keyword_t []){ { "true", EXCLUDE_TRUE }, { "false", EXCLUDE_FALSE }, { 0, 0 } } }, + { NOTMUCH_OPT_BOOLEAN, _lastmod, "lastmod", 'l', 0 }, { NOTMUCH_OPT_BOOLEAN, , "batch", 0, 0 }, { NOTMUCH_OPT_STRING, _file_name, "input", 'i', 0 }, { NOTMUCH_OPT_INHERIT, (void *) _shared_options, NULL, 0, 0 }, @@ -188,10 +202,10 @@ notmuch_count_command (notmuch_config_t *config, int argc, char *argv[]) if (batch) ret = 
count_file (notmuch, input, search_exclude_tags, - search_exclude_tags_length, output); + search_exclude_tags_length, output, print_lastmod); else ret = print_count
[PATCH 2/5] lib: API to retrieve database revision and UUID
From: Austin ClementsThis exposes the committed database revision to library users along with a UUID that can be used to detect when revision numbers are no longer comparable (e.g., because the database has been replaced). --- lib/database-private.h | 1 + lib/database.cc| 11 +++ lib/notmuch.h | 18 ++ test/T570-revision-tracking.sh | 37 + test/test-lib.sh | 5 + 5 files changed, 72 insertions(+) create mode 100755 test/T570-revision-tracking.sh diff --git a/lib/database-private.h b/lib/database-private.h index 5c5a2bb..4e93257 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -170,6 +170,7 @@ struct _notmuch_database { * under a higher revision number, which can be generated with * notmuch_database_new_revision. */ unsigned long revision; +const char *uuid; Xapian::QueryParser *query_parser; Xapian::TermGenerator *term_gen; diff --git a/lib/database.cc b/lib/database.cc index 52e2e8f..fc78769 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -992,6 +992,8 @@ notmuch_database_open_verbose (const char *path, notmuch->revision = 0; else notmuch->revision = Xapian::sortable_unserialise (last_mod); + notmuch->uuid = talloc_strdup ( + notmuch, notmuch->xapian_db->get_uuid ().c_str ()); notmuch->query_parser = new Xapian::QueryParser; notmuch->term_gen = new Xapian::TermGenerator; @@ -1666,6 +1668,15 @@ DONE: return NOTMUCH_STATUS_SUCCESS; } +unsigned long +notmuch_database_get_revision (notmuch_database_t *notmuch, + const char **uuid) +{ +if (uuid) + *uuid = notmuch->uuid; +return notmuch->revision; +} + /* We allow the user to use arbitrarily long paths for directories. But * we have a term-length limit. So if we exceed that, we'll use the * SHA-1 of the path for the database term. diff --git a/lib/notmuch.h b/lib/notmuch.h index b1f5bfa..8639b38 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -468,6 +468,24 @@ notmuch_status_t notmuch_database_end_atomic (notmuch_database_t *notmuch); /** + * Return the committed database revision and UUID. 
+ * + * The database revision number increases monotonically with each + * commit to the database. Hence, all messages and message changes + * committed to the database (that is, visible to readers) have a last + * modification revision <= the committed database revision. Any + * messages committed in the future will be assigned a modification + * revision > the committed database revision. + * + * The UUID is a NUL-terminated opaque string that uniquely identifies + * this database. Two revision numbers are only comparable if they + * have the same database UUID. + */ +unsigned long +notmuch_database_get_revision (notmuch_database_t *notmuch, + const char **uuid); + +/** * Retrieve a directory object from the database for 'path'. * * Here, 'path' should be a path relative to the path of 'database' diff --git a/test/T570-revision-tracking.sh b/test/T570-revision-tracking.sh new file mode 100755 index 000..e0a5703 --- /dev/null +++ b/test/T570-revision-tracking.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +test_description="database revision tracking" + +. 
./test-lib.sh || exit 1 + +add_email_corpus + +test_begin_subtest "notmuch_database_get_revision" +test_C ${MAIL_DIR} <<'EOF' +#include +#include +#include +int main (int argc, char** argv) +{ + notmuch_database_t *db; + notmuch_status_t stat; + unsigned long revision; + const char *uuid; + + unsigned long rev; + + stat = notmuch_database_open (argv[1], NOTMUCH_DATABASE_MODE_READ_ONLY, ); + if (stat) + fputs ("open failed\n", stderr); + revision = notmuch_database_get_revision (db, ); + printf("%s\t%lu\n", uuid, revision); +} +EOF +notmuch_uuid_sanitize < OUTPUT > CLEAN +cat <<'EOF' >EXPECTED +== stdout == +UUID 53 +== stderr == +EOF +test_expect_equal_file EXPECTED CLEAN + +test_done diff --git a/test/test-lib.sh b/test/test-lib.sh index 0bf7163..126911f 100644 --- a/test/test-lib.sh +++ b/test/test-lib.sh @@ -720,6 +720,11 @@ notmuch_date_sanitize () sed \ -e 's/^Date: Fri, 05 Jan 2001 .*/Date: GENERATED_DATE/' } + +notmuch_uuid_sanitize () +{ +sed 's/[0-9a-f]\{8\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{12\}/UUID/g' +} # End of notmuch helper functions # Use test_set_prereq to tell that a particular prerequisite is available. -- 2.5.0
[PATCH 1/5] lib: Add per-message last modification tracking
From: Austin Clements

This adds a new document value that stores the revision of the last modification to message metadata, where the revision number increases monotonically with each database commit.

An alternative would be to store the wall-clock time of the last modification of each message. In principle this is simpler and has the advantage that any process can determine the current timestamp without support from libnotmuch. However, even assuming a computer's clock never goes backward and ignoring clock skew in networked environments, this has a fatal flaw. Xapian uses (optimistic) snapshot isolation, which means reads can be concurrent with writes. Given this, consider the following time line with a write and two read transactions:

   write  |-X-A--------------|
   read 1       |---B---|
   read 2                      |---|

The write transaction modifies message X and records the wall-clock time of the modification at A. The writer hangs around for a while and later commits its change. Read 1 is concurrent with the write, so it doesn't see the change to X. It does some query and records the wall-clock time of its results at B. Transaction read 2 later starts after the write commits and queries for changes since wall-clock time B (say the reads are performing an incremental backup). Even though read 1 could not see the change to X, read 2 is told (correctly) that X has not changed since B, the time of the last read. In fact, X changed before wall-clock time A, but the change was not visible until *after* wall-clock time B, so read 2 misses the change to X.

This is tricky to solve in full-blown snapshot isolation, but because Xapian serializes writes, we can use a simple, monotonically increasing database revision number. Furthermore, maintaining this revision number requires no more IO than a wall-clock time solution because Xapian already maintains statistics on the upper (and lower) bound of each value stream.
--- lib/database-private.h | 16 +++- lib/database.cc| 50 -- lib/message.cc | 22 ++ lib/notmuch-private.h | 10 +- 4 files changed, 94 insertions(+), 4 deletions(-) diff --git a/lib/database-private.h b/lib/database-private.h index 24243db..5c5a2bb 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -100,6 +100,12 @@ enum _notmuch_features { * * Introduced: version 3. */ NOTMUCH_FEATURE_INDEXED_MIMETYPES = 1 << 5, + +/* If set, messages store the revision number of the last + * modification in NOTMUCH_VALUE_LAST_MOD. + * + * Introduced: version 3. */ +NOTMUCH_FEATURE_LAST_MOD = 1 << 6, }; /* In C++, a named enum is its own type, so define bitwise operators @@ -145,6 +151,8 @@ struct _notmuch_database { notmuch_database_mode_t mode; int atomic_nesting; +/* TRUE if changes have been made in this atomic section */ +notmuch_bool_t atomic_dirty; Xapian::Database *xapian_db; /* Bit mask of features used by this database. This is a @@ -158,6 +166,11 @@ struct _notmuch_database { * next library call. May be NULL */ char *status_string; +/* Highest committed revision number. Modifications are recorded + * under a higher revision number, which can be generated with + * notmuch_database_new_revision. */ +unsigned long revision; + Xapian::QueryParser *query_parser; Xapian::TermGenerator *term_gen; Xapian::ValueRangeProcessor *value_range_processor; @@ -179,7 +192,8 @@ struct _notmuch_database { * will have it). */ #define NOTMUCH_FEATURES_CURRENT \ (NOTMUCH_FEATURE_FILE_TERMS | NOTMUCH_FEATURE_DIRECTORY_DOCS | \ - NOTMUCH_FEATURE_BOOL_FOLDER | NOTMUCH_FEATURE_GHOSTS) + NOTMUCH_FEATURE_BOOL_FOLDER | NOTMUCH_FEATURE_GHOSTS | \ + NOTMUCH_FEATURE_LAST_MOD) /* Return the list of terms from the given iterator matching a prefix. * The prefix will be stripped from the strings in the returned list. 
diff --git a/lib/database.cc b/lib/database.cc index 6a15174..52e2e8f 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -101,6 +101,9 @@ typedef struct { * * SUBJECT:The value of the "Subject" header * + * LAST_MOD: The revision number as of the last tag or + * filename change. + * * In addition, terms from the content of the message are added with * "from", "to", "attachment", and "subject" prefixes for use by the * user in searching. Similarly, terms from the path of the mail @@ -310,6 +313,8 @@ static const struct { * them. */ { NOTMUCH_FEATURE_INDEXED_MIMETYPES, "indexed MIME types", "w"}, +{ NOTMUCH_FEATURE_LAST_MOD, + "modification tracking", "w"}, }; const char * @@ -737,6 +742,23 @@ _notmuch_database_ensure_writable (notmuch_database_t *notmuch) return NOTMUCH_STATUS_SUCCESS; } +/* Allocate a revision number for the next
revision tracking patches round 4
This obsoletes id:1439112285-6681-1-git-send-email-david@tethera.net
The main differences since that series are in the command line syntax for notmuch-count. We also now consider passing --uuid to notmuch-compact an error, since the user might reasonably expect that to do something sensible, but it cannot because of the existing API.
revision tracking patches round 4
This obsoletes id:1439112285-6681-1-git-send-email-david@tethera.net
The main differences since that series are in the command line syntax for notmuch-count. We also now consider passing --uuid to notmuch-compact an error, since the user might reasonably expect that to do something sensible, but it cannot because of the existing API. ___ notmuch mailing list notmuch@notmuchmail.org http://notmuchmail.org/mailman/listinfo/notmuch
[PATCH 4/5] cli: add global option "--uuid"
The function notmuch_exit_if_unmatched_db_uuid is split from notmuch_process_shared_options because it needs an open notmuch database. There are two exceptional cases in uuid handling. 1) notmuch config and notmuch setup don't currently open the database, so it doesn't make sense to check the UUID. 2) notmuch compact opens the database inside the library, so we either need to open the database just to check uuid, or change the API. --- doc/man1/notmuch.rst | 11 +-- notmuch-client.h | 4 notmuch-compact.c | 5 + notmuch-config.c | 4 notmuch-count.c| 2 ++ notmuch-dump.c | 2 ++ notmuch-insert.c | 2 ++ notmuch-new.c | 3 ++- notmuch-reply.c| 2 ++ notmuch-restore.c | 2 ++ notmuch-search.c | 2 ++ notmuch-setup.c| 4 notmuch-show.c | 2 ++ notmuch-tag.c | 2 ++ notmuch.c | 18 ++ test/T570-revision-tracking.sh | 27 +++ test/random-corpus.c | 2 ++ 17 files changed, 91 insertions(+), 3 deletions(-) diff --git a/doc/man1/notmuch.rst b/doc/man1/notmuch.rst index 0401c91..3acfbdb 100644 --- a/doc/man1/notmuch.rst +++ b/doc/man1/notmuch.rst @@ -51,9 +51,16 @@ Supported global options for ``notmuch`` include Specify the configuration file to use. This overrides any configuration file specified by ${NOTMUCH\_CONFIG}. +``--uuid=HEX`` + Enforce that the database UUID (a unique identifier which + persists until e.g. the database is compacted) + is HEX; exit with an error if it is not. This is useful to + detect rollover in modification counts on messages. You can + find this UUID using e.g. ``notmuch count --lastmod`` + All global options except ``--config`` can also be specified after the -command. For example, ``notmuch subcommand --version`` is equivalent to -``notmuch --version subcommand``. +command. For example, ``notmuch subcommand --uuid=HEX`` is +equivalent to ``notmuch --uuid=HEX subcommand``. 
COMMANDS diff --git a/notmuch-client.h b/notmuch-client.h index 78680aa..4a4f86c 100644 --- a/notmuch-client.h +++ b/notmuch-client.h @@ -466,7 +466,11 @@ notmuch_database_dump (notmuch_database_t *notmuch, notmuch_bool_t gzip_output); #include command-line-arguments.h + +extern char *notmuch_requested_db_uuid; extern const notmuch_opt_desc_t notmuch_shared_options []; +void notmuch_exit_if_unmatched_db_uuid (notmuch_database_t *notmuch); + void notmuch_process_shared_options (const char* subcommand_name); int notmuch_minimal_options (const char* subcommand_name, int argc, char **argv); diff --git a/notmuch-compact.c b/notmuch-compact.c index 5be551d..9373721 100644 --- a/notmuch-compact.c +++ b/notmuch-compact.c @@ -46,6 +46,11 @@ notmuch_compact_command (notmuch_config_t *config, int argc, char *argv[]) if (opt_index 0) return EXIT_FAILURE; +if (notmuch_requested_db_uuid) { + fprintf (stderr, Error: --uuid not implemented for compact\n); + return EXIT_FAILURE; +} + notmuch_process_shared_options (argv[0]); if (! 
quiet) diff --git a/notmuch-config.c b/notmuch-config.c index 9348278..d252bb2 100644 --- a/notmuch-config.c +++ b/notmuch-config.c @@ -878,6 +878,10 @@ notmuch_config_command (notmuch_config_t *config, int argc, char *argv[]) if (opt_index 0) return EXIT_FAILURE; +if (notmuch_requested_db_uuid) + fprintf (stderr, Warning: ignoring --uuid=%s\n, +notmuch_requested_db_uuid); + /* skip at least subcommand argument */ argc-= opt_index; argv+= opt_index; diff --git a/notmuch-count.c b/notmuch-count.c index 182710a..f26e726 100644 --- a/notmuch-count.c +++ b/notmuch-count.c @@ -189,6 +189,8 @@ notmuch_count_command (notmuch_config_t *config, int argc, char *argv[]) NOTMUCH_DATABASE_MODE_READ_ONLY, notmuch)) return EXIT_FAILURE; +notmuch_exit_if_unmatched_db_uuid (notmuch); + query_str = query_string_from_args (config, argc-opt_index, argv+opt_index); if (query_str == NULL) { fprintf (stderr, Out of memory.\n); diff --git a/notmuch-dump.c b/notmuch-dump.c index fab22bd..24fc2f2 100644 --- a/notmuch-dump.c +++ b/notmuch-dump.c @@ -215,6 +215,8 @@ notmuch_dump_command (notmuch_config_t *config, int argc, char *argv[]) NOTMUCH_DATABASE_MODE_READ_WRITE, notmuch)) return EXIT_FAILURE; +notmuch_exit_if_unmatched_db_uuid (notmuch); + char *output_file_name = NULL; int opt_index; diff --git a/notmuch-insert.c b/notmuch-insert.c index c277d62..5205c17 100644 --- a/notmuch-insert.c +++ b/notmuch-insert.c @@ -536,6 +536,8 @@ notmuch_insert_command
[PATCH 2/5] lib: API to retrieve database revision and UUID
From: Austin Clements acleme...@csail.mit.edu This exposes the committed database revision to library users along with a UUID that can be used to detect when revision numbers are no longer comparable (e.g., because the database has been replaced). --- lib/database-private.h | 1 + lib/database.cc| 11 +++ lib/notmuch.h | 18 ++ test/T570-revision-tracking.sh | 37 + test/test-lib.sh | 5 + 5 files changed, 72 insertions(+) create mode 100755 test/T570-revision-tracking.sh diff --git a/lib/database-private.h b/lib/database-private.h index 5c5a2bb..4e93257 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -170,6 +170,7 @@ struct _notmuch_database { * under a higher revision number, which can be generated with * notmuch_database_new_revision. */ unsigned long revision; +const char *uuid; Xapian::QueryParser *query_parser; Xapian::TermGenerator *term_gen; diff --git a/lib/database.cc b/lib/database.cc index 52e2e8f..fc78769 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -992,6 +992,8 @@ notmuch_database_open_verbose (const char *path, notmuch-revision = 0; else notmuch-revision = Xapian::sortable_unserialise (last_mod); + notmuch-uuid = talloc_strdup ( + notmuch, notmuch-xapian_db-get_uuid ().c_str ()); notmuch-query_parser = new Xapian::QueryParser; notmuch-term_gen = new Xapian::TermGenerator; @@ -1666,6 +1668,15 @@ DONE: return NOTMUCH_STATUS_SUCCESS; } +unsigned long +notmuch_database_get_revision (notmuch_database_t *notmuch, + const char **uuid) +{ +if (uuid) + *uuid = notmuch-uuid; +return notmuch-revision; +} + /* We allow the user to use arbitrarily long paths for directories. But * we have a term-length limit. So if we exceed that, we'll use the * SHA-1 of the path for the database term. 
diff --git a/lib/notmuch.h b/lib/notmuch.h index b1f5bfa..8639b38 100644 --- a/lib/notmuch.h +++ b/lib/notmuch.h @@ -468,6 +468,24 @@ notmuch_status_t notmuch_database_end_atomic (notmuch_database_t *notmuch); /** + * Return the committed database revision and UUID. + * + * The database revision number increases monotonically with each + * commit to the database. Hence, all messages and message changes + * committed to the database (that is, visible to readers) have a last + * modification revision = the committed database revision. Any + * messages committed in the future will be assigned a modification + * revision the committed database revision. + * + * The UUID is a NUL-terminated opaque string that uniquely identifies + * this database. Two revision numbers are only comparable if they + * have the same database UUID. + */ +unsigned long +notmuch_database_get_revision (notmuch_database_t *notmuch, + const char **uuid); + +/** * Retrieve a directory object from the database for 'path'. * * Here, 'path' should be a path relative to the path of 'database' diff --git a/test/T570-revision-tracking.sh b/test/T570-revision-tracking.sh new file mode 100755 index 000..e0a5703 --- /dev/null +++ b/test/T570-revision-tracking.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +test_description=database revision tracking + +. 
./test-lib.sh || exit 1 + +add_email_corpus + +test_begin_subtest notmuch_database_get_revision +test_C ${MAIL_DIR} 'EOF' +#include stdio.h +#include string.h +#include notmuch.h +int main (int argc, char** argv) +{ + notmuch_database_t *db; + notmuch_status_t stat; + unsigned long revision; + const char *uuid; + + unsigned long rev; + + stat = notmuch_database_open (argv[1], NOTMUCH_DATABASE_MODE_READ_ONLY, db); + if (stat) + fputs (open failed\n, stderr); + revision = notmuch_database_get_revision (db, uuid); + printf(%s\t%lu\n, uuid, revision); +} +EOF +notmuch_uuid_sanitize OUTPUT CLEAN +cat 'EOF' EXPECTED +== stdout == +UUID 53 +== stderr == +EOF +test_expect_equal_file EXPECTED CLEAN + +test_done diff --git a/test/test-lib.sh b/test/test-lib.sh index 0bf7163..126911f 100644 --- a/test/test-lib.sh +++ b/test/test-lib.sh @@ -720,6 +720,11 @@ notmuch_date_sanitize () sed \ -e 's/^Date: Fri, 05 Jan 2001 .*/Date: GENERATED_DATE/' } + +notmuch_uuid_sanitize () +{ +sed 's/[0-9a-f]\{8\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{12\}/UUID/g' +} # End of notmuch helper functions # Use test_set_prereq to tell that a particular prerequisite is available. -- 2.5.0 ___ notmuch mailing list notmuch@notmuchmail.org http://notmuchmail.org/mailman/listinfo/notmuch
[PATCH 5/5] lib: Add "lastmod:" queries for filtering by last modification
From: Austin Clements amdra...@mit.edu The implementation is essentially the same as the date range search prior to Jani's fancy date parser. --- doc/man7/notmuch-search-terms.rst | 8 lib/database-private.h| 1 + lib/database.cc | 4 test/T570-revision-tracking.sh| 17 + 4 files changed, 30 insertions(+) diff --git a/doc/man7/notmuch-search-terms.rst b/doc/man7/notmuch-search-terms.rst index 1d27ac1..e71a525 100644 --- a/doc/man7/notmuch-search-terms.rst +++ b/doc/man7/notmuch-search-terms.rst @@ -54,6 +54,8 @@ indicate user-supplied values): - date:since..until +- lastmod:since..until + The **from:** prefix is used to match the name or address of the sender of an email message. @@ -124,6 +126,12 @@ The time range can also be specified using timestamps with a syntax of: Each timestamp is a number representing the number of seconds since 1970-01-01 00:00:00 UTC. +The **lastmod:** prefix can be used to restrict the result by the +database revision number of when messages were last modified (tags +were added/removed or filenames changed). This is usually used in +conjunction with the **--uuid** argument to **notmuch search** +to find messages that have changed since an earlier query. 
+ Operators - diff --git a/lib/database-private.h b/lib/database-private.h index 4e93257..3fb10f7 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -176,6 +176,7 @@ struct _notmuch_database { Xapian::TermGenerator *term_gen; Xapian::ValueRangeProcessor *value_range_processor; Xapian::ValueRangeProcessor *date_range_processor; +Xapian::ValueRangeProcessor *last_mod_range_processor; }; /* Prior to database version 3, features were implied by the database diff --git a/lib/database.cc b/lib/database.cc index fc78769..bab3334 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -1000,6 +1000,7 @@ notmuch_database_open_verbose (const char *path, notmuch->term_gen->set_stemmer (Xapian::Stem ("english")); notmuch->value_range_processor = new Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_TIMESTAMP); notmuch->date_range_processor = new ParseTimeValueRangeProcessor (NOTMUCH_VALUE_TIMESTAMP); + notmuch->last_mod_range_processor = new Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_LAST_MOD, "lastmod:"); notmuch->query_parser->set_default_op (Xapian::Query::OP_AND); notmuch->query_parser->set_database (*notmuch->xapian_db); @@ -1007,6 +1008,7 @@ notmuch_database_open_verbose (const char *path, notmuch->query_parser->set_stemming_strategy (Xapian::QueryParser::STEM_SOME); notmuch->query_parser->add_valuerangeprocessor (notmuch->value_range_processor); notmuch->query_parser->add_valuerangeprocessor (notmuch->date_range_processor); + notmuch->query_parser->add_valuerangeprocessor (notmuch->last_mod_range_processor); for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) { prefix_t *prefix = &BOOLEAN_PREFIX_EXTERNAL[i]; @@ -1085,6 +1087,8 @@ notmuch_database_close (notmuch_database_t *notmuch) notmuch->value_range_processor = NULL; delete notmuch->date_range_processor; notmuch->date_range_processor = NULL; +delete notmuch->last_mod_range_processor; +notmuch->last_mod_range_processor = NULL; return status; } diff --git a/test/T570-revision-tracking.sh b/test/T570-revision-tracking.sh 
index 20b44cb..0936011 100755 --- a/test/T570-revision-tracking.sh +++ b/test/T570-revision-tracking.sh @@ -73,4 +73,21 @@ test_expect_success 'tag succeeds with correct uuid' \ test_expect_code 1 'tag fails with incorrect uuid' \ notmuch tag --uuid=this-is-no-uuid '*' +test2 +test_begin_subtest 'lastmod:0.. matches everything' +total=$(notmuch count '*') +modtotal=$(notmuch count lastmod:0..) +test_expect_equal $total $modtotal + +test_begin_subtest 'lastmod:100.. matches nothing' +modtotal=$(notmuch count lastmod:100..) +test_expect_equal 0 $modtotal + +test_begin_subtest 'exclude one message using lastmod' +lastmod=$(notmuch count --lastmod '*' | cut -f3) +total=$(notmuch count '*') +notmuch tag +4efc743a.3060...@april.org id:4efc743a.3060...@april.org +subtotal=$(notmuch count lastmod:..$lastmod) +result=$(($subtotal == $total-1)) +test_expect_equal 1 $result + test_done -- 2.5.0 ___ notmuch mailing list notmuch@notmuchmail.org http://notmuchmail.org/mailman/listinfo/notmuch
[PATCH 1/5] lib: Add per-message last modification tracking
From: Austin Clements amdra...@mit.edu This adds a new document value that stores the revision of the last modification to message metadata, where the revision number increases monotonically with each database commit. An alternative would be to store the wall-clock time of the last modification of each message. In principle this is simpler and has the advantage that any process can determine the current timestamp without support from libnotmuch. However, even assuming a computer's clock never goes backward and ignoring clock skew in networked environments, this has a fatal flaw. Xapian uses (optimistic) snapshot isolation, which means reads can be concurrent with writes. Given this, consider the following time line with a write and two read transactions:

   write  |-X-A---------------|
   read 1       |---B---|
   read 2                       |---|

The write transaction modifies message X and records the wall-clock time of the modification at A. The writer hangs around for a while and later commits its change. Read 1 is concurrent with the write, so it doesn't see the change to X. It does some query and records the wall-clock time of its results at B. Transaction read 2 later starts after the write commits and queries for changes since wall-clock time B (say the reads are performing an incremental backup). Even though read 1 could not see the change to X, read 2 is told (correctly) that X has not changed since B, the time of the last read. In fact, X changed before wall-clock time A, but the change was not visible until *after* wall-clock time B, so read 2 misses the change to X. This is tricky to solve in full-blown snapshot isolation, but because Xapian serializes writes, we can use a simple, monotonically increasing database revision number. Furthermore, maintaining this revision number requires no more IO than a wall-clock time solution because Xapian already maintains statistics on the upper (and lower) bound of each value stream. 
--- lib/database-private.h | 16 +++- lib/database.cc| 50 -- lib/message.cc | 22 ++ lib/notmuch-private.h | 10 +- 4 files changed, 94 insertions(+), 4 deletions(-) diff --git a/lib/database-private.h b/lib/database-private.h index 24243db..5c5a2bb 100644 --- a/lib/database-private.h +++ b/lib/database-private.h @@ -100,6 +100,12 @@ enum _notmuch_features { * * Introduced: version 3. */ NOTMUCH_FEATURE_INDEXED_MIMETYPES = 1 << 5, + +/* If set, messages store the revision number of the last + * modification in NOTMUCH_VALUE_LAST_MOD. + * + * Introduced: version 3. */ +NOTMUCH_FEATURE_LAST_MOD = 1 << 6, }; /* In C++, a named enum is its own type, so define bitwise operators @@ -145,6 +151,8 @@ struct _notmuch_database { notmuch_database_mode_t mode; int atomic_nesting; +/* TRUE if changes have been made in this atomic section */ +notmuch_bool_t atomic_dirty; Xapian::Database *xapian_db; /* Bit mask of features used by this database. This is a @@ -158,6 +166,11 @@ struct _notmuch_database { * next library call. May be NULL */ char *status_string; +/* Highest committed revision number. Modifications are recorded + * under a higher revision number, which can be generated with + * notmuch_database_new_revision. */ +unsigned long revision; + Xapian::QueryParser *query_parser; Xapian::TermGenerator *term_gen; Xapian::ValueRangeProcessor *value_range_processor; @@ -179,7 +192,8 @@ struct _notmuch_database { * will have it). */ #define NOTMUCH_FEATURES_CURRENT \ (NOTMUCH_FEATURE_FILE_TERMS | NOTMUCH_FEATURE_DIRECTORY_DOCS | \ - NOTMUCH_FEATURE_BOOL_FOLDER | NOTMUCH_FEATURE_GHOSTS) + NOTMUCH_FEATURE_BOOL_FOLDER | NOTMUCH_FEATURE_GHOSTS | \ + NOTMUCH_FEATURE_LAST_MOD) /* Return the list of terms from the given iterator matching a prefix. * The prefix will be stripped from the strings in the returned list. 
diff --git a/lib/database.cc b/lib/database.cc index 6a15174..52e2e8f 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -101,6 +101,9 @@ typedef struct { * * SUBJECT:The value of the Subject header * + * LAST_MOD: The revision number as of the last tag or + * filename change. + * * In addition, terms from the content of the message are added with * from, to, attachment, and subject prefixes for use by the * user in searching. Similarly, terms from the path of the mail @@ -310,6 +313,8 @@ static const struct { * them. */ { NOTMUCH_FEATURE_INDEXED_MIMETYPES, indexed MIME types, w}, +{ NOTMUCH_FEATURE_LAST_MOD, + modification tracking, w}, }; const char * @@ -737,6 +742,23 @@ _notmuch_database_ensure_writable (notmuch_database_t *notmuch) return NOTMUCH_STATUS_SUCCESS; } +/* Allocate a revision number for the next change. */ +unsigned