[PATCH 5/5] lib: Add "lastmod:" queries for filtering by last modification

2015-08-14 Thread David Bremner
From: Austin Clements 

The implementation is essentially the same as the date range search
prior to Jani's fancy date parser.
---
 doc/man7/notmuch-search-terms.rst |  8 
 lib/database-private.h|  1 +
 lib/database.cc   |  4 
 test/T570-revision-tracking.sh| 17 +
 4 files changed, 30 insertions(+)

diff --git a/doc/man7/notmuch-search-terms.rst 
b/doc/man7/notmuch-search-terms.rst
index 1d27ac1..e71a525 100644
--- a/doc/man7/notmuch-search-terms.rst
+++ b/doc/man7/notmuch-search-terms.rst
@@ -54,6 +54,8 @@ indicate user-supplied values):

 -  date:<since>..<until>

+-  lastmod:<since>..<until>
+
 The **from:** prefix is used to match the name or address of the sender
 of an email message.

@@ -124,6 +126,12 @@ The time range can also be specified using timestamps with 
a syntax of:
 Each timestamp is a number representing the number of seconds since
 1970-01-01 00:00:00 UTC.

+The **lastmod:** prefix can be used to restrict the result by the
+database revision number of when messages were last modified (tags
+were added/removed or filenames changed).  This is usually used in
+conjunction with the **--uuid** argument to **notmuch search**
+to find messages that have changed since an earlier query.
+
 Operators
 ---------

diff --git a/lib/database-private.h b/lib/database-private.h
index 4e93257..3fb10f7 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -176,6 +176,7 @@ struct _notmuch_database {
 Xapian::TermGenerator *term_gen;
 Xapian::ValueRangeProcessor *value_range_processor;
 Xapian::ValueRangeProcessor *date_range_processor;
+Xapian::ValueRangeProcessor *last_mod_range_processor;
 };

 /* Prior to database version 3, features were implied by the database
diff --git a/lib/database.cc b/lib/database.cc
index fc78769..bab3334 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -1000,6 +1000,7 @@ notmuch_database_open_verbose (const char *path,
notmuch->term_gen->set_stemmer (Xapian::Stem ("english"));
notmuch->value_range_processor = new Xapian::NumberValueRangeProcessor 
(NOTMUCH_VALUE_TIMESTAMP);
notmuch->date_range_processor = new ParseTimeValueRangeProcessor 
(NOTMUCH_VALUE_TIMESTAMP);
+   notmuch->last_mod_range_processor = new 
Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_LAST_MOD, "lastmod:");

notmuch->query_parser->set_default_op (Xapian::Query::OP_AND);
notmuch->query_parser->set_database (*notmuch->xapian_db);
@@ -1007,6 +1008,7 @@ notmuch_database_open_verbose (const char *path,
notmuch->query_parser->set_stemming_strategy 
(Xapian::QueryParser::STEM_SOME);
notmuch->query_parser->add_valuerangeprocessor 
(notmuch->value_range_processor);
notmuch->query_parser->add_valuerangeprocessor 
(notmuch->date_range_processor);
+   notmuch->query_parser->add_valuerangeprocessor 
(notmuch->last_mod_range_processor);

for (i = 0; i < ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) {
prefix_t *prefix = &BOOLEAN_PREFIX_EXTERNAL[i];
@@ -1085,6 +1087,8 @@ notmuch_database_close (notmuch_database_t *notmuch)
 notmuch->value_range_processor = NULL;
 delete notmuch->date_range_processor;
 notmuch->date_range_processor = NULL;
+delete notmuch->last_mod_range_processor;
+notmuch->last_mod_range_processor = NULL;

 return status;
 }
diff --git a/test/T570-revision-tracking.sh b/test/T570-revision-tracking.sh
index 20b44cb..0936011 100755
--- a/test/T570-revision-tracking.sh
+++ b/test/T570-revision-tracking.sh
@@ -73,4 +73,21 @@ test_expect_success 'tag succeeds with correct uuid' \
 test_expect_code 1 'tag fails with incorrect uuid' \
 "notmuch tag --uuid=this-is-no-uuid '*' +test2"

+test_begin_subtest 'lastmod:0.. matches everything'
+total=$(notmuch count '*')
+modtotal=$(notmuch count lastmod:0..)
+test_expect_equal "$total" "$modtotal"
+
+test_begin_subtest 'lastmod:100.. matches nothing'
+modtotal=$(notmuch count lastmod:100..)
+test_expect_equal 0 "$modtotal"
+
+test_begin_subtest 'exclude one message using lastmod'
+lastmod=$(notmuch count --lastmod '*' | cut -f3)
+total=$(notmuch count '*')
+notmuch tag +4EFC743A.3060609@april.org id:4EFC743A.3060609@april.org
+subtotal=$(notmuch count lastmod:..$lastmod)
+result=$(($subtotal == $total-1))
+test_expect_equal 1 "$result"
+
 test_done
-- 
2.5.0



[PATCH 4/5] cli: add global option "--uuid"

2015-08-14 Thread David Bremner
The function notmuch_exit_if_unmatched_db_uuid is split from
notmuch_process_shared_options because it needs an open notmuch
database.

There are two exceptional cases in uuid handling.

1) notmuch config and notmuch setup don't currently open the database,
   so it doesn't make sense to check the UUID.

2) notmuch compact opens the database inside the library, so we either
   need to open the database just to check uuid, or change the API.
---
 doc/man1/notmuch.rst   | 11 +--
 notmuch-client.h   |  4 
 notmuch-compact.c  |  5 +
 notmuch-config.c   |  4 
 notmuch-count.c|  2 ++
 notmuch-dump.c |  2 ++
 notmuch-insert.c   |  2 ++
 notmuch-new.c  |  3 ++-
 notmuch-reply.c|  2 ++
 notmuch-restore.c  |  2 ++
 notmuch-search.c   |  2 ++
 notmuch-setup.c|  4 
 notmuch-show.c |  2 ++
 notmuch-tag.c  |  2 ++
 notmuch.c  | 18 ++
 test/T570-revision-tracking.sh | 27 +++
 test/random-corpus.c   |  2 ++
 17 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/doc/man1/notmuch.rst b/doc/man1/notmuch.rst
index 0401c91..3acfbdb 100644
--- a/doc/man1/notmuch.rst
+++ b/doc/man1/notmuch.rst
@@ -51,9 +51,16 @@ Supported global options for ``notmuch`` include
Specify the configuration file to use. This overrides any
configuration file specified by ${NOTMUCH\_CONFIG}.

+``--uuid=HEX``
+   Enforce that the database UUID (a unique identifier which
+   persists until e.g. the database is compacted)
+   is HEX; exit with an error if it is not. This is useful to
+   detect rollover in modification counts on messages. You can
+   find this UUID using e.g. ``notmuch count --lastmod``
+
 All global options except ``--config`` can also be specified after the
-command. For example, ``notmuch subcommand --version`` is equivalent to
-``notmuch --version subcommand``.
+command. For example, ``notmuch subcommand --uuid=HEX`` is
+equivalent to ``notmuch --uuid=HEX subcommand``.

 COMMANDS
 
diff --git a/notmuch-client.h b/notmuch-client.h
index 78680aa..4a4f86c 100644
--- a/notmuch-client.h
+++ b/notmuch-client.h
@@ -466,7 +466,11 @@ notmuch_database_dump (notmuch_database_t *notmuch,
   notmuch_bool_t gzip_output);

 #include "command-line-arguments.h"
+
+extern char *notmuch_requested_db_uuid;
 extern const notmuch_opt_desc_t  notmuch_shared_options [];
+void notmuch_exit_if_unmatched_db_uuid (notmuch_database_t *notmuch);
+
 void notmuch_process_shared_options (const char* subcommand_name);
 int notmuch_minimal_options (const char* subcommand_name,
 int argc, char **argv);
diff --git a/notmuch-compact.c b/notmuch-compact.c
index 5be551d..9373721 100644
--- a/notmuch-compact.c
+++ b/notmuch-compact.c
@@ -46,6 +46,11 @@ notmuch_compact_command (notmuch_config_t *config, int argc, 
char *argv[])
 if (opt_index < 0)
return EXIT_FAILURE;

+if (notmuch_requested_db_uuid) {
+   fprintf (stderr, "Error: --uuid not implemented for compact\n");
+   return EXIT_FAILURE;
+}
+
 notmuch_process_shared_options (argv[0]);

 if (! quiet)
diff --git a/notmuch-config.c b/notmuch-config.c
index 9348278..d252bb2 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -878,6 +878,10 @@ notmuch_config_command (notmuch_config_t *config, int 
argc, char *argv[])
 if (opt_index < 0)
return EXIT_FAILURE;

+if (notmuch_requested_db_uuid)
+   fprintf (stderr, "Warning: ignoring --uuid=%s\n",
+notmuch_requested_db_uuid);
+
 /* skip at least subcommand argument */
 argc-= opt_index;
 argv+= opt_index;
diff --git a/notmuch-count.c b/notmuch-count.c
index 182710a..f26e726 100644
--- a/notmuch-count.c
+++ b/notmuch-count.c
@@ -189,6 +189,8 @@ notmuch_count_command (notmuch_config_t *config, int argc, 
char *argv[])
   NOTMUCH_DATABASE_MODE_READ_ONLY, &notmuch))
return EXIT_FAILURE;

+notmuch_exit_if_unmatched_db_uuid (notmuch);
+
 query_str = query_string_from_args (config, argc-opt_index, 
argv+opt_index);
 if (query_str == NULL) {
fprintf (stderr, "Out of memory.\n");
diff --git a/notmuch-dump.c b/notmuch-dump.c
index fab22bd..24fc2f2 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -215,6 +215,8 @@ notmuch_dump_command (notmuch_config_t *config, int argc, 
char *argv[])
   NOTMUCH_DATABASE_MODE_READ_WRITE, &notmuch))
return EXIT_FAILURE;

+notmuch_exit_if_unmatched_db_uuid (notmuch);
+
 char *output_file_name = NULL;
 int opt_index;

diff --git a/notmuch-insert.c b/notmuch-insert.c
index c277d62..5205c17 100644
--- a/notmuch-insert.c
+++ b/notmuch-insert.c
@@ -536,6 +536,8 @@ notmuch_insert_command 

[PATCH 3/5] cli/count: add --lastmod

2015-08-14 Thread David Bremner
In the short term we need a way to get lastmod information e.g. for
the test suite. In the long term we probably want to add lastmod
information to at least the structured output for several other
clients (e.g. show, search).
---
 doc/man1/notmuch-count.rst |  5 +
 notmuch-count.c| 32 +++-
 test/T570-revision-tracking.sh | 12 
 3 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/doc/man1/notmuch-count.rst b/doc/man1/notmuch-count.rst
index ca78c18..99de13a 100644
--- a/doc/man1/notmuch-count.rst
+++ b/doc/man1/notmuch-count.rst
@@ -47,6 +47,11 @@ Supported options for **count** include
 (or threads) in the database will be output. This option is not
 compatible with specifying search terms on the command line.

+``--lastmod``
+   Append lastmod (counter for number of database updates) and UUID
+   to the output. lastmod values are only comparable between databases
+   with the same UUID.
+
 ``--input=``\ <filename>
 Read input from given file, instead of from stdin. Implies
 ``--batch``.
diff --git a/notmuch-count.c b/notmuch-count.c
index 57a88a8..182710a 100644
--- a/notmuch-count.c
+++ b/notmuch-count.c
@@ -25,6 +25,7 @@ enum {
 OUTPUT_THREADS,
 OUTPUT_MESSAGES,
 OUTPUT_FILES,
+OUTPUT_LASTMOD,
 };

 /* The following is to allow future options to be added more easily */
@@ -67,10 +68,13 @@ count_files (notmuch_query_t *query)

 static int
 print_count (notmuch_database_t *notmuch, const char *query_str,
-const char **exclude_tags, size_t exclude_tags_length, int output)
+const char **exclude_tags, size_t exclude_tags_length, int output, 
int print_lastmod)
 {
 notmuch_query_t *query;
 size_t i;
+unsigned long revision;
+const char *uuid;
+int ret = 0;

 query = notmuch_query_create (notmuch, query_str);
 if (query == NULL) {
@@ -83,24 +87,31 @@ print_count (notmuch_database_t *notmuch, const char 
*query_str,

 switch (output) {
 case OUTPUT_MESSAGES:
-   printf ("%u\n", notmuch_query_count_messages (query));
+   printf ("%u", notmuch_query_count_messages (query));
break;
 case OUTPUT_THREADS:
-   printf ("%u\n", notmuch_query_count_threads (query));
+   printf ("%u", notmuch_query_count_threads (query));
break;
 case OUTPUT_FILES:
-   printf ("%u\n", count_files (query));
+   printf ("%u", count_files (query));
break;
 }

+if (print_lastmod) {
+   revision = notmuch_database_get_revision (notmuch, &uuid);
+   printf ("\t%s\t%lu\n", uuid, revision);
+} else {
+   fputs ("\n", stdout);
+}
+
 notmuch_query_destroy (query);

-return 0;
+return ret;
 }

 static int
 count_file (notmuch_database_t *notmuch, FILE *input, const char 
**exclude_tags,
-   size_t exclude_tags_length, int output)
+   size_t exclude_tags_length, int output, int print_lastmod)
 {
 char *line = NULL;
 ssize_t line_len;
@@ -110,7 +121,7 @@ count_file (notmuch_database_t *notmuch, FILE *input, const 
char **exclude_tags,
 while (!ret && (line_len = getline (&line, &line_size, input)) != -1) {
chomp_newline (line);
ret = print_count (notmuch, line, exclude_tags, exclude_tags_length,
-  output);
+  output, print_lastmod);
 }

 if (line)
@@ -130,6 +141,7 @@ notmuch_count_command (notmuch_config_t *config, int argc, 
char *argv[])
 const char **search_exclude_tags = NULL;
 size_t search_exclude_tags_length = 0;
 notmuch_bool_t batch = FALSE;
+notmuch_bool_t print_lastmod = FALSE;
 FILE *input = stdin;
 char *input_file_name = NULL;
 int ret;
@@ -139,11 +151,13 @@ notmuch_count_command (notmuch_config_t *config, int 
argc, char *argv[])
  (notmuch_keyword_t []){ { "threads", OUTPUT_THREADS },
  { "messages", OUTPUT_MESSAGES },
  { "files", OUTPUT_FILES },
+ { "modifications", OUTPUT_LASTMOD },
  { 0, 0 } } },
{ NOTMUCH_OPT_KEYWORD, &exclude, "exclude", 'x',
  (notmuch_keyword_t []){ { "true", EXCLUDE_TRUE },
  { "false", EXCLUDE_FALSE },
  { 0, 0 } } },
+   { NOTMUCH_OPT_BOOLEAN, &print_lastmod, "lastmod", 'l', 0 },
{ NOTMUCH_OPT_BOOLEAN, &batch, "batch", 0, 0 },
{ NOTMUCH_OPT_STRING, &input_file_name, "input", 'i', 0 },
{ NOTMUCH_OPT_INHERIT, (void *) &notmuch_shared_options, NULL, 0, 0 },
@@ -188,10 +202,10 @@ notmuch_count_command (notmuch_config_t *config, int 
argc, char *argv[])

 if (batch)
ret = count_file (notmuch, input, search_exclude_tags,
- search_exclude_tags_length, output);
+ search_exclude_tags_length, output, print_lastmod);
 else
ret = print_count 

[PATCH 2/5] lib: API to retrieve database revision and UUID

2015-08-14 Thread David Bremner
From: Austin Clements 

This exposes the committed database revision to library users along
with a UUID that can be used to detect when revision numbers are no
longer comparable (e.g., because the database has been replaced).
---
 lib/database-private.h |  1 +
 lib/database.cc| 11 +++
 lib/notmuch.h  | 18 ++
 test/T570-revision-tracking.sh | 37 +
 test/test-lib.sh   |  5 +
 5 files changed, 72 insertions(+)
 create mode 100755 test/T570-revision-tracking.sh

diff --git a/lib/database-private.h b/lib/database-private.h
index 5c5a2bb..4e93257 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -170,6 +170,7 @@ struct _notmuch_database {
  * under a higher revision number, which can be generated with
  * notmuch_database_new_revision. */
 unsigned long revision;
+const char *uuid;

 Xapian::QueryParser *query_parser;
 Xapian::TermGenerator *term_gen;
diff --git a/lib/database.cc b/lib/database.cc
index 52e2e8f..fc78769 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -992,6 +992,8 @@ notmuch_database_open_verbose (const char *path,
notmuch->revision = 0;
else
notmuch->revision = Xapian::sortable_unserialise (last_mod);
+   notmuch->uuid = talloc_strdup (
+   notmuch, notmuch->xapian_db->get_uuid ().c_str ());

notmuch->query_parser = new Xapian::QueryParser;
notmuch->term_gen = new Xapian::TermGenerator;
@@ -1666,6 +1668,15 @@ DONE:
 return NOTMUCH_STATUS_SUCCESS;
 }

+unsigned long
+notmuch_database_get_revision (notmuch_database_t *notmuch,
+   const char **uuid)
+{
+if (uuid)
+   *uuid = notmuch->uuid;
+return notmuch->revision;
+}
+
 /* We allow the user to use arbitrarily long paths for directories. But
  * we have a term-length limit. So if we exceed that, we'll use the
  * SHA-1 of the path for the database term.
diff --git a/lib/notmuch.h b/lib/notmuch.h
index b1f5bfa..8639b38 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -468,6 +468,24 @@ notmuch_status_t
 notmuch_database_end_atomic (notmuch_database_t *notmuch);

 /**
+ * Return the committed database revision and UUID.
+ *
+ * The database revision number increases monotonically with each
+ * commit to the database.  Hence, all messages and message changes
+ * committed to the database (that is, visible to readers) have a last
+ * modification revision <= the committed database revision.  Any
+ * messages committed in the future will be assigned a modification
+ * revision > the committed database revision.
+ *
+ * The UUID is a NUL-terminated opaque string that uniquely identifies
+ * this database.  Two revision numbers are only comparable if they
+ * have the same database UUID.
+ */
+unsigned long
+notmuch_database_get_revision (notmuch_database_t *notmuch,
+   const char **uuid);
+
+/**
  * Retrieve a directory object from the database for 'path'.
  *
  * Here, 'path' should be a path relative to the path of 'database'
diff --git a/test/T570-revision-tracking.sh b/test/T570-revision-tracking.sh
new file mode 100755
index 000..e0a5703
--- /dev/null
+++ b/test/T570-revision-tracking.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+test_description="database revision tracking"
+
+. ./test-lib.sh || exit 1
+
+add_email_corpus
+
+test_begin_subtest "notmuch_database_get_revision"
+test_C ${MAIL_DIR} <<'EOF'
+#include <stdio.h>
+#include <string.h>
+#include <notmuch.h>
+int main (int argc, char** argv)
+{
+   notmuch_database_t *db;
+   notmuch_status_t stat;
+   unsigned long revision;
+   const char *uuid;
+
+   unsigned long rev;
+
+   stat = notmuch_database_open (argv[1], NOTMUCH_DATABASE_MODE_READ_ONLY, 
&db);
+   if (stat)
+   fputs ("open failed\n", stderr);
+   revision = notmuch_database_get_revision (db, &uuid);
+   printf("%s\t%lu\n", uuid, revision);
+}
+EOF
+notmuch_uuid_sanitize < OUTPUT > CLEAN
+cat <<'EOF' >EXPECTED
+== stdout ==
+UUID   53
+== stderr ==
+EOF
+test_expect_equal_file EXPECTED CLEAN
+
+test_done
diff --git a/test/test-lib.sh b/test/test-lib.sh
index 0bf7163..126911f 100644
--- a/test/test-lib.sh
+++ b/test/test-lib.sh
@@ -720,6 +720,11 @@ notmuch_date_sanitize ()
 sed \
-e 's/^Date: Fri, 05 Jan 2001 .*/Date: GENERATED_DATE/'
 }
+
+notmuch_uuid_sanitize ()
+{
+sed 
's/[0-9a-f]\{8\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{12\}/UUID/g'
+}
 # End of notmuch helper functions

 # Use test_set_prereq to tell that a particular prerequisite is available.
-- 
2.5.0



[PATCH 1/5] lib: Add per-message last modification tracking

2015-08-14 Thread David Bremner
From: Austin Clements 

This adds a new document value that stores the revision of the last
modification to message metadata, where the revision number increases
monotonically with each database commit.

An alternative would be to store the wall-clock time of the last
modification of each message.  In principle this is simpler and has
the advantage that any process can determine the current timestamp
without support from libnotmuch.  However, even assuming a computer's
clock never goes backward and ignoring clock skew in networked
environments, this has a fatal flaw.  Xapian uses (optimistic)
snapshot isolation, which means reads can be concurrent with writes.
Given this, consider the following time line with a write and two read
transactions:

   write  |-X-A--------------|
   read 1       |---B---|
   read 2                      |---|

The write transaction modifies message X and records the wall-clock
time of the modification at A.  The writer hangs around for a while
and later commits its change.  Read 1 is concurrent with the write, so
it doesn't see the change to X.  It does some query and records the
wall-clock time of its results at B.  Transaction read 2 later starts
after the write commits and queries for changes since wall-clock time
B (say the reads are performing an incremental backup).  Even though
read 1 could not see the change to X, read 2 is told (correctly) that
X has not changed since B, the time of the last read.  In fact, X
changed before wall-clock time A, but the change was not visible until
*after* wall-clock time B, so read 2 misses the change to X.

This is tricky to solve in full-blown snapshot isolation, but because
Xapian serializes writes, we can use a simple, monotonically
increasing database revision number.  Furthermore, maintaining this
revision number requires no more IO than a wall-clock time solution
because Xapian already maintains statistics on the upper (and lower)
bound of each value stream.
---
 lib/database-private.h | 16 +++-
 lib/database.cc| 50 --
 lib/message.cc | 22 ++
 lib/notmuch-private.h  | 10 +-
 4 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index 24243db..5c5a2bb 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -100,6 +100,12 @@ enum _notmuch_features {
  *
  * Introduced: version 3. */
 NOTMUCH_FEATURE_INDEXED_MIMETYPES = 1 << 5,
+
+/* If set, messages store the revision number of the last
+ * modification in NOTMUCH_VALUE_LAST_MOD.
+ *
+ * Introduced: version 3. */
+NOTMUCH_FEATURE_LAST_MOD = 1 << 6,
 };

 /* In C++, a named enum is its own type, so define bitwise operators
@@ -145,6 +151,8 @@ struct _notmuch_database {

 notmuch_database_mode_t mode;
 int atomic_nesting;
+/* TRUE if changes have been made in this atomic section */
+notmuch_bool_t atomic_dirty;
 Xapian::Database *xapian_db;

 /* Bit mask of features used by this database.  This is a
@@ -158,6 +166,11 @@ struct _notmuch_database {
  * next library call. May be NULL */
 char *status_string;

+/* Highest committed revision number.  Modifications are recorded
+ * under a higher revision number, which can be generated with
+ * notmuch_database_new_revision. */
+unsigned long revision;
+
 Xapian::QueryParser *query_parser;
 Xapian::TermGenerator *term_gen;
 Xapian::ValueRangeProcessor *value_range_processor;
@@ -179,7 +192,8 @@ struct _notmuch_database {
  * will have it). */
 #define NOTMUCH_FEATURES_CURRENT \
 (NOTMUCH_FEATURE_FILE_TERMS | NOTMUCH_FEATURE_DIRECTORY_DOCS | \
- NOTMUCH_FEATURE_BOOL_FOLDER | NOTMUCH_FEATURE_GHOSTS)
+ NOTMUCH_FEATURE_BOOL_FOLDER | NOTMUCH_FEATURE_GHOSTS | \
+ NOTMUCH_FEATURE_LAST_MOD)

 /* Return the list of terms from the given iterator matching a prefix.
  * The prefix will be stripped from the strings in the returned list.
diff --git a/lib/database.cc b/lib/database.cc
index 6a15174..52e2e8f 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -101,6 +101,9 @@ typedef struct {
  *
  * SUBJECT:The value of the "Subject" header
  *
+ * LAST_MOD:   The revision number as of the last tag or
+ * filename change.
+ *
  * In addition, terms from the content of the message are added with
  * "from", "to", "attachment", and "subject" prefixes for use by the
  * user in searching. Similarly, terms from the path of the mail
@@ -310,6 +313,8 @@ static const struct {
  * them. */
 { NOTMUCH_FEATURE_INDEXED_MIMETYPES,
   "indexed MIME types", "w"},
+{ NOTMUCH_FEATURE_LAST_MOD,
+  "modification tracking", "w"},
 };

 const char *
@@ -737,6 +742,23 @@ _notmuch_database_ensure_writable (notmuch_database_t 
*notmuch)
 return NOTMUCH_STATUS_SUCCESS;
 }

+/* Allocate a revision number for the next 

revision tracking patches round 4

2015-08-14 Thread David Bremner
This obsoletes

 id:1439112285-6681-1-git-send-email-david@tethera.net

The main differences since that series are in the command line syntax
for notmuch-count.

We also now consider passing --uuid to notmuch-compact an error, since
the user might reasonably expect that to do something sensible, but we
don't because of the existing API.



revision tracking patches round 4

2015-08-14 Thread David Bremner
This obsoletes

 id:1439112285-6681-1-git-send-email-da...@tethera.net

The main differences since that series are in the command line syntax
for notmuch-count.

We also now consider passing --uuid to notmuch-compact an error, since
the user might reasonably expect that to do something sensible, but we
don't because of the existing API.

___
notmuch mailing list
notmuch@notmuchmail.org
http://notmuchmail.org/mailman/listinfo/notmuch


[PATCH 4/5] cli: add global option "--uuid"

2015-08-14 Thread David Bremner
The function notmuch_exit_if_unmatched_db_uuid is split from
notmuch_process_shared_options because it needs an open notmuch
database.

There are two exceptional cases in uuid handling.

1) notmuch config and notmuch setup don't currently open the database,
   so it doesn't make sense to check the UUID.

2) notmuch compact opens the database inside the library, so we either
   need to open the database just to check uuid, or change the API.
---
 doc/man1/notmuch.rst   | 11 +--
 notmuch-client.h   |  4 
 notmuch-compact.c  |  5 +
 notmuch-config.c   |  4 
 notmuch-count.c|  2 ++
 notmuch-dump.c |  2 ++
 notmuch-insert.c   |  2 ++
 notmuch-new.c  |  3 ++-
 notmuch-reply.c|  2 ++
 notmuch-restore.c  |  2 ++
 notmuch-search.c   |  2 ++
 notmuch-setup.c|  4 
 notmuch-show.c |  2 ++
 notmuch-tag.c  |  2 ++
 notmuch.c  | 18 ++
 test/T570-revision-tracking.sh | 27 +++
 test/random-corpus.c   |  2 ++
 17 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/doc/man1/notmuch.rst b/doc/man1/notmuch.rst
index 0401c91..3acfbdb 100644
--- a/doc/man1/notmuch.rst
+++ b/doc/man1/notmuch.rst
@@ -51,9 +51,16 @@ Supported global options for ``notmuch`` include
Specify the configuration file to use. This overrides any
configuration file specified by ${NOTMUCH\_CONFIG}.
 
+``--uuid=HEX``
+   Enforce that the database UUID (a unique identifier which
+   persists until e.g. the database is compacted)
+   is HEX; exit with an error if it is not. This is useful to
+   detect rollover in modification counts on messages. You can
+   find this UUID using e.g. ``notmuch count --lastmod``
+
 All global options except ``--config`` can also be specified after the
-command. For example, ``notmuch subcommand --version`` is equivalent to
-``notmuch --version subcommand``.
+command. For example, ``notmuch subcommand --uuid=HEX`` is
+equivalent to ``notmuch --uuid=HEX subcommand``.
 
 COMMANDS
 
diff --git a/notmuch-client.h b/notmuch-client.h
index 78680aa..4a4f86c 100644
--- a/notmuch-client.h
+++ b/notmuch-client.h
@@ -466,7 +466,11 @@ notmuch_database_dump (notmuch_database_t *notmuch,
   notmuch_bool_t gzip_output);
 
 #include "command-line-arguments.h"
+
+extern char *notmuch_requested_db_uuid;
 extern const notmuch_opt_desc_t  notmuch_shared_options [];
+void notmuch_exit_if_unmatched_db_uuid (notmuch_database_t *notmuch);
+
 void notmuch_process_shared_options (const char* subcommand_name);
 int notmuch_minimal_options (const char* subcommand_name,
 int argc, char **argv);
diff --git a/notmuch-compact.c b/notmuch-compact.c
index 5be551d..9373721 100644
--- a/notmuch-compact.c
+++ b/notmuch-compact.c
@@ -46,6 +46,11 @@ notmuch_compact_command (notmuch_config_t *config, int argc, 
char *argv[])
 if (opt_index < 0)
return EXIT_FAILURE;
 
+if (notmuch_requested_db_uuid) {
+   fprintf (stderr, "Error: --uuid not implemented for compact\n");
+   return EXIT_FAILURE;
+}
+
 notmuch_process_shared_options (argv[0]);
 
 if (! quiet)
diff --git a/notmuch-config.c b/notmuch-config.c
index 9348278..d252bb2 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -878,6 +878,10 @@ notmuch_config_command (notmuch_config_t *config, int 
argc, char *argv[])
 if (opt_index < 0)
return EXIT_FAILURE;
 
+if (notmuch_requested_db_uuid)
+   fprintf (stderr, "Warning: ignoring --uuid=%s\n",
+notmuch_requested_db_uuid);
+
 /* skip at least subcommand argument */
 argc-= opt_index;
 argv+= opt_index;
diff --git a/notmuch-count.c b/notmuch-count.c
index 182710a..f26e726 100644
--- a/notmuch-count.c
+++ b/notmuch-count.c
@@ -189,6 +189,8 @@ notmuch_count_command (notmuch_config_t *config, int argc, 
char *argv[])
   NOTMUCH_DATABASE_MODE_READ_ONLY, &notmuch))
return EXIT_FAILURE;
 
+notmuch_exit_if_unmatched_db_uuid (notmuch);
+
 query_str = query_string_from_args (config, argc-opt_index, 
argv+opt_index);
 if (query_str == NULL) {
fprintf (stderr, "Out of memory.\n");
diff --git a/notmuch-dump.c b/notmuch-dump.c
index fab22bd..24fc2f2 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -215,6 +215,8 @@ notmuch_dump_command (notmuch_config_t *config, int argc, 
char *argv[])
   NOTMUCH_DATABASE_MODE_READ_WRITE, &notmuch))
return EXIT_FAILURE;
 
+notmuch_exit_if_unmatched_db_uuid (notmuch);
+
 char *output_file_name = NULL;
 int opt_index;
 
diff --git a/notmuch-insert.c b/notmuch-insert.c
index c277d62..5205c17 100644
--- a/notmuch-insert.c
+++ b/notmuch-insert.c
@@ -536,6 +536,8 @@ notmuch_insert_command 

[PATCH 2/5] lib: API to retrieve database revision and UUID

2015-08-14 Thread David Bremner
From: Austin Clements acleme...@csail.mit.edu

This exposes the committed database revision to library users along
with a UUID that can be used to detect when revision numbers are no
longer comparable (e.g., because the database has been replaced).
---
 lib/database-private.h |  1 +
 lib/database.cc| 11 +++
 lib/notmuch.h  | 18 ++
 test/T570-revision-tracking.sh | 37 +
 test/test-lib.sh   |  5 +
 5 files changed, 72 insertions(+)
 create mode 100755 test/T570-revision-tracking.sh

diff --git a/lib/database-private.h b/lib/database-private.h
index 5c5a2bb..4e93257 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -170,6 +170,7 @@ struct _notmuch_database {
  * under a higher revision number, which can be generated with
  * notmuch_database_new_revision. */
 unsigned long revision;
+const char *uuid;
 
 Xapian::QueryParser *query_parser;
 Xapian::TermGenerator *term_gen;
diff --git a/lib/database.cc b/lib/database.cc
index 52e2e8f..fc78769 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -992,6 +992,8 @@ notmuch_database_open_verbose (const char *path,
notmuch->revision = 0;
else
notmuch->revision = Xapian::sortable_unserialise (last_mod);
+   notmuch->uuid = talloc_strdup (
+   notmuch, notmuch->xapian_db->get_uuid ().c_str ());
 
notmuch->query_parser = new Xapian::QueryParser;
notmuch->term_gen = new Xapian::TermGenerator;
@@ -1666,6 +1668,15 @@ DONE:
 return NOTMUCH_STATUS_SUCCESS;
 }
 
+unsigned long
+notmuch_database_get_revision (notmuch_database_t *notmuch,
+   const char **uuid)
+{
+if (uuid)
+   *uuid = notmuch->uuid;
+return notmuch->revision;
+}
+
 /* We allow the user to use arbitrarily long paths for directories. But
  * we have a term-length limit. So if we exceed that, we'll use the
  * SHA-1 of the path for the database term.
diff --git a/lib/notmuch.h b/lib/notmuch.h
index b1f5bfa..8639b38 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -468,6 +468,24 @@ notmuch_status_t
 notmuch_database_end_atomic (notmuch_database_t *notmuch);
 
 /**
+ * Return the committed database revision and UUID.
+ *
+ * The database revision number increases monotonically with each
+ * commit to the database.  Hence, all messages and message changes
+ * committed to the database (that is, visible to readers) have a last
+ * modification revision <= the committed database revision.  Any
+ * messages committed in the future will be assigned a modification
+ * revision > the committed database revision.
+ *
+ * The UUID is a NUL-terminated opaque string that uniquely identifies
+ * this database.  Two revision numbers are only comparable if they
+ * have the same database UUID.
+ */
+unsigned long
+notmuch_database_get_revision (notmuch_database_t *notmuch,
+   const char **uuid);
+
+/**
  * Retrieve a directory object from the database for 'path'.
  *
  * Here, 'path' should be a path relative to the path of 'database'
diff --git a/test/T570-revision-tracking.sh b/test/T570-revision-tracking.sh
new file mode 100755
index 000..e0a5703
--- /dev/null
+++ b/test/T570-revision-tracking.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+test_description="database revision tracking"
+
+. ./test-lib.sh || exit 1
+
+add_email_corpus
+
+test_begin_subtest "notmuch_database_get_revision"
+test_C ${MAIL_DIR} <<'EOF'
+#include <stdio.h>
+#include <string.h>
+#include <notmuch.h>
+int main (int argc, char** argv)
+{
+   notmuch_database_t *db;
+   notmuch_status_t stat;
+   unsigned long revision;
+   const char *uuid;
+
+   unsigned long rev;
+
+   stat = notmuch_database_open (argv[1], NOTMUCH_DATABASE_MODE_READ_ONLY, 
&db);
+   if (stat)
+   fputs ("open failed\n", stderr);
+   revision = notmuch_database_get_revision (db, &uuid);
+   printf("%s\t%lu\n", uuid, revision);
+}
+EOF
+notmuch_uuid_sanitize < OUTPUT > CLEAN
+cat <<'EOF' >EXPECTED
+== stdout ==
+UUID   53
+== stderr ==
+EOF
+test_expect_equal_file EXPECTED CLEAN
+
+test_done
diff --git a/test/test-lib.sh b/test/test-lib.sh
index 0bf7163..126911f 100644
--- a/test/test-lib.sh
+++ b/test/test-lib.sh
@@ -720,6 +720,11 @@ notmuch_date_sanitize ()
 sed \
-e 's/^Date: Fri, 05 Jan 2001 .*/Date: GENERATED_DATE/'
 }
+
+notmuch_uuid_sanitize ()
+{
+sed 
's/[0-9a-f]\{8\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{4\}-[0-9a-f]\{12\}/UUID/g'
+}
 # End of notmuch helper functions
 
 # Use test_set_prereq to tell that a particular prerequisite is available.
-- 
2.5.0

___
notmuch mailing list
notmuch@notmuchmail.org
http://notmuchmail.org/mailman/listinfo/notmuch


[PATCH 5/5] lib: Add "lastmod:" queries for filtering by last modification

2015-08-14 Thread David Bremner
From: Austin Clements amdra...@mit.edu

The implementation is essentially the same as the date range search
prior to Jani's fancy date parser.
---
 doc/man7/notmuch-search-terms.rst |  8 
 lib/database-private.h|  1 +
 lib/database.cc   |  4 
 test/T570-revision-tracking.sh| 17 +
 4 files changed, 30 insertions(+)

diff --git a/doc/man7/notmuch-search-terms.rst 
b/doc/man7/notmuch-search-terms.rst
index 1d27ac1..e71a525 100644
--- a/doc/man7/notmuch-search-terms.rst
+++ b/doc/man7/notmuch-search-terms.rst
@@ -54,6 +54,8 @@ indicate user-supplied values):
 
 -  date:since..until
 
+-  lastmod:since..until
+
 The **from:** prefix is used to match the name or address of the sender
 of an email message.
 
@@ -124,6 +126,12 @@ The time range can also be specified using timestamps with 
a syntax of:
 Each timestamp is a number representing the number of seconds since
 1970-01-01 00:00:00 UTC.
 
+The **lastmod:** prefix can be used to restrict the result by the
+database revision number of when messages were last modified (tags
+were added/removed or filenames changed).  This is usually used in
+conjunction with the **--uuid** argument to **notmuch search**
+to find messages that have changed since an earlier query.
+
 Operators
 -
 
diff --git a/lib/database-private.h b/lib/database-private.h
index 4e93257..3fb10f7 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -176,6 +176,7 @@ struct _notmuch_database {
 Xapian::TermGenerator *term_gen;
 Xapian::ValueRangeProcessor *value_range_processor;
 Xapian::ValueRangeProcessor *date_range_processor;
+Xapian::ValueRangeProcessor *last_mod_range_processor;
 };
 
 /* Prior to database version 3, features were implied by the database
diff --git a/lib/database.cc b/lib/database.cc
index fc78769..bab3334 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -1000,6 +1000,7 @@ notmuch_database_open_verbose (const char *path,
notmuch-term_gen-set_stemmer (Xapian::Stem (english));
notmuch-value_range_processor = new Xapian::NumberValueRangeProcessor 
(NOTMUCH_VALUE_TIMESTAMP);
notmuch-date_range_processor = new ParseTimeValueRangeProcessor 
(NOTMUCH_VALUE_TIMESTAMP);
+   notmuch-last_mod_range_processor = new 
Xapian::NumberValueRangeProcessor (NOTMUCH_VALUE_LAST_MOD, lastmod:);
 
notmuch-query_parser-set_default_op (Xapian::Query::OP_AND);
notmuch-query_parser-set_database (*notmuch-xapian_db);
@@ -1007,6 +1008,7 @@ notmuch_database_open_verbose (const char *path,
notmuch-query_parser-set_stemming_strategy 
(Xapian::QueryParser::STEM_SOME);
notmuch-query_parser-add_valuerangeprocessor 
(notmuch-value_range_processor);
notmuch-query_parser-add_valuerangeprocessor 
(notmuch-date_range_processor);
+   notmuch-query_parser-add_valuerangeprocessor 
(notmuch-last_mod_range_processor);
 
for (i = 0; i  ARRAY_SIZE (BOOLEAN_PREFIX_EXTERNAL); i++) {
prefix_t *prefix = BOOLEAN_PREFIX_EXTERNAL[i];
@@ -1085,6 +1087,8 @@ notmuch_database_close (notmuch_database_t *notmuch)
 notmuch-value_range_processor = NULL;
 delete notmuch-date_range_processor;
 notmuch-date_range_processor = NULL;
+delete notmuch-last_mod_range_processor;
+notmuch-last_mod_range_processor = NULL;
 
 return status;
 }
diff --git a/test/T570-revision-tracking.sh b/test/T570-revision-tracking.sh
index 20b44cb..0936011 100755
--- a/test/T570-revision-tracking.sh
+++ b/test/T570-revision-tracking.sh
@@ -73,4 +73,21 @@ test_expect_success 'tag succeeds with correct uuid' \
 test_expect_code 1 'tag fails with incorrect uuid' \
 notmuch tag --uuid=this-is-no-uuid '*' +test2
 
+test_begin_subtest 'lastmod:0.. matches everything'
+total=$(notmuch count '*')
+modtotal=$(notmuch count lastmod:0..)
+test_expect_equal $total $modtotal
+
+test_begin_subtest 'lastmod:100.. matches nothing'
+modtotal=$(notmuch count lastmod:100..)
+test_expect_equal 0 $modtotal
+
+test_begin_subtest 'exclude one message using lastmod'
+lastmod=$(notmuch count --lastmod '*' | cut -f3)
+total=$(notmuch count '*')
+notmuch tag +4efc743a.3060...@april.org id:4efc743a.3060...@april.org
+subtotal=$(notmuch count lastmod:..$lastmod)
+result=$(($subtotal == $total-1))
+test_expect_equal 1 $result
+
 test_done
-- 
2.5.0

___
notmuch mailing list
notmuch@notmuchmail.org
http://notmuchmail.org/mailman/listinfo/notmuch


[PATCH 1/5] lib: Add per-message last modification tracking

2015-08-14 Thread David Bremner
From: Austin Clements amdra...@mit.edu

This adds a new document value that stores the revision of the last
modification to message metadata, where the revision number increases
monotonically with each database commit.

An alternative would be to store the wall-clock time of the last
modification of each message.  In principle this is simpler and has
the advantage that any process can determine the current timestamp
without support from libnotmuch.  However, even assuming a computer's
clock never goes backward and ignoring clock skew in networked
environments, this has a fatal flaw.  Xapian uses (optimistic)
snapshot isolation, which means reads can be concurrent with writes.
Given this, consider the following time line with a write and two read
transactions:

  write  |-X-A--------------|
  read 1       |---B---|
  read 2                      |---|

The write transaction modifies message X and records the wall-clock
time of the modification at A.  The writer hangs around for a while
and later commits its change.  Read 1 is concurrent with the write, so
it doesn't see the change to X.  It does some query and records the
wall-clock time of its results at B.  Transaction read 2 later starts
after the write commits and queries for changes since wall-clock time
B (say the reads are performing an incremental backup).  Even though
read 1 could not see the change to X, read 2 is told (correctly) that
X has not changed since B, the time of the last read.  In fact, X
changed before wall-clock time A, but the change was not visible until
*after* wall-clock time B, so read 2 misses the change to X.

This is tricky to solve in full-blown snapshot isolation, but because
Xapian serializes writes, we can use a simple, monotonically
increasing database revision number.  Furthermore, maintaining this
revision number requires no more IO than a wall-clock time solution
because Xapian already maintains statistics on the upper (and lower)
bound of each value stream.
---
 lib/database-private.h | 16 +++-
 lib/database.cc| 50 --
 lib/message.cc | 22 ++
 lib/notmuch-private.h  | 10 +-
 4 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index 24243db..5c5a2bb 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -100,6 +100,12 @@ enum _notmuch_features {
  *
  * Introduced: version 3. */
 NOTMUCH_FEATURE_INDEXED_MIMETYPES = 1  5,
+
+/* If set, messages store the revision number of the last
+ * modification in NOTMUCH_VALUE_LAST_MOD.
+ *
+ * Introduced: version 3. */
+NOTMUCH_FEATURE_LAST_MOD = 1  6,
 };
 
 /* In C++, a named enum is its own type, so define bitwise operators
@@ -145,6 +151,8 @@ struct _notmuch_database {
 
 notmuch_database_mode_t mode;
 int atomic_nesting;
+/* TRUE if changes have been made in this atomic section */
+notmuch_bool_t atomic_dirty;
 Xapian::Database *xapian_db;
 
 /* Bit mask of features used by this database.  This is a
@@ -158,6 +166,11 @@ struct _notmuch_database {
  * next library call. May be NULL */
 char *status_string;
 
+/* Highest committed revision number.  Modifications are recorded
+ * under a higher revision number, which can be generated with
+ * notmuch_database_new_revision. */
+unsigned long revision;
+
 Xapian::QueryParser *query_parser;
 Xapian::TermGenerator *term_gen;
 Xapian::ValueRangeProcessor *value_range_processor;
@@ -179,7 +192,8 @@ struct _notmuch_database {
  * will have it). */
 #define NOTMUCH_FEATURES_CURRENT \
 (NOTMUCH_FEATURE_FILE_TERMS | NOTMUCH_FEATURE_DIRECTORY_DOCS | \
- NOTMUCH_FEATURE_BOOL_FOLDER | NOTMUCH_FEATURE_GHOSTS)
+ NOTMUCH_FEATURE_BOOL_FOLDER | NOTMUCH_FEATURE_GHOSTS | \
+ NOTMUCH_FEATURE_LAST_MOD)
 
 /* Return the list of terms from the given iterator matching a prefix.
  * The prefix will be stripped from the strings in the returned list.
diff --git a/lib/database.cc b/lib/database.cc
index 6a15174..52e2e8f 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -101,6 +101,9 @@ typedef struct {
  *
  * SUBJECT:The value of the Subject header
  *
+ * LAST_MOD:   The revision number as of the last tag or
+ * filename change.
+ *
  * In addition, terms from the content of the message are added with
  * from, to, attachment, and subject prefixes for use by the
  * user in searching. Similarly, terms from the path of the mail
@@ -310,6 +313,8 @@ static const struct {
  * them. */
 { NOTMUCH_FEATURE_INDEXED_MIMETYPES,
   indexed MIME types, w},
+{ NOTMUCH_FEATURE_LAST_MOD,
+  modification tracking, w},
 };
 
 const char *
@@ -737,6 +742,23 @@ _notmuch_database_ensure_writable (notmuch_database_t 
*notmuch)
 return NOTMUCH_STATUS_SUCCESS;
 }
 
+/* Allocate a revision number for the next change. */
+unsigned