[PATCH 18/36] lib/query: generalize exclude handling to s-expression queries

2021-08-24 Thread David Bremner
In fact most of the code path is shared; only the caching of terms
in the query needs to be added for s-expression queries.
---
 lib/query.cc  | 34 ++---
 test/T081-sexpr-search.sh | 40 +++
 2 files changed, 63 insertions(+), 11 deletions(-)

diff --git a/lib/query.cc b/lib/query.cc
index 435f7229..56f90e1c 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -165,6 +165,19 @@ notmuch_query_create_with_syntax (notmuch_database_t 
*notmuch,
 return NOTMUCH_STATUS_SUCCESS;
 }
 
+static void
+_notmuch_query_cache_terms (notmuch_query_t *query)
+{
+/* Xapian doesn't support skip_to on terms from a query since
+ *  they are unordered, so cache a copy of all terms in
+ *  something searchable.
+ */
+
+for (Xapian::TermIterator t = query->xapian_query.get_terms_begin ();
+t != query->xapian_query.get_terms_end (); ++t)
+   query->terms.insert (*t);
+}
+
 static notmuch_status_t
 _notmuch_query_ensure_parsed_xapian (notmuch_query_t *query)
 {
@@ -173,15 +186,7 @@ _notmuch_query_ensure_parsed_xapian (notmuch_query_t 
*query)
query->notmuch->query_parser->
parse_query (query->query_string, NOTMUCH_QUERY_PARSER_FLAGS);
 
-   /* Xapian doesn't support skip_to on terms from a query since
-*  they are unordered, so cache a copy of all terms in
-*  something searchable.
-*/
-
-   for (Xapian::TermIterator t = query->xapian_query.get_terms_begin ();
-t != query->xapian_query.get_terms_end (); ++t)
-   query->terms.insert (*t);
-
+   _notmuch_query_cache_terms (query);
query->parsed = true;
 
 } catch (const Xapian::Error ) {
@@ -203,11 +208,18 @@ _notmuch_query_ensure_parsed_xapian (notmuch_query_t 
*query)
 static notmuch_status_t
 _notmuch_query_ensure_parsed_sexpr (notmuch_query_t *query)
 {
+notmuch_status_t status;
+
 if (query->parsed)
return NOTMUCH_STATUS_SUCCESS;
 
-return _notmuch_sexp_string_to_xapian_query (query->notmuch, 
query->query_string,
-query->xapian_query);
+status = _notmuch_sexp_string_to_xapian_query (query->notmuch, 
query->query_string,
+  query->xapian_query);
+if (status)
+   return status;
+
+_notmuch_query_cache_terms (query);
+return NOTMUCH_STATUS_SUCCESS;
 }
 
 static notmuch_status_t
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 44cb681f..be243fc0 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -525,4 +525,44 @@ notmuch search: Syntax error in query
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "Search, exclude \"deleted\" messages from search"
+notmuch config set search.exclude_tags deleted
+generate_message '[subject]="Not deleted"'
+not_deleted_id=$gen_msg_id
+generate_message '[subject]="Deleted"'
+notmuch new > /dev/null
+notmuch tag +deleted id:$gen_msg_id
+deleted_id=$gen_msg_id
+output=$(notmuch search --query=sexp '(subject deleted)' | 
notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; 
Not deleted (inbox unread)"
+
+test_begin_subtest "Search, exclude \"deleted\" messages from message search 
--exclude=false"
+output=$(notmuch search --query=sexp --exclude=false --output=messages 
'(subject deleted)' | notmuch_search_sanitize)
+test_expect_equal "$output" "id:$not_deleted_id
+id:$deleted_id"
+
+test_begin_subtest "Search, exclude \"deleted\" messages from search, 
overridden"
+notmuch search --query=sexp '(and (subject deleted) (tag deleted))' | 
notmuch_search_sanitize > OUTPUT
+cat < EXPECTED
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; Deleted (deleted inbox 
unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Search, exclude \"deleted\" messages from threads"
+add_message '[subject]="Not deleted reply"' '[in-reply-to]="<$gen_msg_id>"'
+output=$(notmuch search --query=sexp '(subject deleted)' | 
notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; 
Not deleted (inbox unread)
+thread:XXX   2001-01-05 [1/2] Notmuch Test Suite; Not deleted reply (deleted 
inbox unread)"
+
+test_begin_subtest "Search, don't exclude \"deleted\" messages when 
--exclude=flag specified"
+output=$(notmuch search --query=sexp --exclude=flag '(subject deleted)' | 
notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; 
Not deleted (inbox unread)
+thread:XXX   2001-01-05 [1/2] Notmuch Test Suite; Deleted (deleted inbox 
unread)"
+
+test_begin_subtest "Search, don't exclude \"deleted\" messages from search if 
not configured"
+notmuch config set search.exclude_tags
+output=$(notmuch search --query=sexp '(subject deleted)' | 
notmuch_search_sanitize)
+test_expect_equal "$output" "thread:XXX   2001-01-05 [1/1] 

[PATCH 31/36] lib/parse-sexp: thread environment argument through parser

2021-08-24 Thread David Bremner
No functionality change, just an extra argument carried everywhere.
---
 lib/parse-sexp.cc | 47 +--
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 291480ca..8f7c26c2 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -7,6 +7,11 @@
 /* _sexp is used for file scope symbols to avoid clashing with
  * definitions from sexp.h */
 
+typedef struct {
+const char *name;
+const sexp_t *sx;
+} _sexp_binding_t;
+
 typedef enum {
 SEXP_FLAG_NONE = 0,
 SEXP_FLAG_FIELD= 1 << 0,
@@ -99,12 +104,14 @@ static _sexp_prefix_t prefixes[] =
 
 static notmuch_status_t _sexp_to_xapian_query (notmuch_database_t *notmuch,
   const _sexp_prefix_t *parent,
+  const _sexp_binding_t *env,
   const sexp_t *sx,
   Xapian::Query );
 
 static notmuch_status_t
 _sexp_combine_query (notmuch_database_t *notmuch,
 const _sexp_prefix_t *parent,
+const _sexp_binding_t *env,
 Xapian::Query::op operation,
 Xapian::Query left,
 const sexp_t *sx,
@@ -121,12 +128,13 @@ _sexp_combine_query (notmuch_database_t *notmuch,
return NOTMUCH_STATUS_SUCCESS;
 }
 
-status = _sexp_to_xapian_query (notmuch, parent, sx, subquery);
+status = _sexp_to_xapian_query (notmuch, parent, env, sx, subquery);
 if (status)
return status;
 
 return _sexp_combine_query (notmuch,
parent,
+   env,
operation,
Xapian::Query (operation, left, subquery),
sx->next, output);
@@ -165,6 +173,7 @@ _sexp_parse_phrase (std::string term_prefix, const char 
*phrase, Xapian::Query &
 static notmuch_status_t
 _sexp_parse_wildcard (notmuch_database_t *notmuch,
  const _sexp_prefix_t *parent,
+ unused(const _sexp_binding_t *env),
  std::string match,
  Xapian::Query )
 {
@@ -201,6 +210,7 @@ _sexp_parse_one_term (notmuch_database_t *notmuch, 
std::string term_prefix, cons
 notmuch_status_t
 _sexp_parse_regex (notmuch_database_t *notmuch,
   const _sexp_prefix_t *prefix, const _sexp_prefix_t *parent,
+  unused(const _sexp_binding_t *env),
   std::string val, Xapian::Query )
 {
 if (! parent) {
@@ -225,7 +235,7 @@ _sexp_parse_regex (notmuch_database_t *notmuch,
 static notmuch_status_t
 _sexp_expand_query (notmuch_database_t *notmuch,
const _sexp_prefix_t *prefix, const _sexp_prefix_t *parent,
-   const sexp_t *sx, Xapian::Query )
+   unused(const _sexp_binding_t *env), const sexp_t *sx, 
Xapian::Query )
 {
 Xapian::Query subquery;
 notmuch_status_t status;
@@ -236,7 +246,8 @@ _sexp_expand_query (notmuch_database_t *notmuch,
return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
 }
 
-status = _sexp_combine_query (notmuch, NULL, prefix->xapian_op, 
prefix->initial, sx, subquery);
+status = _sexp_combine_query (notmuch, NULL, NULL, prefix->xapian_op, 
prefix->initial, sx,
+ subquery);
 if (status)
return status;
 
@@ -272,7 +283,7 @@ _sexp_parse_infix (notmuch_database_t *notmuch, const 
sexp_t *sx, Xapian::Query
 
 static notmuch_status_t
 _sexp_parse_header (notmuch_database_t *notmuch, const _sexp_prefix_t *parent,
-   const sexp_t *sx, Xapian::Query )
+   const _sexp_binding_t *env, const sexp_t *sx, Xapian::Query 
)
 {
 _sexp_prefix_t user_prefix;
 
@@ -287,13 +298,13 @@ _sexp_parse_header (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent,
 
 parent = _prefix;
 
-return _sexp_combine_query (notmuch, parent, Xapian::Query::OP_AND, 
Xapian::Query::MatchAll,
+return _sexp_combine_query (notmuch, parent, env, Xapian::Query::OP_AND, 
Xapian::Query::MatchAll,
sx->list->next, output);
 }
 
 static notmuch_status_t
-maybe_saved_squery (notmuch_database_t *notmuch, const _sexp_prefix_t *parent, 
const sexp_t *sx,
-   Xapian::Query )
+maybe_saved_squery (notmuch_database_t *notmuch, const _sexp_prefix_t *parent,
+   const _sexp_binding_t *env, const sexp_t *sx, Xapian::Query 
)
 {
 char *key;
 char *expansion = NULL;
@@ -325,7 +336,7 @@ maybe_saved_squery (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent, c
goto DONE;
 }
 
-status =  _sexp_to_xapian_query (notmuch, parent, saved_sexp, output);
+status =  _sexp_to_xapian_query (notmuch, parent, env, saved_sexp, output);
 
   DONE:
 if (local)
@@ -339,8 +350,8 @@ 

[PATCH 02/36] configure: optional library sfsexp

2021-08-24 Thread David Bremner
The configure part is essentially the same as the other checks using
pkg-config. Since the optional inclusion of this feature changes what
options are available to the user, include it in the "built_with"
pseudo-configuration keys.
---
 configure| 26 +-
 lib/built-with.c |  2 ++
 notmuch-config.c |  3 +++
 test/T030-config.sh  |  1 +
 test/T055-path-config.sh |  9 +
 5 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/configure b/configure
index cfa9c09b..4262d122 100755
--- a/configure
+++ b/configure
@@ -820,6 +820,19 @@ else
 WITH_BASH=0
 fi
 
+printf "Checking for sfsexp... "
+if pkg-config --exists sfsexp; then
+printf "Yes.\n"
+have_sfsexp=1
+sfsexp_cflags=$(pkg-config --cflags sfsexp)
+sfsexp_ldflags=$(pkg-config --libs sfsexp)
+else
+printf "No (will not enable s-expression queries).\n"
+have_sfsexp=0
+sfsexp_cflags=
+sfsexp_ldflags=
+fi
+
 if [ -z "${EMACSLISPDIR-}" ]; then
 EMACSLISPDIR="\$(prefix)/share/emacs/site-lisp"
 fi
@@ -1443,6 +1456,13 @@ HAVE_VALGRIND = ${have_valgrind}
 # And if so, flags needed at compile time for valgrind macros
 VALGRIND_CFLAGS = ${valgrind_cflags}
 
+# Whether the sfsexp library is available
+HAVE_SFSEXP = ${have_sfsexp}
+
+# And if so, flags needed at compile/link time for sfsexp
+SFSEXP_CFLAGS = ${sfsexp_cflags}
+SFSEXP_LDFLAGS = ${sfsexp_ldflags}
+
 # Support for emacs
 WITH_EMACS = ${WITH_EMACS}
 
@@ -1459,6 +1479,7 @@ WITH_ZSH = ${WITH_ZSH}
 COMMON_CONFIGURE_CFLAGS = \\
\$(GMIME_CFLAGS) \$(TALLOC_CFLAGS) \$(ZLIB_CFLAGS)  \\
-DHAVE_VALGRIND=\$(HAVE_VALGRIND) \$(VALGRIND_CFLAGS)   \\
+   -DHAVE_SFSEXP=\$(HAVE_SFSEXP) \$(SFSEXP_CFLAGS) \\
-DHAVE_GETLINE=\$(HAVE_GETLINE) \\
-DWITH_EMACS=\$(WITH_EMACS) \\
-DHAVE_CANONICALIZE_FILE_NAME=\$(HAVE_CANONICALIZE_FILE_NAME) \\
@@ -1475,7 +1496,7 @@ CONFIGURE_CFLAGS = \$(COMMON_CONFIGURE_CFLAGS)
 
 CONFIGURE_CXXFLAGS = \$(COMMON_CONFIGURE_CFLAGS) \$(XAPIAN_CXXFLAGS)
 
-CONFIGURE_LDFLAGS = \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) 
\$(XAPIAN_LDFLAGS)
+CONFIGURE_LDFLAGS = \$(GMIME_LDFLAGS) \$(TALLOC_LDFLAGS) \$(ZLIB_LDFLAGS) 
\$(XAPIAN_LDFLAGS) \$(SFSEXP_LDFLAGS)
 EOF
 
 # construct the sh.config
@@ -1524,6 +1545,9 @@ NOTMUCH_HAVE_PYTHON3_CFFI=${have_python3_cffi}
 # Is the python pytest package available?
 NOTMUCH_HAVE_PYTHON3_PYTEST=${have_python3_pytest}
 
+# Is the sfsexp library available?
+NOTMUCH_HAVE_SFSEXP=${have_sfsexp}
+
 # Platform we are run on
 PLATFORM=${platform}
 EOF
diff --git a/lib/built-with.c b/lib/built-with.c
index 0c70010b..89958e12 100644
--- a/lib/built-with.c
+++ b/lib/built-with.c
@@ -32,6 +32,8 @@ notmuch_built_with (const char *name)
return HAVE_XAPIAN_DB_RETRY_LOCK;
 } else if (STRNCMP_LITERAL (name, "session_key") == 0) {
return true;
+} else if (STRNCMP_LITERAL (name, "sexpr_query") == 0) {
+   return HAVE_SFSEXP;
 } else {
return false;
 }
diff --git a/notmuch-config.c b/notmuch-config.c
index 80a207f6..c0c91cc8 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -679,6 +679,9 @@ _notmuch_config_list_built_with ()
 printf ("%sretry_lock=%s\n",
BUILT_WITH_PREFIX,
notmuch_built_with ("retry_lock") ? "true" : "false");
+printf ("%ssexpr_query=%s\n",
+   BUILT_WITH_PREFIX,
+   notmuch_built_with ("sexpr_query") ? "true" : "false");
 }
 
 static int
diff --git a/test/T030-config.sh b/test/T030-config.sh
index 636c6356..3a585d1b 100755
--- a/test/T030-config.sh
+++ b/test/T030-config.sh
@@ -51,6 +51,7 @@ cat < EXPECTED
 built_with.compact=something
 built_with.field_processor=something
 built_with.retry_lock=something
+built_with.sexpr_query=something
 database.autocommit=8000
 database.mail_root=MAIL_DIR
 database.path=MAIL_DIR
diff --git a/test/T055-path-config.sh b/test/T055-path-config.sh
index 27dd209a..ef22e964 100755
--- a/test/T055-path-config.sh
+++ b/test/T055-path-config.sh
@@ -266,7 +266,7 @@ EOF
test_expect_equal "${output}+${output2}" "${value}+"
 
test_begin_subtest "Config list ($config)"
-   notmuch config list | notmuch_dir_sanitize | \
+   notmuch config list | notmuch_config_sanitize | \
sed -e "s/^database.backup_dir=.*$/database.backup_dir/"  \
   -e "s/^database.hook_dir=.*$/database.hook_dir/" \
   -e "s/^database.path=.*$/database.path/"  \
@@ -274,9 +274,10 @@ EOF
   -e 
"s,^database.mail_root=CWD/home/env_points_here,database.mail_root=MAIL_DIR," \
   > OUTPUT
cat < EXPECTED
-built_with.compact=true
-built_with.field_processor=true
-built_with.retry_lock=true
+built_with.compact=something
+built_with.field_processor=something
+built_with.retry_lock=something
+built_with.sexpr_query=something
 database.autocommit=8000
 database.backup_dir
 database.hook_dir
-- 
2.32.0

[PATCH 14/36] lib/parse-sexp: add term prefix backed fields

2021-08-24 Thread David Bremner
We use "boolean" to describe fields that should generate terms
literally without stemming or phrase splitting.  This terminology
might not be ideal but it is already enshrined in
notmuch-search-terms(7).
---
 doc/man7/notmuch-sexp-queries.rst | 18 +-
 lib/parse-sexp.cc | 49 
 test/T081-sexpr-search.sh | 94 +++
 3 files changed, 160 insertions(+), 1 deletion(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index b763876d..6e68fcc3 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -81,6 +81,14 @@ string) into words, ignore punctuation. Phrase splitting is 
applied to
 terms in phrase (probabilistic) fields. Both phrase splitting and
 stemming apply only in phrase fields.
 
+Each term or phrase field has an associated combining operator
+(``and`` or ``or``) used to combine the queries from each element of
+the tail of the list. This is generally ``or`` for those fields where
+a message has one such attribute, and ``and`` otherwise.
+
+Term or phrase fields can contain arbitrarily complex queries made up
+from terms, operators, and modifiers, but not other fields.
+
 .. _field-table:
 
 .. table:: Fields with supported modifiers
@@ -112,7 +120,7 @@ stemming apply only in phrase fields.
   ++---+---+---+---+--+
   |  mimetype  |or |  phrase   |yes|yes|no|
   ++---+---+---+---+--+
-  |path|or |   term|yes|yes|   yes|
+  |path|or |   term|no |yes|   yes|
   ++---+---+---+---+--+
   |  property  |and|   term|yes|yes|   yes|
   ++---+---+---+---+--+
@@ -151,10 +159,18 @@ EXAMPLES
 Match the *phrase* "quick" followed by "fox" in phrase fields (or
 outside a field). Match the literal string in a term field.
 
+``(id 1234@invalid blah@test)``
+Matches Message-Id "1234@invalid" *or* Message-Id "blah@test"
+
 ``(subject quick "brown fox")``
 Match messages whose subject contains "quick" (anywhere, stemmed) and
 the phrase "brown fox".
 
+``(to (or b...@example.com mall...@example.org))`` ``(or (to b...@example.com) 
(to mall...@example.org))``
+Match in the "To" or "Cc" headers, "b...@example.com",
+"mall...@example.org", and also "b...@example.com.au" since it
+contains the adjacent triple "bob", "example", "com".
+
 NOTES
 =
 
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 0917f505..26b7e5f1 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -10,8 +10,26 @@
 typedef enum {
 SEXP_FLAG_NONE = 0,
 SEXP_FLAG_FIELD= 1 << 0,
+SEXP_FLAG_BOOLEAN  = 1 << 1,
 } _sexp_flag_t;
 
+/*
+ * define bitwise operators to hide casts */
+
+inline _sexp_flag_t
+operator| (_sexp_flag_t a, _sexp_flag_t b)
+{
+return static_cast<_sexp_flag_t>(
+   static_cast(a) | static_cast(b));
+}
+
+inline _sexp_flag_t
+operator& (_sexp_flag_t a, _sexp_flag_t b)
+{
+return static_cast<_sexp_flag_t>(
+   static_cast(a) & static_cast(b));
+}
+
 typedef struct  {
 const char *name;
 Xapian::Query::op xapian_op;
@@ -23,12 +41,39 @@ static _sexp_prefix_t prefixes[] =
 {
 { "and",Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
   SEXP_FLAG_NONE },
+{ "attachment", Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
+  SEXP_FLAG_FIELD },
+{ "body",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
+  SEXP_FLAG_FIELD },
+{ "from",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
+  SEXP_FLAG_FIELD },
+{ "folder", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+{ "id", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+{ "is", Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+{ "mid",Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+{ "mimetype",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
+  SEXP_FLAG_FIELD },
 { "not",Xapian::Query::OP_AND_NOT,  
Xapian::Query::MatchAll,
   SEXP_FLAG_NONE },
 { "or", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
   SEXP_FLAG_NONE },
+{ "path",   Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+{ "property",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
+  SEXP_FLAG_FIELD

[PATCH 16/36] lib/parse-sexp: add '*' as syntactic sugar for '(starts-with "")'

2021-08-24 Thread David Bremner
Users who insist on using a literal '*' as a tag can continue to do
so by quoting it when searching.
---
 doc/man7/notmuch-sexp-queries.rst | 19 ++--
 lib/parse-sexp.cc |  5 
 test/T081-sexpr-search.sh | 48 +++
 3 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index c83ce3d0..f32bab9c 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -36,8 +36,11 @@ An s-expression query is either an atom, the empty list, or a
 a *field*, *logical operation*, or *modifier*, and 0 or more
 subqueries.
 
-``*`` ``()``
-Match all messages.
+``*``
+   "*" matches any non-empty string in the current field.
+
+``()``
+The empty list matches all messages
 
 *term*
 
@@ -138,6 +141,15 @@ from terms, operators, and modifiers, but not other fields.
 MODIFIERS
 `
 
+*Modifiers* refer to any prefixes (first elements of compound queries)
+that are neither operators nor fields.
+
+``(starts-with`` *subword* ``)``
+Matches any term starting with *subword*.  This applies in either
+phrase or term :any:`fields `, or outside of fields [#not-body]_. 
Note that
+a ``starts-with`` query cannot be part of a phrase. The
+atom ``*`` is a synonym for ``(starts-with "")``.
+
 EXAMPLES
 
 
@@ -181,6 +193,9 @@ EXAMPLES
 "mall...@example.org", and also "b...@example.com.au" since it
 contains the adjacent triple "bob", "example", "com".
 
+``(not (to *))``
+Match messages with an empty or invalid 'To' and 'Cc' field.
+
 NOTES
 =
 
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 692b3849..ffb00148 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -176,6 +176,11 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
std::string term = Xapian::Unicode::tolower (sx->val);
Xapian::Stem stem = *(notmuch->stemmer);
std::string term_prefix = parent ? _find_prefix (parent->name) : "";
+
+   if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) {
+   return _sexp_parse_wildcard (notmuch, parent, "", output);
+   }
+
if (parent && (parent->flags & SEXP_FLAG_BOOLEAN)) {
output = Xapian::Query (term_prefix + sx->val);
return NOTMUCH_STATUS_SUCCESS;
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 24c6edd1..df502dc5 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -386,6 +386,46 @@ thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; search 
by to (name) (inbox unr
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "wildcard search for 'is'"
+notmuch search not id:${notag_mid} > EXPECTED
+notmuch search --query=sexp '(is *)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "negated wildcard search for 'is'"
+notmuch search id:${notag_mid} > EXPECTED
+notmuch search --query=sexp '(not (is *))' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "wildcard search for 'property'"
+notmuch search property:foo=bar > EXPECTED
+notmuch search --query=sexp '(property *)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "wildcard search for 'tag'"
+notmuch search not id:${notag_mid} > EXPECTED
+notmuch search --query=sexp '(tag *)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "negated wildcard search for 'tag'"
+notmuch search id:${notag_mid} > EXPECTED
+notmuch search --query=sexp '(not (tag *))' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+add_message '[subject]="message with tag \"*\""'
+notmuch tag '+*' id:${gen_msg_id}
+
+test_begin_subtest "search for 'tag' \"*\""
+output=$(notmuch search --query=sexp --output=messages '(tag "*")')
+test_expect_equal "$output" "id:$gen_msg_id"
+
+test_begin_subtest "search for missing / empty to"
+add_message [to]="undisclosed-recipients:"
+notmuch search --query=sexp '(not (to *))' | notmuch_search_sanitize > OUTPUT
+cat < EXPECTED
+thread:XXX   2001-01-05 [1/1] Notmuch Test Suite; search for missing / empty 
to (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_begin_subtest "Unbalanced parens"
 # A code 1 indicates the error was handled (a crash will return e.g. 139).
 test_expect_code 1 "notmuch search --query=sexp '('"
@@ -454,4 +494,12 @@ notmuch search: Syntax error in query
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "wildcard, illegal field"
+notmuch search --query=sexp '(body *)' >OUTPUT 2>&1
+cat < EXPECTED
+notmuch search: Syntax error in query
+'body' does not support wildcard queries
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


[PATCH 20/36] lib/parse-sexp: support regular expressions

2021-08-24 Thread David Bremner
Support regular expressions, at least to the degree that the Xapian
QueryParser based parser also supports them. Also support the short
alias 'rx', as it seems to make more complex queries nicer to read.
---
 doc/man7/notmuch-sexp-queries.rst |  8 
 lib/parse-sexp.cc | 54 ++-
 test/T081-sexpr-search.sh | 72 +++
 3 files changed, 124 insertions(+), 10 deletions(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index f32bab9c..7eaffe56 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -144,6 +144,11 @@ MODIFIERS
 *Modifiers* refer to any prefixes (first elements of compound queries)
 that are neither operators nor fields.
 
+``(regex`` *atom* ``)`` ``(rx`` *atom* ``)``
+Interpret *atom* as a POSIX.2 regular expression (see
+:manpage:`regex(7)`). This applies in term fields and a subset 
[#not-phrase]_ of
+phrase fields (see :any:`field-table`).
+
 ``(starts-with`` *subword* ``)``
 Matches any term starting with *subword*.  This applies in either
 phrase or term :any:`fields `, or outside of fields [#not-body]_. 
Note that
@@ -205,6 +210,9 @@ NOTES
 
 .. [#aka-bool] a.k.a. boolean prefixes
 
+.. [#not-phrase] Due to the implementation of phrase fields in Xapian,
+ regex queries could only match individual words.
+
 .. [#not-body] Due the the way ``body`` is implemented in notmuch,
this modifier is not supported in the ``body`` field.
 
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 0192bda9..84914296 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -13,6 +13,8 @@ typedef enum {
 SEXP_FLAG_BOOLEAN  = 1 << 1,
 SEXP_FLAG_SINGLE   = 1 << 2,
 SEXP_FLAG_WILDCARD = 1 << 3,
+SEXP_FLAG_REGEX= 1 << 4,
+SEXP_FLAG_DO_REGEX = 1 << 5,
 } _sexp_flag_t;
 
 /*
@@ -48,15 +50,15 @@ static _sexp_prefix_t prefixes[] =
 { "body",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
   SEXP_FLAG_FIELD },
 { "from",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
 { "folder", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
 { "id", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
 { "is", Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
 { "mid",Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
 { "mimetype",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
   SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
 { "not",Xapian::Query::OP_AND_NOT,  
Xapian::Query::MatchAll,
@@ -64,17 +66,21 @@ static _sexp_prefix_t prefixes[] =
 { "or", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
   SEXP_FLAG_NONE },
 { "path",   Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
 { "property",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
+{ "regex",  Xapian::Query::OP_INVALID,  
Xapian::Query::MatchAll,
+  SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
+{ "rx", Xapian::Query::OP_INVALID,  
Xapian::Query::MatchAll,
+  SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
 { "starts-with",Xapian::Query::OP_WILDCARD, 
Xapian::Query::MatchAll,
   SEXP_FLAG_SINGLE },
 { "subject",Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
 { "tag",Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
 { "thread", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | 

[PATCH 05/36] CLI/search+address: support sexpr queries

2021-08-24 Thread David Bremner
Initially support selection of query syntax in two subcommands to
enable testing.
---
 notmuch-search.c | 13 +
 test/T080-search.sh  |  7 +++
 test/T095-address.sh |  7 +++
 3 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/notmuch-search.c b/notmuch-search.c
index 375a247d..39d55bfe 100644
--- a/notmuch-search.c
+++ b/notmuch-search.c
@@ -56,6 +56,7 @@ typedef struct {
 int format_sel;
 sprinter_t *format;
 int exclude;
+int query_syntax;
 notmuch_query_t *query;
 int sort;
 int output;
@@ -719,11 +720,10 @@ _notmuch_search_prepare (search_context_t *ctx, int argc, 
char *argv[])
return EXIT_FAILURE;
 }
 
-ctx->query = notmuch_query_create (ctx->notmuch, query_str);
-if (ctx->query == NULL) {
-   fprintf (stderr, "Out of memory\n");
+if (print_status_database ("notmuch search", ctx->notmuch,
+  notmuch_query_create_with_syntax (ctx->notmuch, 
query_str,
+
ctx->query_syntax, >query)))
return EXIT_FAILURE;
-}
 
 notmuch_query_set_sort (ctx->query, ctx->sort);
 
@@ -769,6 +769,7 @@ static search_context_t search_context = {
 .format_sel = NOTMUCH_FORMAT_TEXT,
 .exclude = NOTMUCH_EXCLUDE_TRUE,
 .sort = NOTMUCH_SORT_NEWEST_FIRST,
+.query_syntax = NOTMUCH_QUERY_SYNTAX_XAPIAN,
 .output = 0,
 .offset = 0,
 .limit = -1, /* unlimited */
@@ -787,6 +788,10 @@ static const notmuch_opt_desc_t common_options[] = {
  { "text", NOTMUCH_FORMAT_TEXT },
  { "text0", NOTMUCH_FORMAT_TEXT0 },
  { 0, 0 } } },
+{ .opt_keyword = _context.query_syntax, .name = "query", .keywords =
+ (notmuch_keyword_t []){ { "infix", NOTMUCH_QUERY_SYNTAX_XAPIAN },
+ { "sexp", NOTMUCH_QUERY_SYNTAX_SEXP },
+ { 0, 0 } } },
 { .opt_int = _format_version, .name = "format-version" },
 { }
 };
diff --git a/test/T080-search.sh b/test/T080-search.sh
index a3f0dead..9bda1eb9 100755
--- a/test/T080-search.sh
+++ b/test/T080-search.sh
@@ -189,4 +189,11 @@ test_begin_subtest "parts do not have adjacent term 
positions"
 output=$(notmuch search id:termpos and '"c x"')
 test_expect_equal "$output" ""
 
+if [[ NOTMUCH_HAVE_SFSEXP = 1 ]]; then
+test_begin_subtest "sexpr query: all messages"
+notmuch search '*' > EXPECTED
+notmuch search --query=sexp '()' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+fi
+
 test_done
diff --git a/test/T095-address.sh b/test/T095-address.sh
index 817be538..8bb3627a 100755
--- a/test/T095-address.sh
+++ b/test/T095-address.sh
@@ -325,4 +325,11 @@ cat  EXPECTED
+notmuch address --query=sexp '()' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+fi
+
 test_done
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


[PATCH 11/36] lib/parse-sexp: support subject field

2021-08-24 Thread David Bremner
The broken tests are because we do not yet handle phrase searches.
---
 doc/man7/notmuch-sexp-queries.rst | 62 +--
 lib/parse-sexp.cc | 19 +-
 test/T081-sexpr-search.sh | 57 
 3 files changed, 133 insertions(+), 5 deletions(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index 0304759e..08e97cc3 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -36,9 +36,8 @@ An s-expression query is either an atom, the empty list, or a
 a *field*, *logical operation*, or *modifier*, and 0 or more
 subqueries.
 
-``*``
-``()``
-The empty list matches all messages
+``*`` ``()``
+Match all messages.
 
 *term*
 Match all messages containing *term*, possibly after
@@ -64,6 +63,59 @@ subqueries.
 FIELDS
 ``
 
+*Fields* (also called *prefixes* in notmuch documentation)
+correspond to attributes of mail messages. Some are inherent (and
+immutable) like ``subject``, while others, like ``tag`` and ``property``,
+are settable by the user.  Each concrete field in
+:any:`the table below <field-table>`
+is discussed further under "Search prefixes" in
+:any:`notmuch-search-terms(7)`. The row *user* refers to user defined
+fields, described in :any:`notmuch-config(1)`.
+
+.. _field-table:
+
+.. table:: Fields with supported modifiers
+
+  ++---+---+---+---+--+
+  |   field|  combine  |   type|  expand   | wildcard  |  regex   |
+  ++===+===+===+===+==+
+  |   *none*   |and|   |no |yes|no|
+  ++---+---+---+---+--+
+  |   *user*   |and|  phrase   |no |yes|no|
+  ++---+---+---+---+--+
+  | attachment |and|  phrase   |yes|yes|no|
+  ++---+---+---+---+--+
+  |body|and|  phrase   |no |no |no|
+  ++---+---+---+---+--+
+  |date|   |   range   |no |no |no|
+  ++---+---+---+---+--+
+  |   folder   |or |  phrase   |yes|yes|   yes|
+  ++---+---+---+---+--+
+  |from|and|  phrase   |yes|yes|   yes|
+  ++---+---+---+---+--+
+  | id |or |   term|no |yes|   yes|
+  ++---+---+---+---+--+
+  | is |and|   term|yes|yes|   yes|
+  ++---+---+---+---+--+
+  |  lastmod   |   |   range   |no |no |no|
+  ++---+---+---+---+--+
+  |mid |or |   term|no |yes|   yes|
+  ++---+---+---+---+--+
+  |  mimetype  |or |  phrase   |yes|yes|no|
+  ++---+---+---+---+--+
+  |path|or |   term|yes|yes|   yes|
+  ++---+---+---+---+--+
+  |  property  |and|   term|yes|yes|   yes|
+  ++---+---+---+---+--+
+  |  subject   |and|  phrase   |yes|yes|   yes|
+  ++---+---+---+---+--+
+  |tag |and|   term|yes|yes|   yes|
+  ++---+---+---+---+--+
+  |   thread   |or |   term|yes|yes|   yes|
+  ++---+---+---+---+--+
+  | to |and|  phrase   |yes|yes|no|
+  ++---+---+---+---+--+
+
 .. _modifiers:
 
 MODIFIERS
@@ -86,6 +138,10 @@ EXAMPLES
 ``(not Bob Marley)``
 Match messages containing neither "Bob" nor "Marley", nor their stems,
 
+``(subject quick "brown fox")``
+Match messages whose subject contains "quick" (anywhere, stemmed) and
+the phrase "brown fox".
+
 .. |q1| replace:: :math:`q_1`
 .. |q2| replace:: :math:`q_2`
 .. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 0d2c0ba8..25556058 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -8,7 +8,8 @@
  * definitions from sexp.h */
 
 typedef enum {
-SEXP_FLAG_NONE 

[PATCH 22/36] lib/query: factor out _notmuch_query_string_to_xapian_query

2021-08-24 Thread David Bremner
When dealing with recursive queries (i.e. thread:{foo}) it turns out
to be useful just to deal with the underlying Xapian objects, and not
wrap them in notmuch objects.
---
 lib/database-private.h |  7 ++
 lib/query.cc   | 51 --
 2 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index cf4eb94b..7ee8e62d 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -302,11 +302,18 @@ notmuch_status_t
 _notmuch_database_setup_user_query_fields (notmuch_database_t *notmuch);
 
 #if __cplusplus
+/* query.cc */
+notmuch_status_t
+_notmuch_query_string_to_xapian_query (notmuch_database_t *notmuch,
+  std::string query_string,
+  Xapian::Query ,
+  std::string );
 /* parse-sexp.cc */
 notmuch_status_t
 _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char 
*querystr,
  Xapian::Query );
 
+/* regexp-fields.cc */
 notmuch_status_t
 _notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, 
std::string field,
  std::string regexp_str,
diff --git a/lib/query.cc b/lib/query.cc
index 57596f48..87ee18fc 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -178,38 +178,55 @@ _notmuch_query_cache_terms (notmuch_query_t *query)
query->terms.insert (*t);
 }
 
-static notmuch_status_t
-_notmuch_query_ensure_parsed_xapian (notmuch_query_t *query)
+notmuch_status_t
+_notmuch_query_string_to_xapian_query (notmuch_database_t *notmuch,
+  std::string query_string,
+  Xapian::Query ,
+  std::string )
 {
 try {
-   if (strcmp (query->query_string, "") == 0 ||
-   strcmp (query->query_string, "*") == 0) {
-   query->xapian_query = Xapian::Query::MatchAll;
+   if (query_string == "" || query_string == "*") {
+   output = Xapian::Query::MatchAll;
} else {
-   query->xapian_query =
-   query->notmuch->query_parser->
-   parse_query (query->query_string, NOTMUCH_QUERY_PARSER_FLAGS);
-
-   _notmuch_query_cache_terms (query);
+   output =
+   notmuch->query_parser->
+   parse_query (query_string, NOTMUCH_QUERY_PARSER_FLAGS);
}
-   query->parsed = true;
-
 } catch (const Xapian::Error ) {
-   if (! query->notmuch->exception_reported) {
-   _notmuch_database_log (query->notmuch,
+   if (! notmuch->exception_reported) {
+   _notmuch_database_log (notmuch,
   "A Xapian exception occurred parsing query: 
%s\n",
   error.get_msg ().c_str ());
-   _notmuch_database_log_append (query->notmuch,
+   _notmuch_database_log_append (notmuch,
  "Query string was: %s\n",
- query->query_string);
-   query->notmuch->exception_reported = true;
+ query_string.c_str ());
+   notmuch->exception_reported = true;
}
 
+   msg = error.get_msg ();
return NOTMUCH_STATUS_XAPIAN_EXCEPTION;
 }
 return NOTMUCH_STATUS_SUCCESS;
 }
 
+static notmuch_status_t
+_notmuch_query_ensure_parsed_xapian (notmuch_query_t *query)
+{
+notmuch_status_t status;
+std::string msg; /* ignored */
+
+status =  _notmuch_query_string_to_xapian_query (query->notmuch, 
query->query_string,
+query->xapian_query, msg);
+if (status)
+   return status;
+
+query->parsed = true;
+
+_notmuch_query_cache_terms (query);
+
+return NOTMUCH_STATUS_SUCCESS;
+}
+
 static notmuch_status_t
 _notmuch_query_ensure_parsed_sexpr (notmuch_query_t *query)
 {
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


[PATCH 24/36] lib/parse-sexp: expand queries

2021-08-24 Thread David Bremner
The code here is just gluing together _notmuch_query_expand with the
existing sexp parser infrastructure.
---
 doc/man7/notmuch-sexp-queries.rst | 20 +++
 lib/parse-sexp.cc | 56 +--
 test/T081-sexpr-search.sh | 52 
 3 files changed, 118 insertions(+), 10 deletions(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index 7eaffe56..fee43cb5 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -144,6 +144,11 @@ MODIFIERS
 *Modifiers* refer to any prefixes (first elements of compound queries)
 that are neither operators nor fields.
 
+``(matching`` |q1| |q2| ... |qn| ``)`` ``(of`` |q1| |q2| ... |qn|  ``)``
+Match all messages that have the same values of the current field as
+those matching all of |q1| ... |qn|. Supported in most term [#not-path]_ or
+phrase fields. Most commonly used in the ``thread`` field.
+
 ``(regex`` *atom* ``)`` ``(rx`` *atom* ``)``
 Interpret *atom* as a POSIX.2 regular expression (see
 :manpage:`regex(7)`). This applies in term fields and a subset 
[#not-phrase]_ of
@@ -176,6 +181,9 @@ EXAMPLES
 Match the *phrase* "quick" followed by "fox" in phrase fields (or
 outside a field). Match the literal string in a term field.
 
+``(folder (of (id 1234@invalid)))``
+Match any message in the same folder as the one with Message-Id 
"1234@invalid"
+
 ``(id 1234@invalid blah@test)``
 Matches Message-Id "1234@invalid" *or* Message-Id "blah@test"
 
@@ -193,6 +201,14 @@ EXAMPLES
 Match messages whose subject contains "quick brown fox", but also
 "brown fox quicksand".
 
+``(thread (of (id 1234@invalid)))``
+Match any message in the same thread as the one with Message-Id 
"1234@invalid"
+
+``(thread (matching (from b...@example.com) (to b...@example.com)))``
+Match any (messages in) a thread containing a message from
+"b...@example.com" and a (possibly distinct) message to "bob at
+example.com".
+
 ``(to (or b...@example.com mall...@example.org))`` ``(or (to b...@example.com) 
(to mall...@example.org))``
 Match in the "To" or "Cc" headers, "b...@example.com",
 "mall...@example.org", and also "b...@example.com.au" since it
@@ -216,6 +232,10 @@ NOTES
 .. [#not-body] Due the the way ``body`` is implemented in notmuch,
this modifier is not supported in the ``body`` field.
 
+.. [#not-path] Due to the way recursive ``path`` queries are implemented
+   in notmuch, this modifier is not supported in the
+   ``path`` field.
+
 .. |q1| replace:: :math:`q_1`
 .. |q2| replace:: :math:`q_2`
 .. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 17401f47..9f6e0b77 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -15,6 +15,8 @@ typedef enum {
 SEXP_FLAG_WILDCARD = 1 << 3,
 SEXP_FLAG_REGEX= 1 << 4,
 SEXP_FLAG_DO_REGEX = 1 << 5,
+SEXP_FLAG_EXPAND   = 1 << 6,
+SEXP_FLAG_DO_EXPAND = 1 << 7,
 } _sexp_flag_t;
 
 /*
@@ -46,29 +48,33 @@ static _sexp_prefix_t prefixes[] =
 { "and",Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
   SEXP_FLAG_NONE },
 { "attachment", Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND },
 { "body",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
   SEXP_FLAG_FIELD },
 { "from",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX },
+  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_REGEX | 
SEXP_FLAG_EXPAND },
 { "folder", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
 { "id", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
   SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
 { "is", Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
+{ "matching",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
+  SEXP_FLAG_DO_EXPAND },
 { "mid",Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
   SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
 { "mimetype",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD | SEXP_FLAG_EXPAND },
 { 

[PATCH 21/36] lib: generate actual Xapian query for "*" and ""

2021-08-24 Thread David Bremner
The previous code had the somewhat bizarre effect that the (notmuch
specific) query string was "*" (interpreted as MatchAll) and the
allegedly parsed xapian_query was "MatchNothing".

This commit also reduces code duplication.
---
 lib/query.cc | 34 ++
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/lib/query.cc b/lib/query.cc
index 56f90e1c..57596f48 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -182,11 +182,16 @@ static notmuch_status_t
 _notmuch_query_ensure_parsed_xapian (notmuch_query_t *query)
 {
 try {
-   query->xapian_query =
-   query->notmuch->query_parser->
-   parse_query (query->query_string, NOTMUCH_QUERY_PARSER_FLAGS);
+   if (strcmp (query->query_string, "") == 0 ||
+   strcmp (query->query_string, "*") == 0) {
+   query->xapian_query = Xapian::Query::MatchAll;
+   } else {
+   query->xapian_query =
+   query->notmuch->query_parser->
+   parse_query (query->query_string, NOTMUCH_QUERY_PARSER_FLAGS);
 
-   _notmuch_query_cache_terms (query);
+   _notmuch_query_cache_terms (query);
+   }
query->parsed = true;
 
 } catch (const Xapian::Error ) {
@@ -331,7 +336,6 @@ _notmuch_query_search_documents (notmuch_query_t *query,
 notmuch_messages_t **out)
 {
 notmuch_database_t *notmuch = query->notmuch;
-const char *query_string = query->query_string;
 notmuch_mset_messages_t *messages;
 notmuch_status_t status;
 
@@ -361,13 +365,9 @@ _notmuch_query_search_documents (notmuch_query_t *query,
Xapian::MSet mset;
Xapian::MSetIterator iterator;
 
-   if (strcmp (query_string, "") == 0 ||
-   strcmp (query_string, "*") == 0) {
-   final_query = mail_query;
-   } else {
-   final_query = Xapian::Query (Xapian::Query::OP_AND,
-mail_query, query->xapian_query);
-   }
+   final_query = Xapian::Query (Xapian::Query::OP_AND,
+mail_query, query->xapian_query);
+
messages->base.excluded_doc_ids = NULL;
 
if ((query->omit_excluded != NOTMUCH_EXCLUDE_FALSE) && 
(query->exclude_terms)) {
@@ -688,7 +688,6 @@ notmuch_status_t
 _notmuch_query_count_documents (notmuch_query_t *query, const char *type, 
unsigned *count_out)
 {
 notmuch_database_t *notmuch = query->notmuch;
-const char *query_string = query->query_string;
 Xapian::doccount count = 0;
 notmuch_status_t status;
 
@@ -704,13 +703,8 @@ _notmuch_query_count_documents (notmuch_query_t *query, 
const char *type, unsign
Xapian::Query final_query, exclude_query;
Xapian::MSet mset;
 
-   if (strcmp (query_string, "") == 0 ||
-   strcmp (query_string, "*") == 0) {
-   final_query = mail_query;
-   } else {
-   final_query = Xapian::Query (Xapian::Query::OP_AND,
-mail_query, query->xapian_query);
-   }
+   final_query = Xapian::Query (Xapian::Query::OP_AND,
+mail_query, query->xapian_query);
 
exclude_query = _notmuch_exclude_tags (query);
 
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


[PATCH 10/36] lib/parse-sexp: support and, not, and or.

2021-08-24 Thread David Bremner
All operations and (Xapian) fields will eventually have an entry in
the prefixes table. The flags field is just a placeholder for now, but
will eventually distinguish between various kinds of prefixes.
---
 doc/man7/notmuch-sexp-queries.rst | 16 ---
 lib/parse-sexp.cc | 76 +--
 test/T081-sexpr-search.sh | 31 +++--
 3 files changed, 109 insertions(+), 14 deletions(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index d177934d..0304759e 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -51,7 +51,9 @@ subqueries.
 (for most fields) or *or*. See :any:`fields` for more information.
 
 ``(`` *operator* |q1| |q2| ... |qn| ``)``
-Combine queries |q1| to |qn|. See :any:`operators` for more information.
+Combine queries |q1| to |qn|. Currently supported operators are
+``and``, ``or``, and ``not``. ``(not`` |q1| ... |qn| ``)`` is equivalent
+to ``(and (not`` |q1| ``) ... (not`` |qn| ``))``.
 
 ``(`` *modifier* |q1| |q2| ... |qn| ``)``
 Combine queries |q1| to |qn|, and reinterpret the result (e.g. as a 
regular expression).
@@ -62,11 +64,6 @@ subqueries.
 FIELDS
 ``
 
-.. _operators:
-
-OPERATORS
-`
-
 .. _modifiers:
 
 MODIFIERS
@@ -82,6 +79,13 @@ EXAMPLES
 Match all messages containing "added", but also those containing "add", 
"additional",
 "Additional", "adds", etc... via stemming.
 
+``(and Bob Marley)``
+Match messages containing words "Bob" and "Marley", or their stems.
+The words need not be adjacent.
+
+``(not Bob Marley)``
+Match messages containing neither "Bob" nor "Marley", nor their stems.
+
 .. |q1| replace:: :math:`q_1`
 .. |q2| replace:: :math:`q_2`
 .. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index f031d790..0d2c0ba8 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -7,12 +7,69 @@
 /* _sexp is used for file scope symbols to avoid clashing with
  * definitions from sexp.h */
 
+typedef enum {
+SEXP_FLAG_NONE = 0,
+} _sexp_flag_t;
+
+typedef struct  {
+const char *name;
+Xapian::Query::op xapian_op;
+Xapian::Query initial;
+_sexp_flag_t flags;
+} _sexp_prefix_t;
+
+static _sexp_prefix_t prefixes[] =
+{
+{ "and",Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
+  SEXP_FLAG_NONE },
+{ "not",Xapian::Query::OP_AND_NOT,  
Xapian::Query::MatchAll,
+  SEXP_FLAG_NONE },
+{ "or", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
+  SEXP_FLAG_NONE },
+{ }
+};
+
+static notmuch_status_t _sexp_to_xapian_query (notmuch_database_t *notmuch,
+  const _sexp_prefix_t *parent,
+  const sexp_t *sx,
+  Xapian::Query );
+
+static notmuch_status_t
+_sexp_combine_query (notmuch_database_t *notmuch,
+const _sexp_prefix_t *parent,
+Xapian::Query::op operation,
+Xapian::Query left,
+const sexp_t *sx,
+Xapian::Query )
+{
+Xapian::Query subquery;
+
+notmuch_status_t status;
+
+/* if we run out elements, return accumulator */
+
+if (! sx) {
+   output = left;
+   return NOTMUCH_STATUS_SUCCESS;
+}
+
+status = _sexp_to_xapian_query (notmuch, parent, sx, subquery);
+if (status)
+   return status;
+
+return _sexp_combine_query (notmuch,
+   parent,
+   operation,
+   Xapian::Query (operation, left, subquery),
+   sx->next, output);
+}
+
 /* Here we expect the s-expression to be a proper list, with first
  * element defining and operation, or as a special case the empty
  * list */
 
 static notmuch_status_t
-_sexp_to_xapian_query (notmuch_database_t *notmuch, const sexp_t *sx,
+_sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t 
*parent, const sexp_t *sx,
   Xapian::Query )
 {
 
@@ -32,11 +89,20 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
sexp_t *sx,
return NOTMUCH_STATUS_SUCCESS;
 }
 
-if (sx->list->ty == SEXP_VALUE)
-   _notmuch_database_log (notmuch, "unknown prefix '%s'\n", sx->list->val);
-else
+if (sx->list->ty == SEXP_LIST) {
_notmuch_database_log (notmuch, "unexpected list in field/operation 
position\n",
   sx->list->val);
+   return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+}
+
+for (_sexp_prefix_t *prefix = prefixes; prefix && prefix->name; prefix++) {
+   if (strcmp (prefix->name, sx->list->val) == 0) {
+   return _sexp_combine_query (notmuch, parent, prefix->xapian_op, 
prefix->initial,
+   sx->list->next, output);
+

[PATCH 23/36] lib/thread-fp: factor out query expansion, rewrite in Xapian

2021-08-24 Thread David Bremner
It will be convenient not to have to construct a notmuch query object
when parsing subqueries, so the commit rewrites the query
expansion (currently only used for thread:{} queries) using only
Xapian. As a bonus it seems about 15% faster in initial experiments.
---
 lib/database-private.h | 16 +-
 lib/parse-sexp.cc  |  2 --
 lib/query.cc   | 48 ++
 lib/thread-fp.cc   | 26 ---
 4 files changed, 72 insertions(+), 20 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index 7ee8e62d..9ee3b933 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -40,6 +40,10 @@
 
 #include 
 
+#if HAVE_SFSEXP
+#include 
+#endif
+
 /* Bit masks for _notmuch_database::features.  Features are named,
  * independent aspects of the database schema.
  *
@@ -313,11 +317,21 @@ notmuch_status_t
 _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char 
*querystr,
  Xapian::Query );
 
+notmuch_status_t
+_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, 
Xapian::Query subquery,
+  Xapian::Query , std::string );
+
 /* regexp-fields.cc */
 notmuch_status_t
 _notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, 
std::string field,
  std::string regexp_str,
  Xapian::Query , std::string );
-#endif
 
+#if HAVE_SFSEXP
+/* parse-sexp.cc */
+notmuch_status_t
+_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char 
*querystr,
+ Xapian::Query );
+#endif
+#endif
 #endif
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 84914296..17401f47 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -219,8 +219,6 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
   Xapian::Query )
 {
 if (sx->ty == SEXP_VALUE) {
-   std::string term = Xapian::Unicode::tolower (sx->val);
-   Xapian::Stem stem = *(notmuch->stemmer);
std::string term_prefix = parent ? _find_prefix (parent->name) : "";
 
if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) {
diff --git a/lib/query.cc b/lib/query.cc
index 87ee18fc..b0937fcc 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -821,3 +821,51 @@ notmuch_query_get_database (const notmuch_query_t *query)
 {
 return query->notmuch;
 }
+
+notmuch_status_t
+_notmuch_query_expand (notmuch_database_t *notmuch, const char *field, 
Xapian::Query subquery,
+  Xapian::Query , std::string )
+{
+std::set terms;
+const std::string term_prefix =  _find_prefix (field);
+
+if (_debug_query ()) {
+   fprintf (stderr, "Expanding subquery:\n%s\n",
+subquery.get_description ().c_str ());
+}
+
+try {
+   Xapian::Enquire enquire (*notmuch->xapian_db);
+   Xapian::MSet mset;
+
+   enquire.set_weighting_scheme (Xapian::BoolWeight ());
+   enquire.set_query (subquery);
+
+   mset = enquire.get_mset (0, notmuch->xapian_db->get_doccount ());
+
+   for (Xapian::MSetIterator iterator = mset.begin (); iterator != 
mset.end (); iterator++) {
+   Xapian::docid doc_id = *iterator;
+   Xapian::Document doc = notmuch->xapian_db->get_document (doc_id);
+   Xapian::TermIterator i = doc.termlist_begin ();
+
+   for (i.skip_to (term_prefix);
+i != doc.termlist_end () && ((*i).rfind (term_prefix, 0) == 
0); i++) {
+   terms.insert (*i);
+   }
+   }
+   output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end 
());
+   if (_debug_query ()) {
+   fprintf (stderr, "Expanded query:\n%s\n",
+subquery.get_description ().c_str ());
+   }
+
+} catch (const Xapian::Error ) {
+   _notmuch_database_log (notmuch,
+  "A Xapian exception occurred expanding query: 
%s\n",
+  error.get_msg ().c_str ());
+   msg = error.get_msg ();
+   return NOTMUCH_STATUS_XAPIAN_EXCEPTION;
+}
+
+return NOTMUCH_STATUS_SUCCESS;
+}
diff --git a/lib/thread-fp.cc b/lib/thread-fp.cc
index 06708ef2..3aa9c423 100644
--- a/lib/thread-fp.cc
+++ b/lib/thread-fp.cc
@@ -34,28 +34,20 @@ ThreadFieldProcessor::operator() (const std::string & str)
if (str.size () <= 1 || str.at (str.size () - 1) != '}') {
throw Xapian::QueryParserError ("missing } in '" + str + "'");
} else {
+   Xapian::Query subquery;
+   Xapian::Query query;
+   std::string msg;
std::string subquery_str = str.substr (1, str.size () - 2);
-   notmuch_query_t *subquery = notmuch_query_create (notmuch, 
subquery_str.c_str ());
-   notmuch_messages_t *messages;
-   std::set terms;
 
-   if (! subquery)
-   throw 

[PATCH 01/36] CLI: make variable n_requested_db_uuid file scope.

2021-08-24 Thread David Bremner
It turns out that now that we pass an open database into the
subcommands, it is easy to check any requested uuid against the
database at the same time as we process the other shared
arguments. This results in overall less boilerplate code, as well as
making a CLI scope function and variable file scope in notmuch.c.
---
 notmuch-client.h |  4 +---
 notmuch-compact.c|  7 +--
 notmuch-config.c |  4 
 notmuch-count.c  |  4 +---
 notmuch-dump.c   |  4 +---
 notmuch-insert.c |  4 +---
 notmuch-new.c|  4 +---
 notmuch-reindex.c|  4 +---
 notmuch-reply.c  |  4 +---
 notmuch-restore.c|  3 +--
 notmuch-search.c |  6 ++
 notmuch-setup.c  |  4 
 notmuch-show.c   |  4 +---
 notmuch-tag.c|  4 +---
 notmuch.c| 24 ++--
 test/random-corpus.c |  3 ++-
 16 files changed, 33 insertions(+), 54 deletions(-)

diff --git a/notmuch-client.h b/notmuch-client.h
index 8643a63f..f820791f 100644
--- a/notmuch-client.h
+++ b/notmuch-client.h
@@ -485,11 +485,9 @@ print_status_gzbytes (const char *loc,
 
 #include "command-line-arguments.h"
 
-extern const char *notmuch_requested_db_uuid;
 extern const notmuch_opt_desc_t notmuch_shared_options [];
-void notmuch_exit_if_unmatched_db_uuid (notmuch_database_t *notmuch);
 
-void notmuch_process_shared_options (const char *subcommand_name);
+void notmuch_process_shared_options (notmuch_database_t *notmuch, const char 
*subcommand_name);
 int notmuch_minimal_options (const char *subcommand_name,
 int argc, char **argv);
 
diff --git a/notmuch-compact.c b/notmuch-compact.c
index 2648434e..40ffb428 100644
--- a/notmuch-compact.c
+++ b/notmuch-compact.c
@@ -45,12 +45,7 @@ notmuch_compact_command (notmuch_database_t *notmuch, int 
argc, char *argv[])
 if (opt_index < 0)
return EXIT_FAILURE;
 
-if (notmuch_requested_db_uuid) {
-   fprintf (stderr, "Error: --uuid not implemented for compact\n");
-   return EXIT_FAILURE;
-}
-
-notmuch_process_shared_options (argv[0]);
+notmuch_process_shared_options (NULL, argv[0]);
 
 if (! quiet)
printf ("Compacting database...\n");
diff --git a/notmuch-config.c b/notmuch-config.c
index 4de55e5f..80a207f6 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -708,10 +708,6 @@ notmuch_config_command (notmuch_database_t *notmuch, int 
argc, char *argv[])
 if (opt_index < 0)
return EXIT_FAILURE;
 
-if (notmuch_requested_db_uuid)
-   fprintf (stderr, "Warning: ignoring --uuid=%s\n",
-notmuch_requested_db_uuid);
-
 /* skip at least subcommand argument */
 argc -= opt_index;
 argv += opt_index;
diff --git a/notmuch-count.c b/notmuch-count.c
index 5ac4292b..e8c545e3 100644
--- a/notmuch-count.c
+++ b/notmuch-count.c
@@ -182,7 +182,7 @@ notmuch_count_command (notmuch_database_t *notmuch, int 
argc, char *argv[])
 if (opt_index < 0)
return EXIT_FAILURE;
 
-notmuch_process_shared_options (argv[0]);
+notmuch_process_shared_options (notmuch, argv[0]);
 
 if (input_file_name) {
batch = true;
@@ -201,8 +201,6 @@ notmuch_count_command (notmuch_database_t *notmuch, int 
argc, char *argv[])
return EXIT_FAILURE;
 }
 
-notmuch_exit_if_unmatched_db_uuid (notmuch);
-
 query_str = query_string_from_args (notmuch, argc - opt_index, argv + 
opt_index);
 if (query_str == NULL) {
fprintf (stderr, "Out of memory.\n");
diff --git a/notmuch-dump.c b/notmuch-dump.c
index ae89e4da..5c8213be 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -366,8 +366,6 @@ notmuch_dump_command (notmuch_database_t *notmuch, int 
argc, char *argv[])
 const char *query_str = NULL;
 int ret;
 
-notmuch_exit_if_unmatched_db_uuid (notmuch);
-
 const char *output_file_name = NULL;
 int opt_index;
 
@@ -394,7 +392,7 @@ notmuch_dump_command (notmuch_database_t *notmuch, int 
argc, char *argv[])
 if (opt_index < 0)
return EXIT_FAILURE;
 
-notmuch_process_shared_options (argv[0]);
+notmuch_process_shared_options (notmuch, argv[0]);
 
 if (include == 0)
include = DUMP_INCLUDE_CONFIG | DUMP_INCLUDE_TAGS | 
DUMP_INCLUDE_PROPERTIES;
diff --git a/notmuch-insert.c b/notmuch-insert.c
index e3d87e4a..72e2e35f 100644
--- a/notmuch-insert.c
+++ b/notmuch-insert.c
@@ -478,7 +478,7 @@ notmuch_insert_command (notmuch_database_t *notmuch, int 
argc, char *argv[])
 if (opt_index < 0)
return EXIT_FAILURE;
 
-notmuch_process_shared_options (argv[0]);
+notmuch_process_shared_options (notmuch, argv[0]);
 
 mail_root = notmuch_config_get (notmuch, NOTMUCH_CONFIG_MAIL_ROOT);
 
@@ -550,8 +550,6 @@ notmuch_insert_command (notmuch_database_t *notmuch, int 
argc, char *argv[])
return EXIT_FAILURE;
 }
 
-notmuch_exit_if_unmatched_db_uuid (notmuch);
-
 status = notmuch_process_shared_indexing_options (notmuch);
 if (status != NOTMUCH_STATUS_SUCCESS) {
  

[PATCH 35/36] CLI/tag: enable sexp queries

2021-08-24 Thread David Bremner
We have to rewrite _optimize_tag_query here because it is generating
a query string in the infix Xapian syntax. Luckily this is easy to do
with the sexp query syntax.
---
 notmuch-tag.c| 43 ---
 test/T150-tagging.sh | 43 +++
 2 files changed, 79 insertions(+), 7 deletions(-)

diff --git a/notmuch-tag.c b/notmuch-tag.c
index aa886032..71ff06bf 100644
--- a/notmuch-tag.c
+++ b/notmuch-tag.c
@@ -39,8 +39,8 @@ handle_sigint (unused (int sig))
 
 
 static char *
-_optimize_tag_query (void *ctx, const char *orig_query_string,
-const tag_op_list_t *list)
+_optimize_tag_query_infix (void *ctx, const char *orig_query_string,
+  const tag_op_list_t *list)
 {
 /* This is subtler than it looks.  Xapian ignores the '-' operator
  * at the beginning both queries and parenthesized groups and,
@@ -88,6 +88,33 @@ _optimize_tag_query (void *ctx, const char 
*orig_query_string,
 return query_string;
 }
 
+static char *
+_optimize_tag_query (void *ctx, const char *orig_query_string,
+notmuch_query_syntax_t stx,
+const tag_op_list_t *list)
+{
+char *query_string;
+
+if (stx == NOTMUCH_QUERY_SYNTAX_XAPIAN)
+   return _optimize_tag_query_infix (ctx, orig_query_string, list);
+
+/* Don't optimize if there are no tag changes. */
+if (tag_op_list_size (list) == 0)
+   return talloc_strdup (ctx, orig_query_string);
+
+query_string = talloc_asprintf (ctx, "(and %s", orig_query_string);
+for (size_t i = 0; i < tag_op_list_size (list) && query_string; i++) {
+   query_string = talloc_asprintf_append_buffer (
+   query_string, tag_op_list_isremove (list, i) ? " (tag \"%s\")" : " 
(not (tag \"%s\"))",
+   tag_op_list_tag (list, i));
+}
+
+if (query_string)
+   query_string = talloc_strdup_append_buffer (query_string, ")");
+
+return query_string;
+}
+
 /* Tag messages matching 'query_string' according to 'tag_ops'
  */
 static int
@@ -104,7 +131,9 @@ tag_query (void *ctx, notmuch_database_t *notmuch, const 
char *query_string,
 if (! (flags & TAG_FLAG_REMOVE_ALL)) {
/* Optimize the query so it excludes messages that already
 * have the specified set of tags. */
-   query_string = _optimize_tag_query (ctx, query_string, tag_ops);
+   query_string = _optimize_tag_query (ctx, query_string,
+   shared_option_query_syntax (),
+   tag_ops);
if (query_string == NULL) {
fprintf (stderr, "Out of memory.\n");
return 1;
@@ -112,11 +141,11 @@ tag_query (void *ctx, notmuch_database_t *notmuch, const 
char *query_string,
flags |= TAG_FLAG_PRE_OPTIMIZED;
 }
 
-query = notmuch_query_create (notmuch, query_string);
-if (query == NULL) {
-   fprintf (stderr, "Out of memory.\n");
+status = notmuch_query_create_with_syntax (notmuch, query_string,
+  shared_option_query_syntax (),
+  );
+if (print_status_database ("notmuch tag", notmuch, status))
return 1;
-}
 
 /* tagging is not interested in any special sort order */
 notmuch_query_set_sort (query, NOTMUCH_SORT_UNSORTED);
diff --git a/test/T150-tagging.sh b/test/T150-tagging.sh
index c292b24e..1a2fd77e 100755
--- a/test/T150-tagging.sh
+++ b/test/T150-tagging.sh
@@ -2,6 +2,21 @@
 test_description='"notmuch tag"'
 . $(dirname "$0")/test-lib.sh || exit 1
 
+test_query_syntax () {
+# use a tag with a space to stress the query string munging code.
+local new_tag="${RANDOM} ${RANDOM}"
+test_begin_subtest "sexpr query: $1"
+backup_database
+notmuch tag --query=sexp "+${new_tag}" -- "$1"
+notmuch dump > OUTPUT
+restore_database
+backup_database
+notmuch tag "+${new_tag}" -- "$2"
+notmuch dump > EXPECTED
+restore_database
+test_expect_equal_file_nonempty EXPECTED OUTPUT
+}
+
 add_message '[subject]=One'
 add_message '[subject]=Two'
 
@@ -310,4 +325,32 @@ output=$(notmuch tag +something '*' 2>&1 | sed 's/: .*$//' 
)
 chmod u+w ${MAIL_DIR}/.notmuch/xapian/*.*
 test_expect_equal "$output" "A Xapian exception occurred opening database"
 
+add_email_corpus
+
+if [ $NOTMUCH_HAVE_SFSEXP -eq 1 ]; then
+
+test_query_syntax '(and "wonderful" "wizard")' 'wonderful and wizard'
+test_query_syntax '(or "php" "wizard")' 'php or wizard'
+test_query_syntax 'wizard' 'wizard'
+test_query_syntax 'Wizard' 'Wizard'
+test_query_syntax '(attachment notmuch-help.patch)' 
'attachment:notmuch-help.patch'
+test_query_syntax '(mimetype text/html)' 'mimetype:text/html'
+
+test_begin_subtest "--batch --query=sexp"
+notmuch dump --format=batch-tag > backup.tags
+notmuch tag --batch --query=sexp  < OUTPUT
+cat < EXPECTED
+

[PATCH 36/36] doc/sexp-queries: update synopsis and description

2021-08-24 Thread David Bremner
I chose to go with a somewhat terse synopsis to try to keep the length
of the page down.
---
 doc/man7/notmuch-sexp-queries.rst | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index 81e3929b..019d15f0 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -7,11 +7,22 @@ notmuch-sexp-queries
 SYNOPSIS
 
 
-**notmuch** **search** ``--query=sexp`` '(and (to santa) (date december))'
+**notmuch** *subcommand* ``--query=sexp`` [option ...]  ``--`` '(and (to 
santa) (date december))'
 
 DESCRIPTION
 ===
 
+Notmuch supports an alternative query syntax based on `S-expressions
+<https://en.wikipedia.org/wiki/S-expression>`_ . It can be selected
+with the command line ``--query=sexp`` or with the appropriate option
+to the library function :c:func:`notmuch_query_create_with_syntax`.
+Support for this syntax is currently optional; you can test if your
+build of notmuch supports it with
+
+::
+
+   $ notmuch config get built_with.sexpr_query
+
 
 S-EXPRESSIONS
 -
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


[PATCH 32/36] lib/parse-sexp: apply macros

2021-08-24 Thread David Bremner
Macros implement lazy evaluation and lexical scope.  The former is
needed to make certain natural constructs work sensibly (e.g. (tag
,param)) but the latter is mainly future-proofing in case the DSL is
extended to allow local bindings.

For technical background, see chapters 6 and 17 of [1] (or some other
intermediate programming languages textbook).

[1] http://cs.brown.edu/courses/cs173/2012/book/
---
 doc/man7/notmuch-sexp-queries.rst |  46 +++
 lib/parse-sexp.cc | 114 ++-
 test/T081-sexpr-search.sh | 126 ++
 3 files changed, 284 insertions(+), 2 deletions(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index db3f8837..81e3929b 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -63,6 +63,14 @@ subqueries.
 Combine queries |q1| to |qn|, and reinterpret the result (e.g. as a 
regular expression).
 See :any:`modifiers` for more information.
 
+``(macro (`` |p1| ... |pn| ``) body)``
+Define saved query with parameter substitution. The syntax is
+recognized only in saved s-expression queries (see ``squery.*`` in
+:any:`notmuch-config(1)`). Parameter names in ``body`` must be
+prefixed with ``,`` to be expanded (see :any:`macro_examples`).
+Macros may refer to other macros, but only to their own
+parameters [#macro-details]_.
+
 .. _fields:
 
 FIELDS
@@ -234,9 +242,43 @@ EXAMPLES
 Match messages with a non-empty List-Id header, assuming
 configuration ``index.header.List=List-Id``
 
+.. _macro_examples:
+
+MACRO EXAMPLES
+--
+
+A macro that takes two parameters and applies different fields to them.
+
+::
+
+   $ notmuch config set squery.TagSubject '(macro (tagname subj) (and (tag 
,tagname) (subject ,subj)))'
+   $ notmuch search --query=sexp '(TagSubject inbox maildir)'
+
+Nested macros are allowed.
+
+::
+
+$ notmuch config set squery.Inner '(macro (x) (subject ,x))'
+$ notmuch config set squery.Outer  '(macro (x y) (and (tag ,x) (Inner 
,y)))'
+$ notmuch search --query=sexp '(Outer inbox maildir)'
+
+Parameters can be re-used to reduce boilerplate. Any field, including
+user defined fields, is permitted within a macro.
+
+::
+
+$ notmuch config set squery.About '(macro (name) (or (subject ,name) (List 
,name)))'
+$ notmuch search --query=sexp '(About notmuch)'
+
+
 NOTES
 =
 
+.. [#macro-details] Technically macros implement lazy evaluation and
+lexical scope. There is one top level scope
+containing all macro definitions, but all
+parameter definitions are local to a given macro.
+
 .. [#aka-pref] a.k.a. prefixes
 
 .. [#aka-prob] a.k.a. probabilistic prefixes
@@ -256,3 +298,7 @@ NOTES
 .. |q1| replace:: :math:`q_1`
 .. |q2| replace:: :math:`q_2`
 .. |qn| replace:: :math:`q_n`
+
+.. |p1| replace:: :math:`p_1`
+.. |p2| replace:: :math:`p_2`
+.. |pn| replace:: :math:`p_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 8f7c26c2..356c32ea 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -7,9 +7,18 @@
 /* _sexp is used for file scope symbols to avoid clashing with
  * definitions from sexp.h */
 
-typedef struct {
+/* sexp_binding structs attach name to a sexp and a defining
+ * context. The latter allows lazy evaluation of parameters whose
+ * definition contains other parameters.  Lazy evaluation is needed
+ * because a primary goal of macros is to change the parent field for
+ * a sexp.
+ */
+
+typedef struct sexp_binding {
 const char *name;
 const sexp_t *sx;
+const struct sexp_binding *context;
+const struct sexp_binding *next;
 } _sexp_binding_t;
 
 typedef enum {
@@ -302,6 +311,81 @@ _sexp_parse_header (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent,
sx->list->next, output);
 }
 
+static _sexp_binding_t *
+_sexp_bind (void *ctx, const _sexp_binding_t *env, const char *name, const 
sexp_t *sx, const
+   _sexp_binding_t *context)
+{
+_sexp_binding_t *binding = talloc (ctx, _sexp_binding_t);
+
+binding->name = talloc_strdup (ctx, name);
+binding->sx = sx;
+binding->context = context;
+binding->next = env;
+return binding;
+}
+
+static notmuch_status_t
+maybe_apply_macro (notmuch_database_t *notmuch, const _sexp_prefix_t *parent,
+  const _sexp_binding_t *env, const sexp_t *sx, const sexp_t 
*args,
+  Xapian::Query )
+{
+const sexp_t *params, *param, *arg, *body;
+void *local = talloc_new (notmuch);
+_sexp_binding_t *new_env = NULL;
+notmuch_status_t status = NOTMUCH_STATUS_SUCCESS;
+
+if (sx->list->ty != SEXP_VALUE || strcmp (sx->list->val, "macro") != 0) {
+   status = NOTMUCH_STATUS_IGNORED;
+   goto DONE;
+}
+
+params = sx->list->next;
+
+if (! params || (params->ty != SEXP_LIST)) {
+   _notmuch_database_log (notmuch, 

[PATCH 13/36] lib/parse-sexp: support phrase queries.

2021-08-24 Thread David Bremner
Anything that is quoted or not purely word characters is considered a
phrase.  Phrases are not stemmed, because the stems do not have
positional information in the database. It is less efficient to scan
the term twice, but it avoids a second pass to add prefixes, so maybe
it balances out. In any case, it seems unlikely query parsing is very
often a bottleneck.
---
 doc/man7/notmuch-sexp-queries.rst | 32 ++
 lib/parse-sexp.cc | 45 +--
 test/T081-sexpr-search.sh | 21 +--
 3 files changed, 83 insertions(+), 15 deletions(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index 08e97cc3..b763876d 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -40,10 +40,12 @@ subqueries.
 Match all messages.
 
 *term*
-Match all messages containing *term*, possibly after
-stemming or phase splitting. For discussion of stemming in
-notmuch see :any:`notmuch-search-terms(7)`. Stemming only applies
-to unquoted terms (basic values) in s-expression queries.
+
+Match all messages containing *term*, possibly after stemming or
+phrase splitting. For discussion of stemming in notmuch see
+:any:`notmuch-search-terms(7)`. Stemming only applies to unquoted
+terms (basic values) in s-expression queries.  For information on
+phrase splitting see :any:`fields`.
 
 ``(`` *field* |q1| |q2| ... |qn| ``)``
 Restrict the queries |q1| to |qn| to *field*, and combine with *and*
@@ -63,7 +65,7 @@ subqueries.
 FIELDS
 ``
 
-*Fields* (also called *prefixes* in notmuch documentation)
+*Fields* [#aka-pref]_
 correspond to attributes of mail messages. Some are inherent (and
 immutable) like ``subject``, while others ``tag`` and ``property`` are
 settable by the user.  Each concrete field in
@@ -72,6 +74,13 @@ is discussed further under "Search prefixes" in
 :any:`notmuch-search-terms(7)`. The row *user* refers to user defined
 fields, described in :any:`notmuch-config(1)`.
 
+Most fields are either *phrase fields* [#aka-prob]_ (which match
+sequences of words), or *term fields* [#aka-bool]_ (which match exact
+strings). *Phrase splitting* breaks the term (basic value or quoted
+string) into words, ignoring punctuation. Phrase splitting is applied to
+terms in phrase (probabilistic) fields. Both phrase splitting and
+stemming apply only in phrase fields.
+
 .. _field-table:
 
 .. table:: Fields with supported modifiers
@@ -138,10 +147,23 @@ EXAMPLES
 ``(not Bob Marley)``
 Match messages containing neither "Bob" nor "Marley", nor their stems,
 
+``"quick fox"`` ``quick-fox`` ``quick@fox``
+Match the *phrase* "quick" followed by "fox" in phrase fields (or
+outside a field). Match the literal string in a term field.
+
 ``(subject quick "brown fox")``
 Match messages whose subject contains "quick" (anywhere, stemmed) and
 the phrase "brown fox".
 
+NOTES
+=
+
+.. [#aka-pref] a.k.a. prefixes
+
+.. [#aka-prob] a.k.a. probabilistic prefixes
+
+.. [#aka-bool] a.k.a. boolean prefixes
+
 .. |q1| replace:: :math:`q_1`
 .. |q2| replace:: :math:`q_2`
 .. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 25556058..0917f505 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -2,7 +2,7 @@
 
 #if HAVE_SFSEXP
 #include "sexp.h"
-
+#include "unicode-util.h"
 
 /* _sexp is used for file scope symbols to avoid clashing with
  * definitions from sexp.h */
@@ -67,6 +67,36 @@ _sexp_combine_query (notmuch_database_t *notmuch,
sx->next, output);
 }
 
+static notmuch_status_t
+_sexp_parse_phrase (std::string term_prefix, const char *phrase, Xapian::Query 
)
+{
+Xapian::Utf8Iterator p (phrase);
+Xapian::Utf8Iterator end;
+std::vector terms;
+
+while (p != end) {
+   Xapian::Utf8Iterator start;
+   while (p != end && ! Xapian::Unicode::is_wordchar (*p))
+   p++;
+
+   if (p == end)
+   break;
+
+   start = p;
+
+   while (p != end && Xapian::Unicode::is_wordchar (*p))
+   p++;
+
+   if (p != start) {
+   std::string word (start, p);
+   word = Xapian::Unicode::tolower (word);
+   terms.push_back (term_prefix + word);
+   }
+}
+output = Xapian::Query (Xapian::Query::OP_PHRASE, terms.begin (), 
terms.end ());
+return NOTMUCH_STATUS_SUCCESS;
+}
+
 /* Here we expect the s-expression to be a proper list, with first
  * element defining and operation, or as a special case the empty
  * list */
@@ -80,13 +110,12 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
std::string term = Xapian::Unicode::tolower (sx->val);
Xapian::Stem stem = *(notmuch->stemmer);
std::string term_prefix = parent ? _find_prefix (parent->name) : "";
-   if (sx->aty == SEXP_BASIC)
-   term = "Z" + term_prefix + stem (term);
-   else
-

[PATCH 04/36] lib: define notmuch_query_create_with_syntax

2021-08-24 Thread David Bremner
Set the parsing syntax when the (notmuch) query object is
created. Initially the library always returns a trivial query that
matches all messages when using s-expression syntax.

It seems better to select the syntax at query creation time because
the lazy parsing is an implementation detail.
---
 lib/notmuch.h | 10 +++
 lib/query.cc  | 75 ++-
 2 files changed, 78 insertions(+), 7 deletions(-)

diff --git a/lib/notmuch.h b/lib/notmuch.h
index ef11ed1b..2f1ee951 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -961,6 +961,16 @@ notmuch_query_t *
 notmuch_query_create (notmuch_database_t *database,
  const char *query_string);
 
+typedef enum {
+NOTMUCH_QUERY_SYNTAX_XAPIAN,
+NOTMUCH_QUERY_SYNTAX_SEXP
+} notmuch_query_syntax_t;
+
+notmuch_status_t
+notmuch_query_create_with_syntax (notmuch_database_t *database,
+ const char *query_string,
+ notmuch_query_syntax_t syntax,
+ notmuch_query_t **output);
 /**
  * Sort values for notmuch_query_set_sort.
  */
diff --git a/lib/query.cc b/lib/query.cc
index 39b85e91..a3fe3793 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -23,6 +23,10 @@
 
 #include  /* GHashTable, GPtrArray */
 
+#if HAVE_SFSEXP
+#include "sexp.h"
+#endif
+
 struct _notmuch_query {
 notmuch_database_t *notmuch;
 const char *query_string;
@@ -30,6 +34,7 @@ struct _notmuch_query {
 notmuch_string_list_t *exclude_terms;
 notmuch_exclude_t omit_excluded;
 bool parsed;
+notmuch_query_syntax_t syntax;
 Xapian::Query xapian_query;
 std::set terms;
 };
@@ -105,7 +110,10 @@ _notmuch_query_constructor (notmuch_database_t *notmuch,
 
 query->notmuch = notmuch;
 
-query->query_string = talloc_strdup (query, query_string);
+if (query_string)
+   query->query_string = talloc_strdup (query, query_string);
+else
+   query->query_string = NULL;
 
 query->sort = NOTMUCH_SORT_NEWEST_FIRST;
 
@@ -121,20 +129,49 @@ notmuch_query_create (notmuch_database_t *notmuch,
  const char *query_string)
 {
 
-notmuch_query_t *query = _notmuch_query_constructor (notmuch, 
query_string);
+notmuch_query_t *query;
+notmuch_status_t status;
 
-if (! query)
+status = notmuch_query_create_with_syntax (notmuch, query_string,
+  NOTMUCH_QUERY_SYNTAX_XAPIAN,
+  );
+if (status)
return NULL;
 
 return query;
 }
 
-static notmuch_status_t
-_notmuch_query_ensure_parsed (notmuch_query_t *query)
+notmuch_status_t
+notmuch_query_create_with_syntax (notmuch_database_t *notmuch,
+ const char *query_string,
+ notmuch_query_syntax_t syntax,
+ notmuch_query_t **output)
 {
-if (query->parsed)
-   return NOTMUCH_STATUS_SUCCESS;
 
+notmuch_query_t *query;
+
+if (! output)
+   return NOTMUCH_STATUS_NULL_POINTER;
+
+query = _notmuch_query_constructor (notmuch, query_string);
+if (! query)
+   return NOTMUCH_STATUS_OUT_OF_MEMORY;
+
+if (syntax == NOTMUCH_QUERY_SYNTAX_SEXP && ! HAVE_SFSEXP) {
+   _notmuch_database_log (notmuch, "sexp query parser not available");
+   return NOTMUCH_STATUS_ILLEGAL_ARGUMENT;
+}
+
+query->syntax = syntax;
+
+*output = query;
+
+return NOTMUCH_STATUS_SUCCESS;
+}
+
+static notmuch_status_t
+_notmuch_query_ensure_parsed_xapian (notmuch_query_t *query)
+{
 try {
query->xapian_query =
query->notmuch->query_parser->
@@ -167,6 +204,30 @@ _notmuch_query_ensure_parsed (notmuch_query_t *query)
 return NOTMUCH_STATUS_SUCCESS;
 }
 
+static notmuch_status_t
+_notmuch_query_ensure_parsed_sexpr (notmuch_query_t *query)
+{
+if (query->parsed)
+   return NOTMUCH_STATUS_SUCCESS;
+
+query->xapian_query = Xapian::Query::MatchAll;
+return NOTMUCH_STATUS_SUCCESS;
+}
+
+static notmuch_status_t
+_notmuch_query_ensure_parsed (notmuch_query_t *query)
+{
+if (query->parsed)
+   return NOTMUCH_STATUS_SUCCESS;
+
+#if HAVE_SFSEXP
+if (query->syntax == NOTMUCH_QUERY_SYNTAX_SEXP)
+   return _notmuch_query_ensure_parsed_sexpr (query);
+#endif
+
+return _notmuch_query_ensure_parsed_xapian (query);
+}
+
 const char *
 notmuch_query_get_query_string (const notmuch_query_t *query)
 {
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


[PATCH 08/36] lib: leave stemmer object accessible

2021-08-24 Thread David Bremner
This enables using the same stemmer in both query parsers.
---
 lib/database-private.h | 1 +
 lib/open.cc| 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index f206efaf..85d55299 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -232,6 +232,7 @@ struct _notmuch_database {
  */
 unsigned long view;
 Xapian::QueryParser *query_parser;
+Xapian::Stem *stemmer;
 Xapian::TermGenerator *term_gen;
 Xapian::RangeProcessor *value_range_processor;
 Xapian::RangeProcessor *date_range_processor;
diff --git a/lib/open.cc b/lib/open.cc
index 280ffee3..8a835e98 100644
--- a/lib/open.cc
+++ b/lib/open.cc
@@ -432,7 +432,8 @@ _finish_open (notmuch_database_t *notmuch,
  
"lastmod:");
notmuch->query_parser->set_default_op (Xapian::Query::OP_AND);
notmuch->query_parser->set_database (*notmuch->xapian_db);
-   notmuch->query_parser->set_stemmer (Xapian::Stem ("english"));
+   notmuch->stemmer = new Xapian::Stem ("english");
+   notmuch->query_parser->set_stemmer (*notmuch->stemmer);
notmuch->query_parser->set_stemming_strategy 
(Xapian::QueryParser::STEM_SOME);
notmuch->query_parser->add_rangeprocessor 
(notmuch->value_range_processor);
notmuch->query_parser->add_rangeprocessor 
(notmuch->date_range_processor);
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


[PATCH 17/36] lib/parse-sexp: handle unprefixed terms.

2021-08-24 Thread David Bremner
This is equivalent to adding the same field name "" for multiple
prefixes in the Xapian query parser, but we have to explicitly
construct the resulting query.
---
 lib/parse-sexp.cc | 36 
 test/T081-sexpr-search.sh | 31 +++
 2 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index ffb00148..0192bda9 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -164,6 +164,22 @@ _sexp_parse_wildcard (notmuch_database_t *notmuch,
 return NOTMUCH_STATUS_SUCCESS;
 }
 
+static notmuch_status_t
+_sexp_parse_one_term (notmuch_database_t *notmuch, std::string term_prefix, 
const sexp_t *sx,
+ Xapian::Query )
+{
+Xapian::Stem stem = *(notmuch->stemmer);
+
+if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
+   std::string term = Xapian::Unicode::tolower (sx->val);
+
+   output = Xapian::Query ("Z" + term_prefix + stem (term));
+   return NOTMUCH_STATUS_SUCCESS;
+} else {
+   return _sexp_parse_phrase (term_prefix, sx->val, output);
+}
+
+}
 /* Here we expect the s-expression to be a proper list, with first
  * element defining and operation, or as a special case the empty
  * list */
@@ -185,11 +201,23 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
output = Xapian::Query (term_prefix + sx->val);
return NOTMUCH_STATUS_SUCCESS;
}
-   if (sx->aty == SEXP_BASIC && unicode_word_utf8 (sx->val)) {
-   output = Xapian::Query ("Z" + term_prefix + stem (term));
-   return NOTMUCH_STATUS_SUCCESS;
+   if (parent) {
+   return _sexp_parse_one_term (notmuch, term_prefix, sx, output);
} else {
-   return _sexp_parse_phrase (term_prefix, sx->val, output);
+   Xapian::Query accumulator;
+   for (_sexp_prefix_t *prefix = prefixes; prefix->name; prefix++) {
+   if (prefix->flags & SEXP_FLAG_FIELD) {
+   notmuch_status_t status;
+   Xapian::Query subquery;
+   term_prefix = _find_prefix (prefix->name);
+   status = _sexp_parse_one_term (notmuch, term_prefix, sx, 
subquery);
+   if (status)
+   return status;
+   accumulator = Xapian::Query (Xapian::Query::OP_OR, 
accumulator, subquery);
+   }
+   }
+   output = accumulator;
+   return NOTMUCH_STATUS_SUCCESS;
}
 }
 
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index df502dc5..44cb681f 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -122,6 +122,29 @@ add_message '[subject]="utf8-message-body-subject"' 
'[date]="Sat, 01 Jan 2000 12
 output=$(notmuch search --query=sexp '(body bödý)' | notmuch_search_sanitize)
 test_expect_equal "$output" "thread:XXX   2000-01-01 [1/1] Notmuch Test Suite; 
utf8-message-body-subject (inbox unread)"
 
+add_message "[body]=thebody-1" "[subject]=kryptonite-1"
+add_message "[body]=nothing-to-see-here-1" "[subject]=thebody-1"
+
+test_begin_subtest 'search without body: prefix'
+notmuch search thebody > EXPECTED
+notmuch search --query=sexp '(and thebody)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest 'negated body: prefix'
+notmuch search thebody and not body:thebody > EXPECTED
+notmuch search --query=sexp '(and (not (body thebody)) thebody)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest 'search unprefixed for prefixed term'
+notmuch search kryptonite > EXPECTED
+notmuch search --query=sexp '(and kryptonite)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest 'search with body: prefix for term only in subject'
+notmuch search body:kryptonite > EXPECTED
+notmuch search --query=sexp '(body kryptonite)' > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
 test_begin_subtest "Search by 'from'"
 add_message '[subject]="search by from"' '[date]="Sat, 01 Jan 2000 12:00:00 
-"' [from]=searchbyfrom
 output=$(notmuch search --query=sexp '(from searchbyfrom)' | 
notmuch_search_sanitize)
@@ -287,11 +310,11 @@ output=$(notmuch search --query=sexp '(attachment 
(starts-with not))' | notmuch_
 test_expect_equal "$output" 'thread:XXX   2009-11-18 [2/2] Lars 
Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox 
signed unread)'
 
 test_begin_subtest "starts-with, folder"
-notmuch search --output=files --query=sexp '(folder (starts-with bad))' | 
notmuch_dir_sanitize > OUTPUT
+notmuch search --output=files --query=sexp '(folder (starts-with bad))' | 
notmuch_dir_sanitize | sed 's/[0-9]*$/XXX/' > OUTPUT
 cat < EXPECTED
-MAIL_DIR/bad/msg-010
-MAIL_DIR/bad/news/msg-012
-MAIL_DIR/duplicate/bad/news/msg-012
+MAIL_DIR/bad/msg-XXX
+MAIL_DIR/bad/news/msg-XXX
+MAIL_DIR/duplicate/bad/news/msg-XXX
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
-- 
2.32.0

[PATCH 03/36] lib: split notmuch_query_create

2021-08-24 Thread David Bremner
Most of the function will be re-usable when creating a query from an
s-expression.
---
 lib/query.cc | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/lib/query.cc b/lib/query.cc
index 792aba21..39b85e91 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -84,9 +84,9 @@ _notmuch_query_destructor (notmuch_query_t *query)
 return 0;
 }
 
-notmuch_query_t *
-notmuch_query_create (notmuch_database_t *notmuch,
- const char *query_string)
+static notmuch_query_t *
+_notmuch_query_constructor (notmuch_database_t *notmuch,
+   const char *query_string)
 {
 notmuch_query_t *query;
 
@@ -116,6 +116,19 @@ notmuch_query_create (notmuch_database_t *notmuch,
 return query;
 }
 
+notmuch_query_t *
+notmuch_query_create (notmuch_database_t *notmuch,
+ const char *query_string)
+{
+
+notmuch_query_t *query = _notmuch_query_constructor (notmuch, 
query_string);
+
+if (! query)
+   return NULL;
+
+return query;
+}
+
 static notmuch_status_t
 _notmuch_query_ensure_parsed (notmuch_query_t *query)
 {
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


[PATCH 25/36] lib/parse-sexp: support infix subqueries

2021-08-24 Thread David Bremner
This is necessary so that programs can take infix syntax queries from
a user and use the sexp query syntax to construct e.g. a refinement of
that query.
---
 doc/man7/notmuch-sexp-queries.rst |  7 +
 lib/parse-sexp.cc | 34 
 test/T081-sexpr-search.sh | 43 +++
 3 files changed, 84 insertions(+)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index fee43cb5..e1c01aa5 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -144,6 +144,10 @@ MODIFIERS
 *Modifiers* refer to any prefixes (first elements of compound queries)
 that are neither operators nor fields.
 
+``(infix`` *atom* ``)``
+Interpret *atom* as an infix notmuch query (see
+:any:`notmuch-search-terms(7)`). Not supported inside fields.
+
 ``(matching`` |q1| |q2| ... |qn| ``)`` ``(of`` |q1| |q2| ... |qn|  ``)``
 Match all messages have the same values of the current field as
 those matching all of |q1| ... |qn|. Supported in most term [#not-path]_ or
@@ -187,6 +191,9 @@ EXAMPLES
 ``(id 1234@invalid blah@test)``
 Matches Message-Id "1234@invalid" *or* Message-Id "blah@test"
 
+``(and (infix "date:2009-11-18..2009-11-18") (tag unread))``
+Match messages in the given date range with tag unread.
+
 ``(starts-with prelim)``
 Match any words starting with "prelim".
 
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 9f6e0b77..e562e8f5 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -57,6 +57,8 @@ static _sexp_prefix_t prefixes[] =
   SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
 { "id", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
   SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
+{ "infix",  Xapian::Query::OP_INVALID,  
Xapian::Query::MatchAll,
+  SEXP_FLAG_SINGLE },
 { "is", Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
   SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
 { "matching",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
@@ -242,6 +244,34 @@ _sexp_expand_query (notmuch_database_t *notmuch,
 return status;
 }
 
+static notmuch_status_t
+_sexp_parse_infix (notmuch_database_t *notmuch,  const _sexp_prefix_t *parent,
+  const sexp_t *sx, Xapian::Query )
+{
+if (parent) {
+   _notmuch_database_log (notmuch, "'infix' not supported inside '%s'\n", 
parent->name);
+   return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+}
+try {
+   output = notmuch->query_parser->parse_query (sx->val, 
NOTMUCH_QUERY_PARSER_FLAGS);
+} catch (const Xapian::QueryParserError ) {
+   _notmuch_database_log (notmuch, "Syntax error in infix query: %s\n", 
sx->val);
+   return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+} catch (const Xapian::Error ) {
+   if (! notmuch->exception_reported) {
+   _notmuch_database_log (notmuch,
+  "A Xapian exception occurred parsing query: 
%s\n",
+  error.get_msg ().c_str ());
+   _notmuch_database_log_append (notmuch,
+ "Query string was: %s\n",
+ sx->val);
+   notmuch->exception_reported = true;
+   return NOTMUCH_STATUS_XAPIAN_EXCEPTION;
+   }
+}
+return NOTMUCH_STATUS_SUCCESS;
+}
+
 /* Here we expect the s-expression to be a proper list, with first
  * element defining and operation, or as a special case the empty
  * list */
@@ -311,6 +341,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
}
 
+   if (strcmp (prefix->name, "infix") == 0) {
+   return _sexp_parse_infix (notmuch, parent, sx->list->next, 
output);
+   }
+
if (prefix->xapian_op == Xapian::Query::OP_WILDCARD)
return _sexp_parse_wildcard (notmuch, parent, 
sx->list->next->val, output);
 
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 453cce29..bd57282d 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -689,4 +689,47 @@ 
id:cf0c4d610911171136h1713aa59w9cf9aa31f052a...@mail.gmail.com
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "infix query"
+notmuch search to:searchbyto | notmuch_search_sanitize > EXPECTED
+notmuch search --query=sexp '(infix "to:searchbyto")' |  
notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "bad infix query 1"
+notmuch search --query=sexp '(infix "from:/unbalanced")' 2>&1|  
notmuch_search_sanitize > OUTPUT
+cat < EXPECTED
+notmuch search: Syntax error in query
+Syntax error in infix query: from:/unbalanced
+EOF

[PATCH 26/36] lib/parse-sexp: parse user headers

2021-08-24 Thread David Bremner
One subtle aspect is the replacement of _find_prefix with
_notmuch_database_prefix, which understands user headers. Otherwise
the code mainly consists of creating a fake prefix record (since the
user prefixes are not in the prefix table) and error handling.
---
 doc/man7/notmuch-sexp-queries.rst |  4 +++
 lib/parse-sexp.cc | 34 +---
 test/T081-sexpr-search.sh | 44 +++
 3 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index e1c01aa5..a9fd2f2b 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -224,6 +224,10 @@ EXAMPLES
 ``(not (to *))``
 Match messages with an empty or invalid 'To' and 'Cc' field.
 
+``(List *)``
+Match messages with a non-empty List-Id header, assuming
+configuration ``index.header.List=List-Id``
+
 NOTES
 =
 
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index e562e8f5..089de353 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -166,7 +166,7 @@ _sexp_parse_wildcard (notmuch_database_t *notmuch,
  Xapian::Query )
 {
 
-std::string term_prefix = parent ? _find_prefix (parent->name) : "";
+std::string term_prefix = parent ? _notmuch_database_prefix (notmuch, 
parent->name) : "";
 
 if (parent && ! (parent->flags & SEXP_FLAG_WILDCARD)) {
_notmuch_database_log (notmuch, "'%s' does not support wildcard 
queries\n", parent->name);
@@ -272,6 +272,27 @@ _sexp_parse_infix (notmuch_database_t *notmuch,  const 
_sexp_prefix_t *parent,
 return NOTMUCH_STATUS_SUCCESS;
 }
 
+static notmuch_status_t
+_sexp_parse_header (notmuch_database_t *notmuch, const _sexp_prefix_t *parent,
+   const sexp_t *sx, Xapian::Query )
+{
+_sexp_prefix_t user_prefix;
+
+user_prefix.name = sx->list->val;
+user_prefix.flags = SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD;
+
+if (parent) {
+   _notmuch_database_log (notmuch, "nested field: '%s' inside '%s'\n",
+  sx->list->val, parent->name);
+   return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+}
+
+parent = _prefix;
+
+return _sexp_combine_query (notmuch, parent, Xapian::Query::OP_AND, 
Xapian::Query::MatchAll,
+   sx->list->next, output);
+}
+
 /* Here we expect the s-expression to be a proper list, with first
  * element defining and operation, or as a special case the empty
  * list */
@@ -281,7 +302,7 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
   Xapian::Query )
 {
 if (sx->ty == SEXP_VALUE) {
-   std::string term_prefix = parent ? _find_prefix (parent->name) : "";
+   std::string term_prefix = parent ? _notmuch_database_prefix (notmuch, 
parent->name) : "";
 
if (sx->aty == SEXP_BASIC && strcmp (sx->val, "*") == 0) {
return _sexp_parse_wildcard (notmuch, parent, "", output);
@@ -291,6 +312,7 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
output = Xapian::Query (term_prefix + sx->val);
return NOTMUCH_STATUS_SUCCESS;
}
+
if (parent) {
return _sexp_parse_one_term (notmuch, term_prefix, sx, output);
} else {
@@ -299,7 +321,7 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
if (prefix->flags & SEXP_FLAG_FIELD) {
notmuch_status_t status;
Xapian::Query subquery;
-   term_prefix = _find_prefix (prefix->name);
+   term_prefix = _notmuch_database_prefix (notmuch, 
prefix->name);
status = _sexp_parse_one_term (notmuch, term_prefix, sx, 
subquery);
if (status)
return status;
@@ -323,6 +345,11 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
 }
 
+/* Check for user defined field */
+if (_notmuch_string_map_get (notmuch->user_prefix, sx->list->val)) {
+   return _sexp_parse_header (notmuch, parent, sx, output);
+}
+
 for (_sexp_prefix_t *prefix = prefixes; prefix && prefix->name; prefix++) {
if (strcmp (prefix->name, sx->list->val) == 0) {
if (prefix->flags & SEXP_FLAG_FIELD) {
@@ -362,7 +389,6 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
 }
 
 _notmuch_database_log (notmuch, "unknown prefix '%s'\n", sx->list->val);
-
 return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
 }
 
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index bd57282d..0484b231 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -732,4 +732,48 @@ notmuch search date:2009-11-18..2009-11-18 and tag:unread 
> EXPECTED
 notmuch search --query=sexp  '(and (infix 

[PATCH 29/36] CLI/config support saving s-expression queries

2021-08-24 Thread David Bremner
This commit does not enable using saved s-expression queries, only
saving and retrieving them from the config file or the database. Use
in queries will be enabled in a following commit.
---
 doc/man1/notmuch-config.rst |  5 +
 notmuch-config.c|  1 +
 test/T081-sexpr-search.sh   | 13 +
 3 files changed, 19 insertions(+)

diff --git a/doc/man1/notmuch-config.rst b/doc/man1/notmuch-config.rst
index 07a9eaf0..7d901758 100644
--- a/doc/man1/notmuch-config.rst
+++ b/doc/man1/notmuch-config.rst
@@ -251,6 +251,11 @@ paths are presumed relative to `$HOME` for items in section
 :any:`notmuch-search-terms(7)` for more information about named
 queries.
 
+**squery.<name>**
+Expansion for named query called <name>, using s-expression syntax. See
+:any:`notmuch-sexp-queries(7)` for more information about s-expression
+queries.
+
 FILES
 =
 
diff --git a/notmuch-config.c b/notmuch-config.c
index c0c91cc8..db00a26c 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -517,6 +517,7 @@ static const struct config_key
 { "index.decrypt",   false,  NULL },
 { "index.header.",   true,   validate_field_name },
 { "query.",  true,   NULL },
+{ "squery.", true,   validate_field_name },
 };
 
 static const config_key_info_t *
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 22e53335..e0de0304 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -812,4 +812,17 @@ notmuch search 'List:"notmuch notmuchmail org"' | 
notmuch_search_sanitize > EXPE
 notmuch search --query=sexp '(List notmuch notmuchmail org)' | 
notmuch_search_sanitize > OUTPUT
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "check saved query name"
+test_expect_code 1 "notmuch config set squery.test '(subject utf8-sübjéct)'"
+
+test_begin_subtest "roundtrip saved query (database)"
+notmuch config set --database squery.Test '(subject utf8-sübjéct)'
+output=$(notmuch config get squery.Test)
+test_expect_equal "$output" '(subject utf8-sübjéct)'
+
+test_begin_subtest "roundtrip saved query"
+notmuch config set squery.Test '(subject override subject)'
+output=$(notmuch config get squery.Test)
+test_expect_equal "$output" '(subject override subject)'
+
 test_done
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


[PATCH 07/36] lib/parse-sexp: parse single terms and the empty list.

2021-08-24 Thread David Bremner
There is not much of a parser here yet, but it already does some
useful error reporting. Most functionality sketched in the
documentation is not implemented yet; detailed documentation will
follow with the implementation.
---
 doc/conf.py   |  4 ++
 doc/index.rst |  1 +
 doc/man7/notmuch-sexp-queries.rst | 81 +++
 lib/Makefile.local|  3 +-
 lib/database-private.h|  7 +++
 lib/parse-sexp.cc | 55 +
 lib/query.cc  |  8 +--
 test/T080-search.sh   |  7 ---
 test/T081-sexpr-search.sh | 65 +
 9 files changed, 217 insertions(+), 14 deletions(-)
 create mode 100644 doc/man7/notmuch-sexp-queries.rst
 create mode 100644 lib/parse-sexp.cc
 create mode 100755 test/T081-sexpr-search.sh

diff --git a/doc/conf.py b/doc/conf.py
index 3ec55a61..1fbd102b 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -159,6 +159,10 @@ man_pages = [
  u'syntax for notmuch queries',
  [notmuch_authors], 7),
 
+('man7/notmuch-sexp-queries', 'notmuch-sexp-queries',
+ u's-expression syntax for notmuch queries',
+ [notmuch_authors], 7),
+
 ('man1/notmuch-show', 'notmuch-show',
  u'show messages matching the given search terms',
  [notmuch_authors], 1),
diff --git a/doc/index.rst b/doc/index.rst
index a3bf3480..fbdcf779 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -24,6 +24,7 @@ Contents:
man1/notmuch-restore
man1/notmuch-search
man7/notmuch-search-terms
+   man7/notmuch-sexp-queries
man1/notmuch-show
man1/notmuch-tag
python-bindings
diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
new file mode 100644
index ..1118f854
--- /dev/null
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -0,0 +1,81 @@
+.. _notmuch-sexp-queries(7):
+
+
+notmuch-sexp-queries
+
+
+SYNOPSIS
+
+
+**notmuch** **search** ``--query=sexp`` '(and (to santa) (date december))'
+
+DESCRIPTION
+===
+
+
+S-EXPRESSIONS
+-
+
+An *s-expression* is either an atom, or list of whitespace delimited
+s-expressions inside parentheses. Atoms are either
+
+*basic value*
+A basic value is an unquoted string containing no whitespace, double 
quotes, or
+parentheses.
+
+*quoted string*
+Double quotes (") delimit strings possibly containing whitespace
+or parentheses. These can contain double quote characters by
+escaping with backslash. E.g. ``"this is a quote \""``.
+
+S-EXPRESSION QUERIES
+
+
+An s-expression query is either an atom, the empty list, or a
+*compound query* consisting of a prefix atom (first element) defining
+a *field*, *logical operation*, or *modifier*, and 0 or more
+subqueries.
+
+``*``
+``()``
+The empty list matches all messages
+
+*term*
+Match all messages containing *term*, possibly after stemming
+or phase splitting.
+
+``(`` *field* |q1| |q2| ... |qn| ``)``
+Restrict the queries |q1| to |qn| to *field*, and combine with *and*
+(for most fields) or *or*. See :any:`fields` for more information.
+
+``(`` *operator* |q1| |q2| ... |qn| ``)``
+Combine queries |q1| to |qn|. See :any:`operators` for more information.
+
+``(`` *modifier* |q1| |q2| ... |qn| ``)``
+Combine queries |q1| to |qn|, and reinterpret the result (e.g. as a 
regular expression).
+See :any:`modifiers` for more information.
+
+.. _fields:
+
+FIELDS
+``
+
+.. _operators:
+
+OPERATORS
+`
+
+.. _modifiers:
+
+MODIFIERS
+`
+
+EXAMPLES
+
+
+``Wizard``
+Match all messages containing the word "wizard", ignoring case.
+
+.. |q1| replace:: :math:`q_1`
+.. |q2| replace:: :math:`q_2`
+.. |qn| replace:: :math:`q_n`
diff --git a/lib/Makefile.local b/lib/Makefile.local
index e2d4b91d..1378a74b 100644
--- a/lib/Makefile.local
+++ b/lib/Makefile.local
@@ -63,7 +63,8 @@ libnotmuch_cxx_srcs = \
$(dir)/features.cc  \
$(dir)/prefix.cc\
$(dir)/open.cc  \
-   $(dir)/init.cc
+   $(dir)/init.cc  \
+   $(dir)/parse-sexp.cc
 
 libnotmuch_modules := $(libnotmuch_c_srcs:.c=.o) $(libnotmuch_cxx_srcs:.cc=.o)
 
diff --git a/lib/database-private.h b/lib/database-private.h
index 9706c17e..f206efaf 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -300,4 +300,11 @@ _notmuch_database_setup_standard_query_fields 
(notmuch_database_t *notmuch);
 notmuch_status_t
 _notmuch_database_setup_user_query_fields (notmuch_database_t *notmuch);
 
+#if __cplusplus
+/* parse-sexp.cc */
+notmuch_status_t
+_notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char 
*querystr,
+ Xapian::Query );
+#endif
+
 #endif
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
new file mode 100644
index ..66dbdb41
--- /dev/null
+++ b/lib/parse-sexp.cc
@@ -0,0 +1,55 @@

[PATCH 06/36] lib: add new status code for query syntax errors.

2021-08-24 Thread David Bremner
This will help provide more meaningful error messages without special
casing on the client side.
---
 bindings/python-cffi/notmuch2/_build.py  | 1 +
 bindings/python-cffi/notmuch2/_errors.py | 3 +++
 lib/database.cc  | 2 ++
 lib/notmuch.h| 4 
 4 files changed, 10 insertions(+)

diff --git a/bindings/python-cffi/notmuch2/_build.py 
b/bindings/python-cffi/notmuch2/_build.py
index f712b6c5..24df939e 100644
--- a/bindings/python-cffi/notmuch2/_build.py
+++ b/bindings/python-cffi/notmuch2/_build.py
@@ -53,6 +53,7 @@ ffibuilder.cdef(
 NOTMUCH_STATUS_NO_CONFIG,
 NOTMUCH_STATUS_NO_DATABASE,
 NOTMUCH_STATUS_DATABASE_EXISTS,
+NOTMUCH_STATUS_BAD_QUERY_SYNTAX,
 NOTMUCH_STATUS_LAST_STATUS
 } notmuch_status_t;
 typedef enum {
diff --git a/bindings/python-cffi/notmuch2/_errors.py 
b/bindings/python-cffi/notmuch2/_errors.py
index 9301073e..f55cc96b 100644
--- a/bindings/python-cffi/notmuch2/_errors.py
+++ b/bindings/python-cffi/notmuch2/_errors.py
@@ -56,6 +56,8 @@ class NotmuchError(Exception):
 NoDatabaseError,
 capi.lib.NOTMUCH_STATUS_DATABASE_EXISTS:
 DatabaseExistsError,
+capi.lib.NOTMUCH_STATUS_BAD_QUERY_SYNTAX:
+QuerySyntaxError,
 }
 return types[status]
 
@@ -103,6 +105,7 @@ class IllegalArgumentError(NotmuchError): pass
 class NoConfigError(NotmuchError): pass
 class NoDatabaseError(NotmuchError): pass
 class DatabaseExistsError(NotmuchError): pass
+class QuerySyntaxError(NotmuchError): pass
 
 class ObjectDestroyedError(NotmuchError):
 """The object has already been destroyed and it's memory freed.
diff --git a/lib/database.cc b/lib/database.cc
index 31794900..7eb0de79 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -309,6 +309,8 @@ notmuch_status_to_string (notmuch_status_t status)
return "No database found";
 case NOTMUCH_STATUS_DATABASE_EXISTS:
return "Database exists, not recreated";
+case NOTMUCH_STATUS_BAD_QUERY_SYNTAX:
+   return "Syntax error in query";
 default:
 case NOTMUCH_STATUS_LAST_STATUS:
return "Unknown error status value";
diff --git a/lib/notmuch.h b/lib/notmuch.h
index 2f1ee951..546643e8 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -220,6 +220,10 @@ typedef enum _notmuch_status {
  * Database exists, so not (re)-created
  */
 NOTMUCH_STATUS_DATABASE_EXISTS,
+/**
+ * Syntax error in query
+ */
+NOTMUCH_STATUS_BAD_QUERY_SYNTAX,
 /**
  * Not an actual status value. Just a way to find out how many
  * valid status values there are.
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


[PATCH 19/36] lib: factor out query construction from regexp

2021-08-24 Thread David Bremner
This will allow re-use of this code outside of the Xapian query parser.
---
 lib/database-private.h |  5 +++
 lib/regexp-fields.cc   | 81 +-
 lib/regexp-fields.h|  6 
 3 files changed, 68 insertions(+), 24 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index 85d55299..cf4eb94b 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -306,6 +306,11 @@ _notmuch_database_setup_user_query_fields 
(notmuch_database_t *notmuch);
 notmuch_status_t
 _notmuch_sexp_string_to_xapian_query (notmuch_database_t *notmuch, const char 
*querystr,
  Xapian::Query );
+
+notmuch_status_t
+_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, 
std::string field,
+ std::string regexp_str,
+ Xapian::Query , std::string );
 #endif
 
 #endif
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index 0feb50e5..c6d9d94f 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -26,27 +26,32 @@
 #include "notmuch-private.h"
 #include "database-private.h"
 
-static void
-compile_regex (regex_t , const char *str)
+notmuch_status_t
+compile_regex (regex_t , const char *str, std::string )
 {
 int err = regcomp (, str, REG_EXTENDED | REG_NOSUB);
 
 if (err != 0) {
size_t len = regerror (err, , NULL, 0);
char *buffer = new char[len];
-   std::string msg = "Regexp error: ";
+   msg = "Regexp error: ";
(void) regerror (err, , buffer, len);
msg.append (buffer, len);
delete[] buffer;
 
-   throw Xapian::QueryParserError (msg);
+   return NOTMUCH_STATUS_ILLEGAL_ARGUMENT;
 }
+return NOTMUCH_STATUS_SUCCESS;
 }
 
 RegexpPostingSource::RegexpPostingSource (Xapian::valueno slot, const 
std::string )
 : slot_ (slot)
 {
-compile_regex (regexp_, regexp.c_str ());
+std::string msg;
+notmuch_status_t status = compile_regex (regexp_, regexp.c_str (), msg);
+
+if (status)
+   throw Xapian::QueryParserError (msg);
 }
 
 RegexpPostingSource::~RegexpPostingSource ()
@@ -141,18 +146,54 @@ _find_slot (std::string prefix)
return Xapian::BAD_VALUENO;
 }
 
-RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
+RegexpFieldProcessor::RegexpFieldProcessor (std::string field_,
notmuch_field_flag_t options_,
Xapian::QueryParser _,
notmuch_database_t *notmuch_)
-: slot (_find_slot (prefix)),
-term_prefix (_find_prefix (prefix.c_str ())),
+: slot (_find_slot (field_)),
+field (field_),
+term_prefix (_find_prefix (field_.c_str ())),
 options (options_),
 parser (parser_),
 notmuch (notmuch_)
 {
 };
 
+notmuch_status_t
+_notmuch_regexp_to_query (notmuch_database_t *notmuch, Xapian::valueno slot, 
std::string field,
+ std::string regexp_str,
+ Xapian::Query &output, std::string &msg)
+{
+regex_t regexp;
+notmuch_status_t status;
+
+status = compile_regex (regexp, regexp_str.c_str (), msg);
+if (status) {
+   _notmuch_database_log_append (notmuch, "error compiling regex %s", 
msg.c_str ());
+   return status;
+}
+
+if (slot == Xapian::BAD_VALUENO)
+   slot = _find_slot (field);
+
+if (slot == Xapian::BAD_VALUENO) {
+   std::string term_prefix = _find_prefix (field.c_str ());
+   std::vector terms;
+
+   for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin 
(term_prefix);
+it != notmuch->xapian_db->allterms_end (); ++it) {
+   if (regexec (&regexp, (*it).c_str () + term_prefix.size (),
+0, NULL, 0) == 0)
+   terms.push_back (*it);
+   }
+   output = Xapian::Query (Xapian::Query::OP_OR, terms.begin (), terms.end 
());
+} else {
+   RegexpPostingSource *postings = new RegexpPostingSource (slot, 
regexp_str);
+   output = Xapian::Query (postings->release ());
+}
+return NOTMUCH_STATUS_SUCCESS;
+}
+
 Xapian::Query
 RegexpFieldProcessor::operator() (const std::string & str)
 {
@@ -168,23 +209,15 @@ RegexpFieldProcessor::operator() (const std::string & str)
 
 if (str.at (0) == '/') {
if (str.length () > 1 && str.at (str.size () - 1) == '/') {
+   Xapian::Query query;
std::string regexp_str = str.substr (1, str.size () - 2);
-   if (slot != Xapian::BAD_VALUENO) {
-   RegexpPostingSource *postings = new RegexpPostingSource (slot, 
regexp_str);
-   return Xapian::Query (postings->release ());
-   } else {
-   std::vector terms;
-   regex_t regexp;
-
-   compile_regex (regexp, regexp_str.c_str ());
-   for (Xapian::TermIterator it = 
notmuch->xapian_db->allterms_begin (term_prefix);
-it != 

[PATCH 34/36] CLI/{count, dump, reindex, reply, show}: enable sexp queries

2021-08-24 Thread David Bremner
The change in each case is to call notmuch_query_create_with_syntax,
relying on the already inherited shared options.  As a bonus we get
improved error handling from the new query creation API.

The remaining subcommand is 'tag', which is a bit trickier.
---
 notmuch-count.c   | 10 ++
 notmuch-dump.c|  9 +
 notmuch-reindex.c |  8 
 notmuch-reply.c   |  9 +
 notmuch-show.c|  8 
 test/T060-count.sh| 24 
 test/T220-reply.sh| 19 +++
 test/T240-dump-restore.sh | 13 +
 test/T520-show.sh | 18 ++
 test/T700-reindex.sh  | 29 +
 10 files changed, 123 insertions(+), 24 deletions(-)

diff --git a/notmuch-count.c b/notmuch-count.c
index e8c545e3..0d9046a8 100644
--- a/notmuch-count.c
+++ b/notmuch-count.c
@@ -74,10 +74,12 @@ print_count (notmuch_database_t *notmuch, const char 
*query_str,
 int ret = 0;
 notmuch_status_t status;
 
-query = notmuch_query_create (notmuch, query_str);
-if (query == NULL) {
-   fprintf (stderr, "Out of memory\n");
-   return -1;
+status = notmuch_query_create_with_syntax (notmuch, query_str,
+  shared_option_query_syntax (),
+  );
+if (print_status_database ("notmuch count", notmuch, status)) {
+   ret = -1;
+   goto DONE;
 }
 
 for (notmuch_config_values_start (exclude_tags);
diff --git a/notmuch-dump.c b/notmuch-dump.c
index 5c8213be..cb82d61f 100644
--- a/notmuch-dump.c
+++ b/notmuch-dump.c
@@ -232,11 +232,12 @@ database_dump_file (notmuch_database_t *notmuch, gzFile 
output,
 if (! query_str)
query_str = "";
 
-query = notmuch_query_create (notmuch, query_str);
-if (query == NULL) {
-   fprintf (stderr, "Out of memory\n");
+status = notmuch_query_create_with_syntax (notmuch, query_str,
+  shared_option_query_syntax (),
+  );
+if (print_status_database ("notmuch dump", notmuch, status))
return EXIT_FAILURE;
-}
+
 /* Don't ask xapian to sort by Message-ID. Xapian optimizes returning the
  * first results quickly at the expense of total time.
  */
diff --git a/notmuch-reindex.c b/notmuch-reindex.c
index b40edbb6..49eacd47 100644
--- a/notmuch-reindex.c
+++ b/notmuch-reindex.c
@@ -49,11 +49,11 @@ reindex_query (notmuch_database_t *notmuch, const char 
*query_string,
 
 notmuch_status_t ret = NOTMUCH_STATUS_SUCCESS;
 
-query = notmuch_query_create (notmuch, query_string);
-if (query == NULL) {
-   fprintf (stderr, "Out of memory.\n");
+status = notmuch_query_create_with_syntax (notmuch, query_string,
+  shared_option_query_syntax (),
+  );
+if (print_status_database ("notmuch reindex", notmuch, status))
return 1;
-}
 
 /* reindexing is not interested in any special sort order */
 notmuch_query_set_sort (query, NOTMUCH_SORT_UNSORTED);
diff --git a/notmuch-reply.c b/notmuch-reply.c
index 5d5f95a3..2fb26cbc 100644
--- a/notmuch-reply.c
+++ b/notmuch-reply.c
@@ -716,6 +716,7 @@ notmuch_reply_command (notmuch_database_t *notmuch, int 
argc, char *argv[])
 };
 int format = FORMAT_DEFAULT;
 int reply_all = true;
+notmuch_status_t status;
 
 notmuch_opt_desc_t options[] = {
{ .opt_keyword = , .name = "format", .keywords =
@@ -758,11 +759,11 @@ notmuch_reply_command (notmuch_database_t *notmuch, int 
argc, char *argv[])
return EXIT_FAILURE;
 }
 
-query = notmuch_query_create (notmuch, query_string);
-if (query == NULL) {
-   fprintf (stderr, "Out of memory\n");
+status = notmuch_query_create_with_syntax (notmuch, query_string,
+  shared_option_query_syntax (),
+  );
+if (print_status_database ("notmuch reply", notmuch, status))
return EXIT_FAILURE;
-}
 
 if (do_reply (notmuch, query, , format, reply_all) != 0)
return EXIT_FAILURE;
diff --git a/notmuch-show.c b/notmuch-show.c
index 667fbee8..2848c9c3 100644
--- a/notmuch-show.c
+++ b/notmuch-show.c
@@ -1364,11 +1364,11 @@ notmuch_show_command (notmuch_database_t *notmuch, int 
argc, char *argv[])
return EXIT_FAILURE;
 }
 
-query = notmuch_query_create (notmuch, query_string);
-if (query == NULL) {
-   fprintf (stderr, "Out of memory\n");
+status = notmuch_query_create_with_syntax (notmuch, query_string,
+  shared_option_query_syntax (),
+  );
+if (print_status_database ("notmuch show", notmuch, status))
return EXIT_FAILURE;
-}
 
 

[PATCH 15/36] lib/parse-sexp: 'starts-with' wildcard searches

2021-08-24 Thread David Bremner
The many tests are potentially overkill, but they could catch typos in the
prefixes table. As a simplifying assumption, for now we assume a
single argument to the wildcard operator, as this matches the Xapian
semantics. The name 'starts-with' is chosen to emphasize the supported
case of wildcards in current (1.4.x) Xapian.
---
 doc/man7/notmuch-sexp-queries.rst |  13 ++
 lib/parse-sexp.cc |  61 +++---
 test/T081-sexpr-search.sh | 196 ++
 3 files changed, 255 insertions(+), 15 deletions(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index 6e68fcc3..c83ce3d0 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -162,10 +162,20 @@ EXAMPLES
 ``(id 1234@invalid blah@test)``
 Matches Message-Id "1234@invalid" *or* Message-Id "blah@test"
 
+``(starts-with prelim)``
+Match any words starting with "prelim".
+
 ``(subject quick "brown fox")``
 Match messages whose subject contains "quick" (anywhere, stemmed) and
 the phrase "brown fox".
 
+``(subject (starts-with prelim))``
+Matches any word starting with "prelim", inside a message subject.
+
+``(subject (starts-with quick) "brown fox")``
+Match messages whose subject contains "quick brown fox", but also
+"brown fox quicksand".
+
 ``(to (or b...@example.com mall...@example.org))`` ``(or (to b...@example.com) 
(to mall...@example.org))``
 Match in the "To" or "Cc" headers, "b...@example.com",
 "mall...@example.org", and also "b...@example.com.au" since it
@@ -180,6 +190,9 @@ NOTES
 
 .. [#aka-bool] a.k.a. boolean prefixes
 
+.. [#not-body] Due to the way ``body`` is implemented in notmuch,
+   this modifier is not supported in the ``body`` field.
+
 .. |q1| replace:: :math:`q_1`
 .. |q2| replace:: :math:`q_2`
 .. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 26b7e5f1..692b3849 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -11,6 +11,8 @@ typedef enum {
 SEXP_FLAG_NONE = 0,
 SEXP_FLAG_FIELD= 1 << 0,
 SEXP_FLAG_BOOLEAN  = 1 << 1,
+SEXP_FLAG_SINGLE   = 1 << 2,
+SEXP_FLAG_WILDCARD = 1 << 3,
 } _sexp_flag_t;
 
 /*
@@ -42,38 +44,39 @@ static _sexp_prefix_t prefixes[] =
 { "and",Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
   SEXP_FLAG_NONE },
 { "attachment", Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
 { "body",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
   SEXP_FLAG_FIELD },
 { "from",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
 { "folder", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
 { "id", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
 { "is", Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
 { "mid",Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
 { "mimetype",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
 { "not",Xapian::Query::OP_AND_NOT,  
Xapian::Query::MatchAll,
   SEXP_FLAG_NONE },
 { "or", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
   SEXP_FLAG_NONE },
 { "path",   Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
 { "property",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD
-  | SEXP_FLAG_BOOLEAN },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
+{ "starts-with",Xapian::Query::OP_WILDCARD, 
Xapian::Query::MatchAll,
+  SEXP_FLAG_SINGLE },
 { "subject",Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD },
+  SEXP_FLAG_FIELD | SEXP_FLAG_WILDCARD },
 { "tag",Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD },
 { "thread", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
-  SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN },
+  

[PATCH 12/36] util/unicode: allow calling from C++

2021-08-24 Thread David Bremner
The omission of the 'extern "C"' machinery seems like an oversight.
---
 util/unicode-util.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/util/unicode-util.h b/util/unicode-util.h
index 32d1e6ef..1bb9336a 100644
--- a/util/unicode-util.h
+++ b/util/unicode-util.h
@@ -4,9 +4,16 @@
 #include 
 #include 
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /* The utf8 encoded string would tokenize as a single word, according
  * to xapian. */
 bool unicode_word_utf8 (const char *str);
 typedef gunichar notmuch_unichar;
 
+#ifdef __cplusplus
+}
+#endif
 #endif
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


[PATCH 30/36] lib/parse-sexp: support saved s-expression queries

2021-08-24 Thread David Bremner
It turns out there is not really much code in query-fp.cc useful for
supporting the new syntax. The code we could potentially factor out
amounts to calling notmuch_database_get_config; both the key
construction and the parsing of the results are specific to the query
syntax involved.
---
 lib/parse-sexp.cc | 50 ++-
 test/T081-sexpr-search.sh | 32 +
 2 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 0fbb2afc..291480ca 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -291,6 +291,49 @@ _sexp_parse_header (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent,
sx->list->next, output);
 }
 
+static notmuch_status_t
+maybe_saved_squery (notmuch_database_t *notmuch, const _sexp_prefix_t *parent, 
const sexp_t *sx,
+   Xapian::Query )
+{
+char *key;
+char *expansion = NULL;
+notmuch_status_t status;
+sexp_t *saved_sexp;
+void *local = talloc_new (notmuch);
+char *buf;
+
+key = talloc_asprintf (local, "squery.%s", sx->list->val);
+if (! key) {
+   status = NOTMUCH_STATUS_OUT_OF_MEMORY;
+   goto DONE;
+}
+
+status = notmuch_database_get_config (notmuch, key, &expansion);
+if (status)
+   goto DONE;
+if (EMPTY_STRING (expansion)) {
+   status = NOTMUCH_STATUS_IGNORED;
+   goto DONE;
+}
+
+buf = talloc_strdup (local, expansion);
+/* XXX TODO: free this memory */
+saved_sexp = parse_sexp (buf, strlen (expansion));
+if (! saved_sexp) {
+   _notmuch_database_log (notmuch, "invalid saved s-expression query: 
'%s'\n", expansion);
+   status = NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+   goto DONE;
+}
+
+status =  _sexp_to_xapian_query (notmuch, parent, saved_sexp, output);
+
+  DONE:
+if (local)
+   talloc_free (local);
+
+return status;
+}
+
 /* Here we expect the s-expression to be a proper list, with first
  * element defining and operation, or as a special case the empty
  * list */
@@ -299,6 +342,8 @@ static notmuch_status_t
 _sexp_to_xapian_query (notmuch_database_t *notmuch, const _sexp_prefix_t 
*parent, const sexp_t *sx,
   Xapian::Query )
 {
+notmuch_status_t status;
+
 if (sx->ty == SEXP_VALUE) {
std::string term_prefix = parent ? _notmuch_database_prefix (notmuch, 
parent->name) : "";
 
@@ -317,7 +362,6 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
Xapian::Query accumulator;
for (_sexp_prefix_t *prefix = prefixes; prefix->name; prefix++) {
if (prefix->flags & SEXP_FLAG_FIELD) {
-   notmuch_status_t status;
Xapian::Query subquery;
term_prefix = _notmuch_database_prefix (notmuch, 
prefix->name);
status = _sexp_parse_one_term (notmuch, term_prefix, sx, 
subquery);
@@ -343,6 +387,10 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
 }
 
+status = maybe_saved_squery (notmuch, parent, sx, output);
+if (status != NOTMUCH_STATUS_IGNORED)
+   return status;
+
 /* Check for user defined field */
 if (_notmuch_string_map_get (notmuch->user_prefix, sx->list->val)) {
return _sexp_parse_header (notmuch, parent, sx, output);
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index e0de0304..e8a77318 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -825,4 +825,36 @@ notmuch config set squery.Test '(subject override subject)'
 output=$(notmuch config get squery.Test)
 test_expect_equal "$output" '(subject override subject)'
 
+test_begin_subtest "unknown saved query"
+notmuch search --query=sexp '(Unknown foo bar)' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+unknown prefix 'Unknown'
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "syntax error in saved query"
+notmuch config set squery.Bad '(Bad'
+notmuch search --query=sexp '(Bad foo bar)' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+invalid saved s-expression query: '(Bad'
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Saved Search by 'tag' and 'subject'"
+notmuch search tag:inbox and subject:maildir | notmuch_search_sanitize > 
EXPECTED
+notmuch config set squery.TagSubject  '(and (tag inbox) (subject maildir))'
+notmuch search --query=sexp '(TagSubject)' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "Saved Search: illegal nesting"
+notmuch config set squery.TagSubject  '(and (tag inbox) (subject maildir))'
+notmuch search --query=sexp '(subject (TagSubject))' >OUTPUT 2>&1
+cat <<EOF > EXPECTED
+notmuch search: Syntax error in query
+nested field: 'tag' inside 'subject'
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 

[PATCH 28/36] lib/parse-sexp: handle saved queries

2021-08-24 Thread David Bremner
This provides functionality analogous to query: in the Xapian
QueryParser based parser. Perhaps counterintuitively, the saved
queries currently have to be in the original query syntax (i.e. not
s-expressions).
---
 doc/man7/notmuch-sexp-queries.rst |  6 ++
 lib/parse-sexp.cc | 24 ++---
 test/T081-sexpr-search.sh | 36 +++
 3 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index a9fd2f2b..db3f8837 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -153,6 +153,12 @@ that are neither operators nor fields.
 those matching all of |q1| ... |qn|. Supported in most term [#not-path]_ or
 phrase fields. Most commonly used in the ``thread`` field.
 
+``(query`` *atom* ``)``
+Expand to the saved query named by *atom*. See
+:any:`notmuch-config(1)` for more. Note that the saved query must
+be in infix syntax (:any:`notmuch-search-terms(7)`). Not supported
+inside fields.
+
 ``(regex`` *atom* ``)`` ``(rx`` *atom* ``)``
 Interpret *atom* as a POSIX.2 regular expression (see
 :manpage:`regex(7)`). This applies in term fields and a subset 
[#not-phrase]_ of
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 089de353..0fbb2afc 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -17,6 +17,7 @@ typedef enum {
 SEXP_FLAG_DO_REGEX = 1 << 5,
 SEXP_FLAG_EXPAND   = 1 << 6,
 SEXP_FLAG_DO_EXPAND = 1 << 7,
+SEXP_FLAG_ORPHAN   = 1 << 8,
 } _sexp_flag_t;
 
 /*
@@ -58,7 +59,7 @@ static _sexp_prefix_t prefixes[] =
 { "id", Xapian::Query::OP_OR,   
Xapian::Query::MatchNothing,
   SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
 { "infix",  Xapian::Query::OP_INVALID,  
Xapian::Query::MatchAll,
-  SEXP_FLAG_SINGLE },
+  SEXP_FLAG_SINGLE | SEXP_FLAG_ORPHAN },
 { "is", Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
   SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
 { "matching",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
@@ -77,6 +78,8 @@ static _sexp_prefix_t prefixes[] =
   SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX },
 { "property",   Xapian::Query::OP_AND,  
Xapian::Query::MatchAll,
   SEXP_FLAG_FIELD | SEXP_FLAG_BOOLEAN | SEXP_FLAG_WILDCARD | 
SEXP_FLAG_REGEX | SEXP_FLAG_EXPAND },
+{ "query",  Xapian::Query::OP_INVALID,  
Xapian::Query::MatchNothing,
+  SEXP_FLAG_SINGLE | SEXP_FLAG_ORPHAN },
 { "regex",  Xapian::Query::OP_INVALID,  
Xapian::Query::MatchAll,
   SEXP_FLAG_SINGLE | SEXP_FLAG_DO_REGEX },
 { "rx", Xapian::Query::OP_INVALID,  
Xapian::Query::MatchAll,
@@ -245,13 +248,8 @@ _sexp_expand_query (notmuch_database_t *notmuch,
 }
 
 static notmuch_status_t
-_sexp_parse_infix (notmuch_database_t *notmuch,  const _sexp_prefix_t *parent,
-  const sexp_t *sx, Xapian::Query )
+_sexp_parse_infix (notmuch_database_t *notmuch, const sexp_t *sx, 
Xapian::Query )
 {
-if (parent) {
-   _notmuch_database_log (notmuch, "'infix' not supported inside '%s'\n", 
parent->name);
-   return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
-}
 try {
output = notmuch->query_parser->parse_query (sx->val, 
NOTMUCH_QUERY_PARSER_FLAGS);
 } catch (const Xapian::QueryParserError ) {
@@ -361,6 +359,12 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
parent = prefix;
}
 
+   if (parent && (prefix->flags & SEXP_FLAG_ORPHAN)) {
+   _notmuch_database_log (notmuch, "'%s' not supported inside 
'%s'\n",
+  prefix->name, parent->name);
+   return NOTMUCH_STATUS_BAD_QUERY_SYNTAX;
+   }
+
if ((prefix->flags & SEXP_FLAG_SINGLE) &&
(! sx->list->next || sx->list->next->next || sx->list->next->ty 
!= SEXP_VALUE)) {
_notmuch_database_log (notmuch, "'%s' expects single atom as 
argument\n",
@@ -369,7 +373,11 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
_sexp_prefix_t *parent
}
 
if (strcmp (prefix->name, "infix") == 0) {
-   return _sexp_parse_infix (notmuch, parent, sx->list->next, 
output);
+   return _sexp_parse_infix (notmuch, sx->list->next, output);
+   }
+
+   if (strcmp (prefix->name, "query") == 0) {
+   return _notmuch_query_name_to_query (notmuch, 
sx->list->next->val, output);
}
 
if (prefix->xapian_op == Xapian::Query::OP_WILDCARD)
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 0484b231..22e53335 100755
--- a/test/T081-sexpr-search.sh
+++ 

[PATCH 27/36] lib: factor out expansion of saved queries.

2021-08-24 Thread David Bremner
This is intended to allow use outside of the Xapian query parser.
---
 lib/database-private.h |  5 +
 lib/query-fp.cc| 22 +++---
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index 9ee3b933..8b9d67fe 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -327,6 +327,11 @@ _notmuch_regexp_to_query (notmuch_database_t *notmuch, 
Xapian::valueno slot, std
  std::string regexp_str,
  Xapian::Query , std::string );
 
+/* query-fp.cc */
+notmuch_status_t
+_notmuch_query_name_to_query (notmuch_database_t *notmuch, const std::string 
name,
+ Xapian::Query );
+
 #if HAVE_SFSEXP
 /* parse-sexp.cc */
 notmuch_status_t
diff --git a/lib/query-fp.cc b/lib/query-fp.cc
index b980b7f0..75b1d875 100644
--- a/lib/query-fp.cc
+++ b/lib/query-fp.cc
@@ -24,17 +24,33 @@
 #include "query-fp.h"
 #include 
 
-Xapian::Query
-QueryFieldProcessor::operator() (const std::string & name)
+notmuch_status_t
+_notmuch_query_name_to_query (notmuch_database_t *notmuch, const std::string 
name,
+ Xapian::Query )
 {
 std::string key = "query." + name;
 char *expansion;
 notmuch_status_t status;
 
 status = notmuch_database_get_config (notmuch, key.c_str (), );
+if (status)
+   return status;
+
+output = notmuch->query_parser->parse_query (expansion, 
NOTMUCH_QUERY_PARSER_FLAGS);
+return NOTMUCH_STATUS_SUCCESS;
+}
+
+Xapian::Query
+QueryFieldProcessor::operator() (const std::string & name)
+{
+notmuch_status_t status;
+Xapian::Query output;
+
+status = _notmuch_query_name_to_query (notmuch, name, output);
 if (status) {
throw Xapian::QueryParserError ("error looking up key" + name);
 }
 
-return parser.parse_query (expansion, NOTMUCH_QUERY_PARSER_FLAGS);
+return output;
+
 }
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


[PATCH 09/36] lib/parse-sexp: stem unquoted atoms

2021-08-24 Thread David Bremner
This is somewhat less DWIM than the Xapian query parser, but it has
the advantage of simplicity.
---
 doc/man7/notmuch-sexp-queries.rst | 10 --
 lib/parse-sexp.cc | 10 +++---
 test/T081-sexpr-search.sh |  5 -
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/doc/man7/notmuch-sexp-queries.rst 
b/doc/man7/notmuch-sexp-queries.rst
index 1118f854..d177934d 100644
--- a/doc/man7/notmuch-sexp-queries.rst
+++ b/doc/man7/notmuch-sexp-queries.rst
@@ -41,8 +41,10 @@ subqueries.
 The empty list matches all messages
 
 *term*
-Match all messages containing *term*, possibly after stemming
-or phase splitting.
+Match all messages containing *term*, possibly after
+stemming or phrase splitting. For discussion of stemming in
+notmuch see :any:`notmuch-search-terms(7)`. Stemming only applies
+to unquoted terms (basic values) in s-expression queries.
 
 ``(`` *field* |q1| |q2| ... |qn| ``)``
 Restrict the queries |q1| to |qn| to *field*, and combine with *and*
@@ -76,6 +78,10 @@ EXAMPLES
 ``Wizard``
 Match all messages containing the word "wizard", ignoring case.
 
+``added``
+Match all messages containing "added", but also those containing "add", 
"additional",
+"Additional", "adds", etc., via stemming.
+
 .. |q1| replace:: :math:`q_1`
 .. |q2| replace:: :math:`q_2`
 .. |qn| replace:: :math:`q_n`
diff --git a/lib/parse-sexp.cc b/lib/parse-sexp.cc
index 66dbdb41..f031d790 100644
--- a/lib/parse-sexp.cc
+++ b/lib/parse-sexp.cc
@@ -1,5 +1,4 @@
-#include <xapian.h>
-#include "notmuch-private.h"
+#include "database-private.h"
 
 #if HAVE_SFSEXP
 #include "sexp.h"
@@ -18,7 +17,12 @@ _sexp_to_xapian_query (notmuch_database_t *notmuch, const 
sexp_t *sx,
 {
 
 if (sx->ty == SEXP_VALUE) {
-   output = Xapian::Query (Xapian::Unicode::tolower (sx->val));
+   std::string term = Xapian::Unicode::tolower (sx->val);
+   Xapian::Stem stem = *(notmuch->stemmer);
+   if (sx->aty == SEXP_BASIC)
+   term = "Z" + stem (term);
+
+   output = Xapian::Query (term);
return NOTMUCH_STATUS_SUCCESS;
 }
 
diff --git a/test/T081-sexpr-search.sh b/test/T081-sexpr-search.sh
index 46cc712c..4e3d755c 100755
--- a/test/T081-sexpr-search.sh
+++ b/test/T081-sexpr-search.sh
@@ -29,11 +29,14 @@ EOF
 test_expect_equal_file EXPECTED OUTPUT
 
 test_begin_subtest "single term in body, stemmed version"
-test_subtest_known_broken
 notmuch search arriv > EXPECTED
 notmuch search --query=sexp arriv > OUTPUT
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "single term in body, unstemmed version"
+notmuch search --query=sexp '"arriv"' > OUTPUT
+test_expect_equal_file /dev/null OUTPUT
+
 test_begin_subtest "Unbalanced parens"
 # A code 1 indicates the error was handled (a crash will return e.g. 139).
 test_expect_code 1 "notmuch search --query=sexp '('"
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


[PATCH 33/36] CLI: move query syntax to shared option

2021-08-24 Thread David Bremner
This will allow easy addition of a query syntax option to other subcommands.
---
 notmuch-client.h |  2 ++
 notmuch-search.c |  7 ++-
 notmuch.c| 12 
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/notmuch-client.h b/notmuch-client.h
index f820791f..96d81166 100644
--- a/notmuch-client.h
+++ b/notmuch-client.h
@@ -487,6 +487,8 @@ print_status_gzbytes (const char *loc,
 
 extern const notmuch_opt_desc_t notmuch_shared_options [];
 
+notmuch_query_syntax_t shared_option_query_syntax ();
+
 void notmuch_process_shared_options (notmuch_database_t *notmuch, const char 
*subcommand_name);
 int notmuch_minimal_options (const char *subcommand_name,
 int argc, char **argv);
diff --git a/notmuch-search.c b/notmuch-search.c
index 39d55bfe..327e1445 100644
--- a/notmuch-search.c
+++ b/notmuch-search.c
@@ -722,7 +722,8 @@ _notmuch_search_prepare (search_context_t *ctx, int argc, 
char *argv[])
 
 if (print_status_database ("notmuch search", ctx->notmuch,
   notmuch_query_create_with_syntax (ctx->notmuch, 
query_str,
-
ctx->query_syntax, &ctx->query)))
+
shared_option_query_syntax (),
&ctx->query)))
return EXIT_FAILURE;
 
 notmuch_query_set_sort (ctx->query, ctx->sort);
@@ -788,10 +789,6 @@ static const notmuch_opt_desc_t common_options[] = {
  { "text", NOTMUCH_FORMAT_TEXT },
  { "text0", NOTMUCH_FORMAT_TEXT0 },
  { 0, 0 } } },
-{ .opt_keyword = &search_context.query_syntax, .name = "query", .keywords =
- (notmuch_keyword_t []){ { "infix", NOTMUCH_QUERY_SYNTAX_XAPIAN },
- { "sexp", NOTMUCH_QUERY_SYNTAX_SEXP },
- { 0, 0 } } },
 { .opt_int = &notmuch_format_version, .name = "format-version" },
 { }
 };
diff --git a/notmuch.c b/notmuch.c
index 3824bf19..3fb58bf2 100644
--- a/notmuch.c
+++ b/notmuch.c
@@ -54,14 +54,26 @@ notmuch_exit_if_unmatched_db_uuid (notmuch_database_t 
*notmuch);
 
 static bool print_version = false, print_help = false;
 static const char *notmuch_requested_db_uuid = NULL;
+static int query_syntax = NOTMUCH_QUERY_SYNTAX_XAPIAN;
 
 const notmuch_opt_desc_t notmuch_shared_options [] = {
 { .opt_bool = &print_version, .name = "version" },
 { .opt_bool = &print_help, .name = "help" },
 { .opt_string = &notmuch_requested_db_uuid, .name = "uuid" },
+{ .opt_keyword = &query_syntax, .name = "query", .keywords =
+ (notmuch_keyword_t []){ { "infix", NOTMUCH_QUERY_SYNTAX_XAPIAN },
+ { "sexp", NOTMUCH_QUERY_SYNTAX_SEXP },
+ { 0, 0 } } },
+
 { }
 };
 
+notmuch_query_syntax_t
+shared_option_query_syntax ()
+{
+return query_syntax;
+}
+
 /* any subcommand wanting to support these options should call
  * inherit notmuch_shared_options and call
  * notmuch_process_shared_options (notmuch, subcommand_name);
-- 
2.32.0
___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org


v5 sexp query parser

2021-08-24 Thread David Bremner
Changes since v4:

1) --query=sexp is now recognized for all notmuch subcommands (and ignored 
where there is no query argument).

2) cleanup for the (common) case of missing the sfsexp library

3) An updated "SYNOPSIS" and "DESCRIPTION" in the notmuch-sexp-queries(7) man page.

I don't plan on applying this for 0.33, but will probably go ahead and apply 
something like this series to master soon after the 0.33 release. So now is the 
time to bikeshed.

d

___
notmuch mailing list -- notmuch@notmuchmail.org
To unsubscribe send an email to notmuch-le...@notmuchmail.org