v2 regexp search for mid/folder/path

2017-03-29 Thread David Bremner
This is mainly a rebase against current master, but it also required an
update to the (just pushed) empty query string handling.

Apparently there are at least 4 ways to test a C++ string for being
empty, so here I try a different way of writing it.

Interdiff follows:

diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index 26b22fe2..1598c17f 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -156,12 +156,17 @@ RegexpFieldProcessor::RegexpFieldProcessor (std::string 
prefix,
 Xapian::Query
 RegexpFieldProcessor::operator() (const std::string & str)
 {
-if (str.size () == 0)
-   return Xapian::Query(Xapian::Query::OP_AND_NOT,
+if (str.empty ()) {
+   if (options & NOTMUCH_FIELD_PROBABILISTIC) {
+   return Xapian::Query(Xapian::Query::OP_AND_NOT,
 Xapian::Query::MatchAll,
 Xapian::Query (Xapian::Query::OP_WILDCARD, 
term_prefix));
+   } else {
+   return Xapian::Query (term_prefix);
+   }
+}
 
-if (str.length() > 0 && str.at (0) == '/') {
+if (str.at (0) == '/') {
if (str.length() > 1 && str.at (str.size () - 1) == '/'){
std::string regexp_str = str.substr(1,str.size () - 2);
if (slot != Xapian::BAD_VALUENO) {

___
notmuch mailing list
notmuch@notmuchmail.org
https://notmuchmail.org/mailman/listinfo/notmuch


[PATCH 1/2] lib: Add regexp searching for mid: prefix

2017-03-29 Thread David Bremner
The bulk of the change is passing in the field options to the regexp
field processor, so that we can properly handle the
fallback (non-regexp case).
---
 lib/database.cc   |  6 --
 lib/regexp-fields.cc  | 36 +---
 lib/regexp-fields.h   |  4 +++-
 test/T650-regexp-query.sh | 16 
 4 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index 5bc131a3..49b3849c 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -262,7 +262,8 @@ prefix_t prefix_table[] = {
 { "tag",   "K",NOTMUCH_FIELD_EXTERNAL },
 { "is","K",NOTMUCH_FIELD_EXTERNAL },
 { "id","Q",NOTMUCH_FIELD_EXTERNAL },
-{ "mid",   "Q",NOTMUCH_FIELD_EXTERNAL },
+{ "mid",   "Q",NOTMUCH_FIELD_EXTERNAL |
+   NOTMUCH_FIELD_PROCESSOR },
 { "path",  "P",NOTMUCH_FIELD_EXTERNAL },
 { "property",  "XPROPERTY",NOTMUCH_FIELD_EXTERNAL },
 /*
@@ -313,7 +314,8 @@ _setup_query_field (const prefix_t *prefix, 
notmuch_database_t *notmuch)
else if (STRNCMP_LITERAL(prefix->name, "query") == 0)
fp = (new QueryFieldProcessor (*notmuch->query_parser, 
notmuch))->release ();
else
-   fp = (new RegexpFieldProcessor (prefix->name, 
*notmuch->query_parser, notmuch))->release ();
+   fp = (new RegexpFieldProcessor (prefix->name, prefix->flags,
+   *notmuch->query_parser, 
notmuch))->release ();
 
/* we treat all field-processor fields as boolean in order to get the 
raw input */
notmuch->query_parser->add_boolean_prefix (prefix->name, fp);
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index 1651677c..7ae55e70 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -135,13 +135,21 @@ static inline Xapian::valueno _find_slot (std::string 
prefix)
return NOTMUCH_VALUE_FROM;
 else if (prefix == "subject")
return NOTMUCH_VALUE_SUBJECT;
+else if (prefix == "mid")
+   return NOTMUCH_VALUE_MESSAGE_ID;
 else
throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + 
"'");
 }
 
-RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, 
Xapian::QueryParser _, notmuch_database_t *notmuch_)
-   : slot (_find_slot (prefix)), term_prefix (_find_prefix (prefix.c_str 
())),
- parser (parser_), notmuch (notmuch_)
+RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
+   notmuch_field_flag_t options_,
+   Xapian::QueryParser _,
+   notmuch_database_t *notmuch_)
+   : slot (_find_slot (prefix)),
+ term_prefix (_find_prefix (prefix.c_str ())),
+ options (options_),
+ parser (parser_),
+ notmuch (notmuch_)
 {
 };
 
@@ -161,16 +169,22 @@ RegexpFieldProcessor::operator() (const std::string & str)
throw Xapian::QueryParserError ("unmatched regex delimiter in '" + 
str + "'");
}
 } else {
-   /* TODO replace this with a nicer API level triggering of
-* phrase parsing, when possible */
-   std::string query_str;
+   if (options & NOTMUCH_FIELD_PROBABILISTIC) {
+   /* TODO replace this with a nicer API level triggering of
+* phrase parsing, when possible */
+   std::string query_str;
 
-   if (str.find (' ') != std::string::npos)
-   query_str = '"' + str + '"';
-   else
-   query_str = str;
+   if (str.find (' ') != std::string::npos)
+   query_str = '"' + str + '"';
+   else
+   query_str = str;
 
-   return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, 
term_prefix);
+   return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, 
term_prefix);
+   } else {
+   /* Boolean prefix */
+   std::string term = term_prefix + str;
+   return Xapian::Query (term);
+   }
 }
 }
 #endif
diff --git a/lib/regexp-fields.h b/lib/regexp-fields.h
index a4ba7ad8..d5f93445 100644
--- a/lib/regexp-fields.h
+++ b/lib/regexp-fields.h
@@ -65,11 +65,13 @@ class RegexpFieldProcessor : public Xapian::FieldProcessor {
  protected:
 Xapian::valueno slot;
 std::string term_prefix;
+notmuch_field_flag_t options;
 Xapian::QueryParser 
 notmuch_database_t *notmuch;
 
  public:
-RegexpFieldProcessor (std::string prefix, Xapian::QueryParser _, 
notmuch_database_t *notmuch_);
+RegexpFieldProcessor (std::string prefix, notmuch_field_flag_t options,
+ Xapian::QueryParser _, notmuch_database_t 
*notmuch_);
 
 ~RegexpFieldProcessor () { };
 
diff --git 

[PATCH 2/2] lib: Add regexp expansion for tags and paths

2017-03-29 Thread David Bremner
From a UI perspective this looks similar to what was already provided
for from, subject, and mid, but the implementation is quite
different. It uses the database's list of terms to construct a
term-based query equivalent to the passed regular expression.
---
 lib/database.cc  | 12 
 lib/regexp-fields.cc | 31 +--
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index 49b3849c..5b13f541 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -259,12 +259,15 @@ prefix_t prefix_table[] = {
 { "file-direntry", "XFDIRENTRY",   NOTMUCH_FIELD_NO_FLAGS },
 { "directory-direntry","XDDIRENTRY",   NOTMUCH_FIELD_NO_FLAGS },
 { "thread","G",NOTMUCH_FIELD_EXTERNAL 
},
-{ "tag",   "K",NOTMUCH_FIELD_EXTERNAL },
-{ "is","K",NOTMUCH_FIELD_EXTERNAL },
+{ "tag",   "K",NOTMUCH_FIELD_EXTERNAL |
+   NOTMUCH_FIELD_PROCESSOR },
+{ "is","K",NOTMUCH_FIELD_EXTERNAL |
+   NOTMUCH_FIELD_PROCESSOR },
 { "id","Q",NOTMUCH_FIELD_EXTERNAL },
 { "mid",   "Q",NOTMUCH_FIELD_EXTERNAL |
NOTMUCH_FIELD_PROCESSOR },
-{ "path",  "P",NOTMUCH_FIELD_EXTERNAL },
+{ "path",  "P",NOTMUCH_FIELD_EXTERNAL|
+   NOTMUCH_FIELD_PROCESSOR },
 { "property",  "XPROPERTY",NOTMUCH_FIELD_EXTERNAL },
 /*
  * Unconditionally add ':' to reduce potential ambiguity with
@@ -272,7 +275,8 @@ prefix_t prefix_table[] = {
  * letters. See Xapian document termprefixes.html for related
  * discussion.
  */
-{ "folder","XFOLDER:", NOTMUCH_FIELD_EXTERNAL 
},
+{ "folder","XFOLDER:", NOTMUCH_FIELD_EXTERNAL |
+   NOTMUCH_FIELD_PROCESSOR },
 #if HAVE_XAPIAN_FIELD_PROCESSOR
 { "date",  NULL,   NOTMUCH_FIELD_EXTERNAL |
NOTMUCH_FIELD_PROCESSOR },
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index 7ae55e70..1598c17f 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -138,7 +138,7 @@ static inline Xapian::valueno _find_slot (std::string 
prefix)
 else if (prefix == "mid")
return NOTMUCH_VALUE_MESSAGE_ID;
 else
-   throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + 
"'");
+   return Xapian::BAD_VALUENO;
 }
 
 RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
@@ -156,15 +156,34 @@ RegexpFieldProcessor::RegexpFieldProcessor (std::string 
prefix,
 Xapian::Query
 RegexpFieldProcessor::operator() (const std::string & str)
 {
-if (str.size () == 0)
-   return Xapian::Query(Xapian::Query::OP_AND_NOT,
+if (str.empty ()) {
+   if (options & NOTMUCH_FIELD_PROBABILISTIC) {
+   return Xapian::Query(Xapian::Query::OP_AND_NOT,
 Xapian::Query::MatchAll,
 Xapian::Query (Xapian::Query::OP_WILDCARD, 
term_prefix));
+   } else {
+   return Xapian::Query (term_prefix);
+   }
+}
 
 if (str.at (0) == '/') {
-   if (str.at (str.size () - 1) == '/'){
-   RegexpPostingSource *postings = new RegexpPostingSource (slot, 
str.substr(1,str.size () - 2));
-   return Xapian::Query (postings->release ());
+   if (str.length() > 1 && str.at (str.size () - 1) == '/'){
+   std::string regexp_str = str.substr(1,str.size () - 2);
+   if (slot != Xapian::BAD_VALUENO) {
+   RegexpPostingSource *postings = new RegexpPostingSource (slot, 
regexp_str);
+   return Xapian::Query (postings->release ());
+   } else {
+   std::vector terms;
+   regex_t regexp;
+
+   compile_regex(regexp, regexp_str.c_str ());
+   for (Xapian::TermIterator it = 
notmuch->xapian_db->allterms_begin (term_prefix);
+it != notmuch->xapian_db->allterms_end (); ++it) {
+   if (regexec (, (*it).c_str (), 0, NULL, 0) == 0)
+   terms.push_back(*it);
+   }
+   return Xapian::Query (Xapian::Query::OP_OR, terms.begin(), 
terms.end());
+   }
} else {
throw Xapian::QueryParserError ("unmatched regex delimiter in '" + 
str + "'");
}
-- 
2.11.0

___
notmuch mailing list
notmuch@notmuchmail.org
https://notmuchmail.org/mailman/listinfo/notmuch


Re: revised foo:"" handling

2017-03-29 Thread David Bremner
Tomi Ollila  writes:

> On Sat, Mar 25 2017, David Bremner  wrote:
>
>> This obsoletes the first two patches of
>>
>>  id:20170318030303.17344-1-da...@tethera.net
>>  
>> I think this is a more meaningful interpretation than matching all messages.
>
> These changes look good (AFAIU). tests pass (debian unstable container on
> fedora 25 host)

I pushed those to release and master
___
notmuch mailing list
notmuch@notmuchmail.org
https://notmuchmail.org/mailman/listinfo/notmuch


Re: [PATCH] NEWS: initial NEWS changes for 0.24.1

2017-03-29 Thread David Bremner
David Bremner  writes:

> I expect these to be updated as a few more patches are added to the release.

I pushed an updated version.
___
notmuch mailing list
notmuch@notmuchmail.org
https://notmuchmail.org/mailman/listinfo/notmuch


Re: revised foo:"" handling

2017-03-29 Thread Tomi Ollila
On Sat, Mar 25 2017, David Bremner  wrote:

> This obsoletes the first two patches of
>
>  id:20170318030303.17344-1-da...@tethera.net
>  
> I think this is a more meaningful interpretation than matching all messages.

These changes look good (AFAIU). tests pass (debian unstable container on
fedora 25 host)

Tomi
___
notmuch mailing list
notmuch@notmuchmail.org
https://notmuchmail.org/mailman/listinfo/notmuch


Re: "search --path=directory/" is lame(-ish)

2017-03-29 Thread David Bremner
David Edmondson  writes:

> Adding a terminal slash to a directory name when using --path causes the
> search to fail. Removing the terminal slash produces results.
>
> Given that many shells will add the terminal slash during completion,
> this is lame(-ish).

This would be relatively straightforward to implement on top of

 id:20170324121436.28978-2-da...@tethera.net

In particular, add a filter to strip the trailing / in the non-regex case.

d
___
notmuch mailing list
notmuch@notmuchmail.org
https://notmuchmail.org/mailman/listinfo/notmuch