v2 regexp search for mid/folder/path
This is mainly a rebase against current master, but also required an update to the (just pushed) empty query string handling. Apparently there are at least 4 ways to test a C++ string for being empty, so here I try a different way of writing it. Interdiff follows: diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc index 26b22fe2..1598c17f 100644 --- a/lib/regexp-fields.cc +++ b/lib/regexp-fields.cc @@ -156,12 +156,17 @@ RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, Xapian::Query RegexpFieldProcessor::operator() (const std::string & str) { -if (str.size () == 0) - return Xapian::Query(Xapian::Query::OP_AND_NOT, +if (str.empty ()) { + if (options & NOTMUCH_FIELD_PROBABILISTIC) { + return Xapian::Query(Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll, Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix)); + } else { + return Xapian::Query (term_prefix); + } +} -if (str.length() > 0 && str.at (0) == '/') { +if (str.at (0) == '/') { if (str.length() > 1 && str.at (str.size () - 1) == '/'){ std::string regexp_str = str.substr(1,str.size () - 2); if (slot != Xapian::BAD_VALUENO) { ___ notmuch mailing list notmuch@notmuchmail.org https://notmuchmail.org/mailman/listinfo/notmuch
[PATCH 1/2] lib: Add regexp searching for mid: prefix
The bulk of the change is passing in the field options to the regexp field processor, so that we can properly handle the fallback (non-regexp case). --- lib/database.cc | 6 -- lib/regexp-fields.cc | 36 +--- lib/regexp-fields.h | 4 +++- test/T650-regexp-query.sh | 16 4 files changed, 48 insertions(+), 14 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 5bc131a3..49b3849c 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -262,7 +262,8 @@ prefix_t prefix_table[] = { { "tag", "K",NOTMUCH_FIELD_EXTERNAL }, { "is","K",NOTMUCH_FIELD_EXTERNAL }, { "id","Q",NOTMUCH_FIELD_EXTERNAL }, -{ "mid", "Q",NOTMUCH_FIELD_EXTERNAL }, +{ "mid", "Q",NOTMUCH_FIELD_EXTERNAL | + NOTMUCH_FIELD_PROCESSOR }, { "path", "P",NOTMUCH_FIELD_EXTERNAL }, { "property", "XPROPERTY",NOTMUCH_FIELD_EXTERNAL }, /* @@ -313,7 +314,8 @@ _setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch) else if (STRNCMP_LITERAL(prefix->name, "query") == 0) fp = (new QueryFieldProcessor (*notmuch->query_parser, notmuch))->release (); else - fp = (new RegexpFieldProcessor (prefix->name, *notmuch->query_parser, notmuch))->release (); + fp = (new RegexpFieldProcessor (prefix->name, prefix->flags, + *notmuch->query_parser, notmuch))->release (); /* we treat all field-processor fields as boolean in order to get the raw input */ notmuch->query_parser->add_boolean_prefix (prefix->name, fp); diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc index 1651677c..7ae55e70 100644 --- a/lib/regexp-fields.cc +++ b/lib/regexp-fields.cc @@ -135,13 +135,21 @@ static inline Xapian::valueno _find_slot (std::string prefix) return NOTMUCH_VALUE_FROM; else if (prefix == "subject") return NOTMUCH_VALUE_SUBJECT; +else if (prefix == "mid") + return NOTMUCH_VALUE_MESSAGE_ID; else throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'"); } -RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, Xapian::QueryParser _, notmuch_database_t *notmuch_) - : slot (_find_slot (prefix)), 
term_prefix (_find_prefix (prefix.c_str ())), - parser (parser_), notmuch (notmuch_) +RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, + notmuch_field_flag_t options_, + Xapian::QueryParser _, + notmuch_database_t *notmuch_) + : slot (_find_slot (prefix)), + term_prefix (_find_prefix (prefix.c_str ())), + options (options_), + parser (parser_), + notmuch (notmuch_) { }; @@ -161,16 +169,22 @@ RegexpFieldProcessor::operator() (const std::string & str) throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'"); } } else { - /* TODO replace this with a nicer API level triggering of -* phrase parsing, when possible */ - std::string query_str; + if (options & NOTMUCH_FIELD_PROBABILISTIC) { + /* TODO replace this with a nicer API level triggering of +* phrase parsing, when possible */ + std::string query_str; - if (str.find (' ') != std::string::npos) - query_str = '"' + str + '"'; - else - query_str = str; + if (str.find (' ') != std::string::npos) + query_str = '"' + str + '"'; + else + query_str = str; - return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix); + return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix); + } else { + /* Boolean prefix */ + std::string term = term_prefix + str; + return Xapian::Query (term); + } } } #endif diff --git a/lib/regexp-fields.h b/lib/regexp-fields.h index a4ba7ad8..d5f93445 100644 --- a/lib/regexp-fields.h +++ b/lib/regexp-fields.h @@ -65,11 +65,13 @@ class RegexpFieldProcessor : public Xapian::FieldProcessor { protected: Xapian::valueno slot; std::string term_prefix; +notmuch_field_flag_t options; Xapian::QueryParser notmuch_database_t *notmuch; public: -RegexpFieldProcessor (std::string prefix, Xapian::QueryParser _, notmuch_database_t *notmuch_); +RegexpFieldProcessor (std::string prefix, notmuch_field_flag_t options, + Xapian::QueryParser _, notmuch_database_t *notmuch_); ~RegexpFieldProcessor () { }; diff --git
[PATCH 2/2] lib: Add regexp expansion for tags and paths
From a UI perspective this looks similar to what was already provided for from, subject, and mid, but the implementation is quite different. It uses the database's list of terms to construct a term based query equivalent to the passed regular expression. --- lib/database.cc | 12 lib/regexp-fields.cc | 31 +-- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/lib/database.cc b/lib/database.cc index 49b3849c..5b13f541 100644 --- a/lib/database.cc +++ b/lib/database.cc @@ -259,12 +259,15 @@ prefix_t prefix_table[] = { { "file-direntry", "XFDIRENTRY", NOTMUCH_FIELD_NO_FLAGS }, { "directory-direntry","XDDIRENTRY", NOTMUCH_FIELD_NO_FLAGS }, { "thread","G",NOTMUCH_FIELD_EXTERNAL }, -{ "tag", "K",NOTMUCH_FIELD_EXTERNAL }, -{ "is","K",NOTMUCH_FIELD_EXTERNAL }, +{ "tag", "K",NOTMUCH_FIELD_EXTERNAL | + NOTMUCH_FIELD_PROCESSOR }, +{ "is","K",NOTMUCH_FIELD_EXTERNAL | + NOTMUCH_FIELD_PROCESSOR }, { "id","Q",NOTMUCH_FIELD_EXTERNAL }, { "mid", "Q",NOTMUCH_FIELD_EXTERNAL | NOTMUCH_FIELD_PROCESSOR }, -{ "path", "P",NOTMUCH_FIELD_EXTERNAL }, +{ "path", "P",NOTMUCH_FIELD_EXTERNAL| + NOTMUCH_FIELD_PROCESSOR }, { "property", "XPROPERTY",NOTMUCH_FIELD_EXTERNAL }, /* * Unconditionally add ':' to reduce potential ambiguity with @@ -272,7 +275,8 @@ prefix_t prefix_table[] = { * letters. See Xapian document termprefixes.html for related * discussion.
*/ -{ "folder","XFOLDER:", NOTMUCH_FIELD_EXTERNAL }, +{ "folder","XFOLDER:", NOTMUCH_FIELD_EXTERNAL | + NOTMUCH_FIELD_PROCESSOR }, #if HAVE_XAPIAN_FIELD_PROCESSOR { "date", NULL, NOTMUCH_FIELD_EXTERNAL | NOTMUCH_FIELD_PROCESSOR }, diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc index 7ae55e70..1598c17f 100644 --- a/lib/regexp-fields.cc +++ b/lib/regexp-fields.cc @@ -138,7 +138,7 @@ static inline Xapian::valueno _find_slot (std::string prefix) else if (prefix == "mid") return NOTMUCH_VALUE_MESSAGE_ID; else - throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'"); + return Xapian::BAD_VALUENO; } RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, @@ -156,15 +156,34 @@ RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, Xapian::Query RegexpFieldProcessor::operator() (const std::string & str) { -if (str.size () == 0) - return Xapian::Query(Xapian::Query::OP_AND_NOT, +if (str.empty ()) { + if (options & NOTMUCH_FIELD_PROBABILISTIC) { + return Xapian::Query(Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll, Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix)); + } else { + return Xapian::Query (term_prefix); + } +} if (str.at (0) == '/') { - if (str.at (str.size () - 1) == '/'){ - RegexpPostingSource *postings = new RegexpPostingSource (slot, str.substr(1,str.size () - 2)); - return Xapian::Query (postings->release ()); + if (str.length() > 1 && str.at (str.size () - 1) == '/'){ + std::string regexp_str = str.substr(1,str.size () - 2); + if (slot != Xapian::BAD_VALUENO) { + RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str); + return Xapian::Query (postings->release ()); + } else { + std::vector terms; + regex_t regexp; + + compile_regex(regexp, regexp_str.c_str ()); + for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix); +it != notmuch->xapian_db->allterms_end (); ++it) { + if (regexec (, (*it).c_str (), 0, NULL, 0) == 0) + terms.push_back(*it); + } + return 
Xapian::Query (Xapian::Query::OP_OR, terms.begin(), terms.end()); + } } else { throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'"); } -- 2.11.0 ___ notmuch mailing list notmuch@notmuchmail.org https://notmuchmail.org/mailman/listinfo/notmuch
Re: revised foo:"" handling
Tomi Ollila writes: > On Sat, Mar 25 2017, David Bremner wrote: > >> This obsoletes the first two patches of >> >> id:20170318030303.17344-1-da...@tethera.net >> >> I think this is a more meaningful interpretation than matching all messages. > > These changes look good (AFAIU). tests pass (debian unstable container on > fedora 25 host) I pushed those to release and master. ___ notmuch mailing list notmuch@notmuchmail.org https://notmuchmail.org/mailman/listinfo/notmuch
Re: [PATCH] NEWS: initial NEWS changes for 0.24.1
David Bremner writes: > I expect these to be updated as a few more patches are added to the release. I pushed an updated version. ___ notmuch mailing list notmuch@notmuchmail.org https://notmuchmail.org/mailman/listinfo/notmuch
Re: revised foo:"" handling
On Sat, Mar 25 2017, David Bremner wrote: > This obsoletes the first two patches of > > id:20170318030303.17344-1-da...@tethera.net > > I think this is a more meaningful interpretation than matching all messages. These changes look good (AFAIU). tests pass (debian unstable container on fedora 25 host) Tomi ___ notmuch mailing list notmuch@notmuchmail.org https://notmuchmail.org/mailman/listinfo/notmuch
Re: "search --path=directory/" is lame(-ish)
David Edmondson writes: > Adding a terminal slash to a directory name when using --path causes the > search to fail. Removing the terminal slash produces results. > > Given that many shells will add the terminal slash during completion, > this is lame(-ish). This would be relatively straightforward to implement on top of id:20170324121436.28978-2-da...@tethera.net In particular add a filter to strip trailing / in the non-regex case. d ___ notmuch mailing list notmuch@notmuchmail.org https://notmuchmail.org/mailman/listinfo/notmuch