[PATCH v3 4/4] cli: Add an option to filter out duplicate addresses
This adds a --filter-by option to "notmuch search". It can be used to filter out duplicate addresses in --output=sender/recipients. The code here is an extended version of a patch from Jani Nikula. --- completion/notmuch-completion.bash | 6 ++- completion/notmuch-completion.zsh | 3 +- doc/man1/notmuch-search.rst| 32 + notmuch-search.c | 93 +++--- test/T095-search-filter-by.sh | 55 ++ 5 files changed, 181 insertions(+), 8 deletions(-) create mode 100755 test/T095-search-filter-by.sh diff --git a/completion/notmuch-completion.bash b/completion/notmuch-completion.bash index cfbd389..41dd85b 100644 --- a/completion/notmuch-completion.bash +++ b/completion/notmuch-completion.bash @@ -305,12 +305,16 @@ _notmuch_search() COMPREPLY=( $( compgen -W "true false flag all" -- "${cur}" ) ) return ;; + --filter-by) + COMPREPLY=( $( compgen -W "addr addrfold name" -- "${cur}" ) ) + return + ;; esac ! $split && case "${cur}" in -*) - local options="--format= --output= --sort= --offset= --limit= --exclude= --duplicate=" + local options="--format= --output= --sort= --offset= --limit= --exclude= --duplicate= --filter-by=" compopt -o nospace COMPREPLY=( $(compgen -W "$options" -- ${cur}) ) ;; diff --git a/completion/notmuch-completion.zsh b/completion/notmuch-completion.zsh index 3e52a00..17b345f 100644 --- a/completion/notmuch-completion.zsh +++ b/completion/notmuch-completion.zsh @@ -53,7 +53,8 @@ _notmuch_search() '--max-threads=[display only the first x threads from the search results]:number of threads to show: ' \ '--first=[omit the first x threads from the search results]:number of threads to omit: ' \ '--sort=[sort results]:sorting:((newest-first\:"reverse chronological order" oldest-first\:"chronological order"))' \ -'--output=[select what to output]:output:((summary threads messages files tags sender recipients))' +'--output=[select what to output]:output:((summary threads messages files tags sender recipients))' \ +'--filter-by=[filter out duplicate 
addresses]:filter-by:((addr\:"address part" addrfold\:"case-insensitive address part" name\:"name part"))' } _notmuch() diff --git a/doc/man1/notmuch-search.rst b/doc/man1/notmuch-search.rst index c9d38b1..0fed76e 100644 --- a/doc/man1/notmuch-search.rst +++ b/doc/man1/notmuch-search.rst @@ -85,6 +85,9 @@ Supported options for **search** include (--format=text0), as a JSON array (--format=json), or as an S-Expression list (--format=sexp). +Handling of duplicate addresses and/or names can be +controlled with the --filter-by option. + Note: Searching for **sender** should be much faster than searching for **recipients**, because sender addresses are cached directly in the database whereas other addresses @@ -151,6 +154,35 @@ Supported options for **search** include prefix. The prefix matches messages based on filenames. This option filters filenames of the matching messages. +``--filter-by=``\ (**addr**\ \|\ **addrfold**\ \|\ **name**) + + Can be used with ``--output=sender`` or + ``--output=recipients`` to filter out duplicate addresses. The + filtering algorithm receives a sequence of email addresses and + outputs the same sequence without the addresses that are + considered a duplicate of a previously output address. What is + considered a duplicate depends on how two addresses are + compared and this can be controlled by the following flags: + + **addr** means that the address part is compared in + case-sensitive manner. For example, the addresses "John Doe + <j...@example.com>" and "Dr. John Doe <j...@example.com>" will + be considered duplicate. + + **addrfold** is similar to **addr**, but in addition to it + case folding is performed before comparison. For example, the + addresses "John Doe <j...@example.com>" and "Dr. John Doe + <j...@example.com>" will be considered duplicate. + + **name** means that the name part is compared in case-sensitive + manner. For example, the addresses "John Doe <m...@example.com>" + and "John Doe <j...@doe.name>" will be considered duplicate. + + This option can be given multiple times to combine the effects + of the flags. 
For example, + ``--filter-by=name --filter-by=addr`` will print unique + case-sensitive combinations of both name and address parts. + EXIT STATUS === diff --git a/notmuch-search.c b/notmuch-search.c index 74588f8..df678ad 100644 --- a/notmuch-search.c +++ b/notmuch-search.c @@ -33,6 +33,12 @@ typedef enum { OUTPUT_ADDRESSES = OUTPUT_SENDER | OUTPUT_RECIPIENTS, } output_t; +typedef enum { +FILTER_FLAG_ADDR = 1 << 0, +FILTER_FLAG_NAME = 1 << 1, +
[PATCH v3 4/4] cli: Add an option to filter out duplicate addresses
This adds a --filter-by option to notmuch search. It can be used to filter out duplicate addresses in --output=sender/recipients. The code here is an extended version of a patch from Jani Nikula. --- completion/notmuch-completion.bash | 6 ++- completion/notmuch-completion.zsh | 3 +- doc/man1/notmuch-search.rst| 32 + notmuch-search.c | 93 +++--- test/T095-search-filter-by.sh | 55 ++ 5 files changed, 181 insertions(+), 8 deletions(-) create mode 100755 test/T095-search-filter-by.sh diff --git a/completion/notmuch-completion.bash b/completion/notmuch-completion.bash index cfbd389..41dd85b 100644 --- a/completion/notmuch-completion.bash +++ b/completion/notmuch-completion.bash @@ -305,12 +305,16 @@ _notmuch_search() COMPREPLY=( $( compgen -W true false flag all -- ${cur} ) ) return ;; + --filter-by) + COMPREPLY=( $( compgen -W addr addrfold name -- ${cur} ) ) + return + ;; esac ! $split case ${cur} in -*) - local options=--format= --output= --sort= --offset= --limit= --exclude= --duplicate= + local options=--format= --output= --sort= --offset= --limit= --exclude= --duplicate= --filter-by= compopt -o nospace COMPREPLY=( $(compgen -W $options -- ${cur}) ) ;; diff --git a/completion/notmuch-completion.zsh b/completion/notmuch-completion.zsh index 3e52a00..17b345f 100644 --- a/completion/notmuch-completion.zsh +++ b/completion/notmuch-completion.zsh @@ -53,7 +53,8 @@ _notmuch_search() '--max-threads=[display only the first x threads from the search results]:number of threads to show: ' \ '--first=[omit the first x threads from the search results]:number of threads to omit: ' \ '--sort=[sort results]:sorting:((newest-first\:reverse chronological order oldest-first\:chronological order))' \ -'--output=[select what to output]:output:((summary threads messages files tags sender recipients))' +'--output=[select what to output]:output:((summary threads messages files tags sender recipients))' \ +'--filter-by=[filter out duplicate addresses]:filter-by:((addr\:address part 
addrfold\:case-insensitive address part name\:name part))' } _notmuch() diff --git a/doc/man1/notmuch-search.rst b/doc/man1/notmuch-search.rst index c9d38b1..0fed76e 100644 --- a/doc/man1/notmuch-search.rst +++ b/doc/man1/notmuch-search.rst @@ -85,6 +85,9 @@ Supported options for **search** include (--format=text0), as a JSON array (--format=json), or as an S-Expression list (--format=sexp). +Handling of duplicate addresses and/or names can be +controlled with the --filter-by option. + Note: Searching for **sender** should be much faster than searching for **recipients**, because sender addresses are cached directly in the database whereas other addresses @@ -151,6 +154,35 @@ Supported options for **search** include prefix. The prefix matches messages based on filenames. This option filters filenames of the matching messages. +``--filter-by=``\ (**addr**\ \|\ **addrfold**\ \|\ **name**) + + Can be used with ``--output=sender`` or + ``--output=recipients`` to filter out duplicate addresses. The + filtering algorithm receives a sequence of email addresses and + outputs the same sequence without the addresses that are + considered a duplicate of a previously output address. What is + considered a duplicate depends on how two addresses are + compared and this can be controlled by the following flags: + + **addr** means that the address part is compared in + case-sensitive manner. For example, the addresses John Doe + j...@example.com and Dr. John Doe j...@example.com will + be considered duplicate. + + **addrfold** is similar to **addr**, but in addition to it + case folding is performed before comparison. For example, the + addresses John Doe j...@example.com and Dr. John Doe + j...@example.com will be considered duplicate. + + **name** means that the name part is compared in case-sensitive + manner. For example, the addresses John Doe m...@example.com + and John Doe j...@doe.name will be considered duplicate. 
+ + This option can be given multiple times to combine the effects + of the flags. For example, + ``--filter-by=name --filter-by=addr`` will print unique + case-sensitive combinations of both name and address parts. + EXIT STATUS === diff --git a/notmuch-search.c b/notmuch-search.c index 74588f8..df678ad 100644 --- a/notmuch-search.c +++ b/notmuch-search.c @@ -33,6 +33,12 @@ typedef enum { OUTPUT_ADDRESSES = OUTPUT_SENDER | OUTPUT_RECIPIENTS, } output_t; +typedef enum { +FILTER_FLAG_ADDR =