commit ugrep for openSUSE:Factory

Source-Sync Mon, 05 Jun 2023 09:06:42 -0700

Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package ugrep for openSUSE:Factory checked 
in at 2023-06-05 18:06:18
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/ugrep (Old)
 and      /work/SRC/openSUSE:Factory/.ugrep.new.15902 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "ugrep"

Mon Jun  5 18:06:18 2023 rev:42 rq:1090765 version:3.12.1

Changes:
--------
--- /work/SRC/openSUSE:Factory/ugrep/ugrep.changes      2023-04-08 
17:39:45.230283627 +0200
+++ /work/SRC/openSUSE:Factory/.ugrep.new.15902/ugrep.changes   2023-06-05 
18:06:23.651058005 +0200
@@ -1,0 +2,9 @@
+Sun Jun  4 18:18:13 UTC 2023 - Andreas Stieger <andreas.stie...@gmx.de>
+
+- update to 3.12.1:
+  * New ug+ and ugrep+ commands to search pdfs, documents and image
+    metadata when the corresponding tools are installed
+  * improved --filter option
+  * speed improvements
+
+-------------------------------------------------------------------

Old:
----
  ugrep-3.11.2.tar.gz

New:
----
  ugrep-3.12.1.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ ugrep.spec ++++++
--- /var/tmp/diff_new_pack.4NorG1/_old  2023-06-05 18:06:24.383062328 +0200
+++ /var/tmp/diff_new_pack.4NorG1/_new  2023-06-05 18:06:24.387062351 +0200
@@ -17,7 +17,7 @@
 
 
 Name:           ugrep
-Version:        3.11.2
+Version:        3.12.1
 Release:        0
 Summary:        Universal grep: a feature-rich grep implementation with focus 
on speed
 License:        BSD-3-Clause
@@ -39,7 +39,7 @@
 fuzzy search.
 
 %prep
-%setup -q
+%autosetup -p1
 
 %build
 %configure \

++++++ ugrep-3.11.2.tar.gz -> ugrep-3.12.1.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/Makefile.am new/ugrep-3.12.1/Makefile.am
--- old/ugrep-3.11.2/Makefile.am        2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/Makefile.am        2023-06-04 19:47:56.000000000 +0200
@@ -11,7 +11,7 @@
 
 .PHONY:                cp2bin
 
-# to copy the ugrep binary to the local ugrep/bin/ugrep and link ugrep/bin/ug
+# to copy the ugrep binary to the local ugrep/bin/ugrep and ugrep/bin/ug
 cp2bin:                $(top_builddir)/src/ugrep$(EXEEXT)
                rm -f $(top_builddir)/bin/ugrep$(EXEEXT)
                mkdir -p $(top_builddir)/bin; \
@@ -24,6 +24,7 @@
                @echo
 
 install-exec-hook:
+               cp -f $(top_builddir)/bin/ugrep+ $(top_builddir)/bin/ug+ 
$(DESTDIR)$(bindir)
                rm -f $(DESTDIR)$(bindir)/ug$(EXEEXT)
                cd $(DESTDIR)$(bindir) && cp -f ugrep$(EXEEXT) ug$(EXEEXT)
 
@@ -45,6 +46,16 @@
                 echo "| located in the working directory or home directory.  
|"; \
                 echo "| Run 'ug --save-config' to create a .ugrep file.      
|"; \
                 echo "|                                                      
|"; \
+                echo "| The ugrep+ and ug+ commands are the same as the      
|"; \
+                echo "| ugrep and ug commands, but also use filters to       
|"; \
+                echo "| search PDFs, documents, e-books, image metadata,     
|"; \
+                echo "| when these filter tools are installed:               
|"; \
+                echo "|                                                      
|"; \
+                echo "|   pdftotext    https://pypi.org/project/pdftotext    
|"; \
+                echo "|   antiword     https://github.com/rsdoiel/antiword   
|"; \
+                echo "|   pandoc       https://pandoc.org                    
|"; \
+                echo "|   exiftool     https://exiftool.sourceforge.net      
|"; \
+                echo "|                                                      
|"; \
                 echo "| Aliases to consider:                                 
|"; \
                 echo "|   alias uq     = 'ug -Q'                             
|"; \
                 echo "|   alias uz     = 'ug -z'                             
|"; \
@@ -67,7 +78,8 @@
                 echo 
"|______________________________________________________|";
 
 uninstall-hook:
-               rm -f $(DESTDIR)$(bindir)/ug$(EXEEXT)
+               rm -f $(DESTDIR)$(bindir)/ug$(EXEEXT) 
$(DESTDIR)$(bindir)/ugrep$(EXEEXT)
+               rm -f $(DESTDIR)$(bindir)/ug+ $(DESTDIR)$(bindir)/ugrep+
                rm -rf $(DESTDIR)$(datadir)/ugrep
 
 .PHONY:                test
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/Makefile.in new/ugrep-3.12.1/Makefile.in
--- old/ugrep-3.11.2/Makefile.in        2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/Makefile.in        2023-06-04 19:47:56.000000000 +0200
@@ -895,7 +895,7 @@
 
 .PHONY:                cp2bin
 
-# to copy the ugrep binary to the local ugrep/bin/ugrep and link ugrep/bin/ug
+# to copy the ugrep binary to the local ugrep/bin/ugrep and ugrep/bin/ug
 cp2bin:                $(top_builddir)/src/ugrep$(EXEEXT)
                rm -f $(top_builddir)/bin/ugrep$(EXEEXT)
                mkdir -p $(top_builddir)/bin; \
@@ -908,6 +908,7 @@
                @echo
 
 install-exec-hook:
+               cp -f $(top_builddir)/bin/ugrep+ $(top_builddir)/bin/ug+ 
$(DESTDIR)$(bindir)
                rm -f $(DESTDIR)$(bindir)/ug$(EXEEXT)
                cd $(DESTDIR)$(bindir) && cp -f ugrep$(EXEEXT) ug$(EXEEXT)
 
@@ -929,6 +930,16 @@
                 echo "| located in the working directory or home directory.  
|"; \
                 echo "| Run 'ug --save-config' to create a .ugrep file.      
|"; \
                 echo "|                                                      
|"; \
+                echo "| The ugrep+ and ug+ commands are the same as the      
|"; \
+                echo "| ugrep and ug commands, but also use filters to       
|"; \
+                echo "| search PDFs, documents, e-books, image metadata,     
|"; \
+                echo "| when these filter tools are installed:               
|"; \
+                echo "|                                                      
|"; \
+                echo "|   pdftotext    https://pypi.org/project/pdftotext    
|"; \
+                echo "|   antiword     https://github.com/rsdoiel/antiword   
|"; \
+                echo "|   pandoc       https://pandoc.org                    
|"; \
+                echo "|   exiftool     https://exiftool.sourceforge.net      
|"; \
+                echo "|                                                      
|"; \
                 echo "| Aliases to consider:                                 
|"; \
                 echo "|   alias uq     = 'ug -Q'                             
|"; \
                 echo "|   alias uz     = 'ug -z'                             
|"; \
@@ -951,7 +962,8 @@
                 echo 
"|______________________________________________________|";
 
 uninstall-hook:
-               rm -f $(DESTDIR)$(bindir)/ug$(EXEEXT)
+               rm -f $(DESTDIR)$(bindir)/ug$(EXEEXT) 
$(DESTDIR)$(bindir)/ugrep$(EXEEXT)
+               rm -f $(DESTDIR)$(bindir)/ug+ $(DESTDIR)$(bindir)/ugrep+
                rm -rf $(DESTDIR)$(datadir)/ugrep
 
 .PHONY:                test
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/README.md new/ugrep-3.12.1/README.md
--- old/ugrep-3.11.2/README.md  2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/README.md  2023-06-04 19:47:56.000000000 +0200
@@ -1,15 +1,17 @@
 [![build status][ci-image]][ci-url] [![license][bsd-3-image]][bsd-3-url]
 
-**ugrep v3.11 is now available: more features & even faster than before**
+Ugrep is like grep, but faster, user-friendly, and equipped with must-have 
features.  Ugrep's speed and features beat grep, ripgrep, silver searcher, ack, 
sift, etc.
 
-Search for anything in everything... ultra fast
+The ugrep tools include the following powerful commands:
+- **ug** for interactive use with a .ugrep configuration file with your 
preferences located in the working directory or home directory (run 'ug 
--save-config' to create a .ugrep file you can edit)
+- **ug+** for interactive use, also searches pdfs, documents, e-books, image 
metadata
+- **ugrep** for batch use
+- **ugrep+** for batch use, also searches pdfs, documents, e-books, image 
metadata
 
-*New option -Q opens a query TUI to search files as you type!*
+*Option -Q opens a query TUI to search files as you type!*
 <br>
 <img src="https://www.genivia.com/images/scranim.gif" width="438" alt="">
 
-- Ultra fast with new match algorithms and features beating grep, ripgrep, 
silver searcher, ack, sift, etc.
-
 - Written in clean and efficient C++11 for advanced features and speed, 
thoroughly tested
 
 - Portable (Linux, Unix, MacOS, Windows, etc), includes binaries for Windows 
in the [releases](https://github.com/Genivia/ugrep/releases)
@@ -22,25 +24,25 @@
 
 - Built-in help: `ugrep --help WHAT` displays options related to `WHAT` you 
are looking for
 
-  > 💡**ProTip** try `--help help`, `--help regex` and `--help globs`.
+  💡**ProTip** try `--help help`, `--help regex` and `--help globs`.
 
 - User-friendly with sensible defaults and customizable [configuration 
files](#config) used by the `ug` command intended for interactive use that 
loads a .ugrep configuration file with your preferences
 
       ug PATTERN ...                         ugrep --config PATTERN ...
 
-  > 💡**ProTip** `ug --save-config ...options...` saves a .ugrep config file 
in the working directory.
+  💡**ProTip** `ug --save-config ...options...` saves a .ugrep config file 
in the working directory.
 
 - Interactive [query TUI](#query), press F1 or CTRL-Z for help and 
TAB/SHIFT-TAB to navigate to dirs and files
 
       ug -Q                                  ug -Q -e PATTERN    
 
-  > 💡**ProTip** `-Q` replaces `PATTERN` on the command line to type your 
patterns interactively instead.  Specify `-e PATTERN` to search and edit the 
`PATTERN` in the TUI.  For quicker search responses to keypresses, try `-Q1` 
(fast, 100ms delay) to `-Q5` (default 500ms delay).
+  💡**ProTip** `-Q` replaces `PATTERN` on the command line to type your 
patterns interactively instead.  Specify `-e PATTERN` to search and edit the 
`PATTERN` in the TUI.  For quicker search responses to keypresses, try `-Q1` 
(fast, 100ms delay) to `-Q5` (default 500ms delay).
 
 - Find approximate pattern matches with [fuzzy search](#fuzzy), within the 
specified Levenshtein distance
 
       ug -Z PATTERN ...                      ug -Z3 PATTTERN ...
 
-  > 💡**ProTip** `-Zn` matches up to `n` extra, missing or replaced 
characters, `-Z+n` matches up to `n` extra characters, `-Z-n` matches with up 
to `n` missing characters and `-Z~n` matches up to `n` replaced characters.  
`-Z` defaults to `-Z1`.
+  💡**ProTip** `-Zn` matches up to `n` extra, missing or replaced 
characters, `-Z+n` matches up to `n` extra characters, `-Z-n` matches with up 
to `n` missing characters and `-Z~n` matches up to `n` replaced characters.  
`-Z` defaults to `-Z1`.
 
 - Search with Google-like [Boolean query patterns](#bool) using `--bool` 
patterns with `AND` (or just space), `OR` (or a bar `|`), `NOT` (or a dash 
`-`), using quotes to match exactly, and grouping with `( )`; or with options 
`-e` (as an "or"), `--and`, `--andnot`, and `--not` regex patterns
 
@@ -52,29 +54,32 @@
 
   where `A`, `B` and `C` are arbitrary regex patterns (use option `-F` to 
search strings)
 
-  > 💡**ProTip** specify `--files --bool` to apply the Boolean query to 
files as a whole: a file matches if all Boolean conditions are satisfied by 
matching patterns file-wide.  Otherwise, Boolean conditions apply to single 
lines by default, since grep utilities are generally line-based pattern 
matchers.  Option `--stats` displays the query in human-readable form after the 
search completes.
+  💡**ProTip** specify `--files --bool` to apply the Boolean query to files 
as a whole: a file matches if all Boolean conditions are satisfied by matching 
patterns file-wide.  Otherwise, Boolean conditions apply to single lines by 
default, since grep utilities are generally line-based pattern matchers.  
Option `--stats` displays the query in human-readable form after the search 
completes.
 
 - Fzf-like search with regex (or fixed strings with `-F`), fuzzy matching with 
up to 4 extra characters with `-Z+4` and words only with `-w`, using `--files 
--bool` for file-wide Boolean searches
 
       ug -Q1 --files --bool -l -w -Z+4 --sort=best
 
-  > 💡**ProTip** `-l` lists the matching files in the TUI, press `TAB` then 
`ALT-y` to view a file, `SHIFT-TAB` and `Alt-l` to go back to view the list of 
matching files ordered by best match
+  💡**ProTip** `-l` lists the matching files in the TUI, press `TAB` then 
`ALT-y` to view a file, `SHIFT-TAB` and `Alt-l` to go back to view the list of 
matching files ordered by best match
 
 - Search the contents of [archives](#archives) (cpio, jar, tar, pax, zip) and 
[compressed files](#archives) (zip, gz, Z, bz, bz2, lzma, xz, lz4, zstd)
 
       ug -z PATTERN ...                      ug -z --zmax=2 PATTERN ...
 
-  > 💡**ProTip** specify `-z --zmax=2` to search compressed files and 
archives nested within archives, e.g. to search zip files stored in 
(compressed) tar files.  The `--zmax` argument may range from 1 (default) to 99 
for up to 99 decompression and de-archiving steps, far more than you will ever 
need!  Larger `--zmax` slows searching.
+  💡**ProTip** specify `-z --zmax=2` to search compressed files and archives 
nested within archives, e.g. to search zip files stored in (compressed) tar 
files.  The `--zmax` argument may range from 1 (default) to 99 for up to 99 
decompression and de-archiving steps, far more than you will ever need!  Larger 
`--zmax` slows searching.
 
-- Search pdf, doc, docx, xls, xlxs, and more [using filters](#filter)
+- Search pdf, doc, docx, e-book, and more with `ug+` [using filters](#filter) 
associated with filename extensions:
 
+      ug+ PATTERN ...
       ug --filter='pdf:pdftotext % -' PATTERN ...
+      ug --filter='doc:antiword %' PATTERN ...
+      ug --filter='odt,docx,epub,rtf:pandoc --wrap=preserve -t markdown % -o 
-' PATTERN ...
       ug --filter='odt,doc,docx,rtf,xls,xlsx,ppt,pptx:soffice --headless --cat 
%' PATTERN ...
       ug --filter='pem:openssl x509 -text,cer,crt,der:openssl x509 -text 
-inform der' PATTERN ...
       ug --filter='latin1:iconv -f LATIN1 -t UTF-8' PATTERN ...
       ug --filter='7z:7z x -so -si' PATTERN ...
 
-  > 💡**ProTip** filters are selected based on the specified list of 
filename extensions.  Filters can be any commands (including your own scripts 
and executables) that take standard input to produce standard output.
+  💡**ProTip** the `ug+` command is the same as the `ug` command, but also 
uses filters to search PDFs, documents, and image metadata, when the 
[`pdftotext`](https://pypi.org/project/pdftotext), 
[`antiword`](https://github.com/rsdoiel/antiword), 
[`pandoc`](https://pandoc.org), and 
[`exiftool`](https://exiftool.sourceforge.net) are installed (optionally, not 
used when not installed).
 
 - Search [binary files](#binary) and display hexdumps with binary pattern 
matches (Unicode text or `-U` for byte patterns)
 
@@ -82,7 +87,7 @@
       ug -X -U BYTEPATTERN ...               ug -X TEXTPATTERN ...
       ug -W -U BYTEPATTERN ...               ug -W TEXTPATTERN ...
 
-  > 💡**ProTip** `--hexdump=4chC1` displays `4` columns of hex without a 
character column `c`, no hex spacing `h`, and with one extra hex line `C1` 
before and after a match.  Option `-X` is the same as `--hexdump=2C` with `2` 
columns of hex and the whole matching line as `C` context in hex.
+  💡**ProTip** `--hexdump=4chC1` displays `4` columns of hex without a 
character column `c`, no hex spacing `h`, and with one extra hex line `C1` 
before and after a match.  Option `-X` is the same as `--hexdump=2C` with `2` 
columns of hex and the whole matching line as `C` context in hex.
 
 - Include files to search by [filename extensions](#magic) or exclude them 
with `^`
 
@@ -129,7 +134,7 @@
       ug --csv PATTERN ...                   ug --json PATTERN ...
       ug --xml PATTERN ...                   ug --format='file=%f line=%n 
match=%O%~' PATTERN ...
 
-  > 💡**ProTip** `ug --help format` displays help on format `%` fields.
+  💡**ProTip** `ug --help format` displays help on format `%` fields.
 
 - Search with PCRE's Perl-compatible regex patterns and display or replace 
[subpattern matches](#replace)
 
@@ -141,7 +146,7 @@
       ug --replace='(%m:%o)' PATTERN ...     ug -y --replace='(%m:%o)' PATTERN 
...
       ug -P --replace='%1' PATTERN ...       ug -y -P --replace='%1' PATTERN 
...
 
-  > 💡**ProTip** `ug --help format` displays help on format `%` fields to 
optionally use with `--replace`.
+  💡**ProTip** `ug --help format` displays help on format `%` fields to 
optionally use with `--replace`.
 
 - Search files with a specific [encoding](#encoding) format such as ISO-8859-1 
thru 16, CP 437, CP 850, MACROMAN, KOI8, etc.
 
@@ -210,7 +215,7 @@
 
 ### Homebrew for MacOS (and Linux)
 
-Install the latest **ugrep** with [Homebrew](https://brew.sh):
+Install the latest ugrep commands with [Homebrew](https://brew.sh):
 
     $ brew install ugrep
 
@@ -443,7 +448,7 @@
 
 🔝 [Back to table of contents](#toc)
 
-<a name="speed">
+<a name="speed"/>
 
 Performance comparisons
 -----------------------
@@ -2388,7 +2393,7 @@
 
 🔝 [Back to table of contents](#toc)
 
-<a name="hidden">
+<a name="hidden"/>
 
 ### Search hidden files with -.
 
@@ -2410,15 +2415,16 @@
             Filter files through the specified COMMANDS first before searching.
             COMMANDS is a comma-separated list of `exts:command [option ...]',
             where `exts' is a comma-separated list of filename extensions and
-            `command' is a filter utility.  The filter utility should read from
-            standard input and write to standard output.  Files matching one of
-            `exts' are filtered.  When `exts' is `*', files with non-matching
-            extensions are filtered.  One or more `option' separated by spacing
-            may be specified, which are passed verbatim to the command.  A `%'
-            as `option' expands into the pathname to search.  For example,
-            --filter='pdf:pdftotext % -' searches PDF files.  The `%' expands
-            into a `-' when searching standard input.  Option --label=.ext may
-            be used to specify extension `ext' when searching standard input.
+            `command' is a filter utility.  Files matching one of `exts' are
+            filtered.  When `exts' is a `*', all files are filtered.  One or
+            more `option' separated by spacing may be specified, which are
+            passed verbatim to the command.  A `%' as `option' expands into the
+            pathname to search.  For example, --filter='pdf:pdftotext % -'
+            searches PDF files.  The `%' expands into a `-' when searching
+            standard input.  When a `%' is not specified, a filter utility
+            should read from standard input and write to standard output.
+            Option --label=.ext may be used to specify extension `ext' when
+            searching standard input.  This option may be repeated.
     --filter-magic-label=LABEL:MAGIC
             Associate LABEL with files whose signature "magic bytes" match the
             MAGIC regex pattern.  Only files that have no filename extension
@@ -2439,17 +2445,22 @@
 
 Common filter utilities are `cat` (concat, pass through), `head` (select first
 lines or bytes) `tr` (translate), `iconv` and `uconv` (convert), and more
-advanced utilities such as:
+advanced utilities, such as:
 
-- [`pdftotext`](https://pypi.org/project/pdftotext) to convert PDF to text
+- [`pdftotext`](https://pypi.org/project/pdftotext) to convert pdf to text
+- [`antiword`](https://github.com/rsdoiel/antiword) to convert doc to text
 - [`pandoc`](https://pandoc.org) to convert .docx, .epub, and other document
   formats
+- [`exiftool`](https://exiftool.sourceforge.net) to read meta information
+  embedded in image and video media formats.
 - [`soffice`](https://www.libreoffice.org) to convert office documents
 - [`csvkit`](https://pypi.org/project/csvkit) to convert spreadsheets
 - [`openssl`](https://wiki.openssl.org/index.php/Command_Line_Utilities) to
   convert certificates and key files to text and other formats
-- [`exiftool`](http://exiftool.sourceforge.net) to read meta information
-  embedded in image and video media formats.
+
+The `ugrep+` and `ug+` commands use the `pdftotext`, `antiword`, `pandoc` and
+`exiftool` filters, when installed, to search pdfs, documents, e-books, and
+image metadata.
 
 Also decompressors may be used as filter utilities, such as `unzip`, `gunzip`,
 `bunzip2`, `unlzma`, `unxz`, `lzop` and `7z` that decompress files to standard
@@ -2459,11 +2470,11 @@
     ug --filter='gz:gunzip -d --stdout -' ...
     ug --filter='7z:7z x -so %' ...
 
-The `--filter='lzo:lzop -d --stdout -' option decompresses files with extension
-`lzo` to standard output with `--stdout` with the compressed stream being read
-from standard input with `-`.  The `--filter='7z:7z x -so -si` option
-decompresses files with extension `7z` to standard output `-so` while reading
-standard input `-si` with the compressed file contents.
+The `--filter='lzo:lzop -d --stdout -'` option decompresses files with
+extension `lzo` to standard output with `--stdout` with the compressed stream
+being read from standard input with `-`.  The `--filter='7z:7z x -so -si`
+option decompresses files with extension `7z` to standard output `-so` while
+reading standard input `-si` with the compressed file contents.
 
 Note that **ugrep** option `-z` is typically faster to search compressed files
 compared to `--filter`.
@@ -2531,7 +2542,7 @@
 **Important:** the `soffice` utility will not output any text when one or more
 LibreOffice GUIs are open.  Make sure to quit all LibreOffice apps first.  This
 looks like a bug, but the LibreOffice developers do not appear to fix this
-any time soon (unless perhaps more people complain.)
+any time soon (unless perhaps more people complain?)
 
 To recursively search and display rows of .csv, .xls, and .xlsx spreadsheets
 that contain `10/6` using the `in2csv` filter of csvkit:
@@ -2667,7 +2678,7 @@
 
 🔝 [Back to table of contents](#toc)
 
-<a name="nobinary">
+<a name="nobinary"/>
 
 ### Ignore binary files with -I
 
@@ -3877,6 +3888,10 @@
            see CONFIGURATION.  ug is equivalent to ugrep --config and sorts 
files by
            name by default.
 
+           The ugrep+ and ug+ commands are the same as the ugrep and ug 
commands,
+           but also use filters to search pdfs, documents, e-books, and image
+           metadata, when the corresponding filter tools are installed.
+
            ugrep accepts input of various encoding formats and normalizes the 
output
            to UTF-8.  When a UTF byte order mark is present in the input, the 
input
            is automatically normalized; otherwise, ugrep assumes the input is 
ASCII,
@@ -4157,16 +4172,17 @@
                   Filter files through the specified COMMANDS first before
                   searching.  COMMANDS is a comma-separated list of 
`exts:command
                   [option ...]', where `exts' is a comma-separated list of 
filename
-                  extensions and `command' is a filter utility.  The filter 
utility
-                  should read from standard input and write to standard output.
-                  Files matching one of `exts' are filtered.  When `exts' is 
`*',
-                  files with non-matching extensions are filtered.  One or more
-                  `option' separated by spacing may be specified, which are 
passed
-                  verbatim to the command.  A `%' as `option' expands into the
-                  pathname to search.  For example, --filter='pdf:pdftotext % 
-'
-                  searches PDF files.  The `%' expands into a `-' when 
searching
-                  standard input.  Option --label=.ext may be used to specify
-                  extension `ext' when searching standard input.
+                  extensions and `command' is a filter utility.  Files 
matching one
+                  of `exts' are filtered.  When `exts' is a `*', all files are
+                  filtered.  One or more `option' separated by spacing may be
+                  specified, which are passed verbatim to the command.  A `%' 
as
+                  `option' expands into the pathname to search.  For example,
+                  --filter='pdf:pdftotext % -' searches PDF files.  The `%' 
expands
+                  into a `-' when searching standard input.  When a `%' is not
+                  specified, a filter utility should read from standard input 
and
+                  write to standard output.  Option --label=.ext may be used to
+                  specify extension `ext' when searching standard input.  This
+                  option may be repeated.
 
            --filter-magic-label=[+]LABEL:MAGIC
                   Associate LABEL with files whose signature "magic bytes" 
match the
@@ -5263,7 +5279,7 @@
 
 
 
-    ugrep 3.11.2                      April 7, 2023                         
UGREP(1)
+    ugrep 3.12.0                      June 3, 2023                          
UGREP(1)
 
 🔝 [Back to table of contents](#toc)
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/bin/ug+ new/ugrep-3.12.1/bin/ug+
--- old/ugrep-3.11.2/bin/ug+    1970-01-01 01:00:00.000000000 +0100
+++ new/ugrep-3.12.1/bin/ug+    2023-06-04 19:47:56.000000000 +0200
@@ -0,0 +1,15 @@
+#!/bin/bash
+filters=
+if [ -x "$(command -v pdftotext)" ] && pdftotext --help 2>&1 | ugrep -qw 
Poppler ; then
+  filters="${filters}${filters:+,}pdf:pdftotext % -"
+fi
+if [ -x "$(command -v antiword)" ] && antiword 2>&1 | ugrep -qw Adri ; then
+  filters="${filters}${filters:+,}doc:antiword %"
+fi
+if [ -x "$(command -v pandoc)" ] && pandoc --version 2>&1 | ugrep -qw 
pandoc.org ; then
+  filters="${filters}${filters:+,}odt,docx,epub,rtf:pandoc --wrap=preserve -t 
markdown % -o -"
+fi
+if [ -x "$(command -v exiftool)" ] ; then
+  filters="${filters}${filters:+,}gif,jpg,jpeg,mpg,mpeg,png,tiff:exiftool %"
+fi
+ug --filter="${filters}" "$@"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/bin/ugrep+ new/ugrep-3.12.1/bin/ugrep+
--- old/ugrep-3.11.2/bin/ugrep+ 1970-01-01 01:00:00.000000000 +0100
+++ new/ugrep-3.12.1/bin/ugrep+ 2023-06-04 19:47:56.000000000 +0200
@@ -0,0 +1,15 @@
+#!/bin/bash
+filters=
+if [ -x "$(command -v pdftotext)" ] && pdftotext --help 2>&1 | ugrep -qw 
Poppler ; then
+  filters="${filters}${filters:+,}pdf:pdftotext % -"
+fi
+if [ -x "$(command -v antiword)" ] && antiword 2>&1 | ugrep -qw Adri ; then
+  filters="${filters}${filters:+,}doc:antiword %"
+fi
+if [ -x "$(command -v pandoc)" ] && pandoc --version 2>&1 | ugrep -qw 
pandoc.org ; then
+  filters="${filters}${filters:+,}odt,docx,epub,rtf:pandoc --wrap=preserve -t 
markdown % -o -"
+fi
+if [ -x "$(command -v exiftool)" ] ; then
+  filters="${filters}${filters:+,}gif,jpg,jpeg,mpg,mpeg,png,tiff:exiftool %"
+fi
+ugrep --filter="${filters}" "$@"
Binary files old/ugrep-3.11.2/bin/win32/ugrep.exe and 
new/ugrep-3.12.1/bin/win32/ugrep.exe differ
Binary files old/ugrep-3.11.2/bin/win64/ugrep.exe and 
new/ugrep-3.12.1/bin/win64/ugrep.exe differ
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/build.sh new/ugrep-3.12.1/build.sh
--- old/ugrep-3.11.2/build.sh   2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/build.sh   2023-06-04 19:47:56.000000000 +0200
@@ -56,13 +56,48 @@
 exit 1
 fi
 
+echo "Checking whether ug+ and ugrep+ search pdfs, documents, image metadata:"
+if [ -x "$(command -v pdftotext)" ] && pdftotext --help 2>&1 | src/ugrep -qw 
Poppler ; then
+  echo "pdf: yes"
+else
+  echo "pdf: no, requires pdftotext"
+fi
+if [ -x "$(command -v antiword)" ] && antiword 2>&1 | src/ugrep -qw Adri ; then
+  echo "doc: yes"
+else
+  echo "doc: no, requires antiword"
+fi
+if [ -x "$(command -v pandoc)" ] && pandoc --version 2>&1 | src/ugrep -qw 
pandoc.org ; then
+  echo "docx: yes"
+  echo "epub: yes"
+  echo "odt: yes"
+  echo "rtf: yes"
+else
+  echo "docx: no, requires pandoc"
+  echo "epub: no, requires pandoc"
+  echo "odt: no, requires pandoc"
+  echo "rtf: no, requires pandoc"
+fi
+if [ -x "$(command -v exiftool)" ] ; then
+  echo "gif: yes"
+  echo "jpg: yes"
+  echo "mpg: yes"
+  echo "png: yes"
+  echo "tiff: yes"
+else
+  echo "gif: no, requires exiftool"
+  echo "jpg: no, requires exiftool"
+  echo "mpg: no, requires exiftool"
+  echo "png: no, requires exiftool"
+  echo "tiff: no, requires exiftool"
+fi
+
 echo
-echo "ugrep was successfully built in ugrep/bin and tested:"
-ls -l bin/ug bin/ugrep
+echo "ugrep was successfully built in $(pwd)/bin and tested:"
+ls -l bin/ug bin/ug+ bin/ugrep bin/ugrep+
 echo
-echo "Copy ugrep/bin/ugrep and ugrep/bin/ug to a bin/ on your PATH"
+echo "Copy bin/ug, bin/ug+, bin/ugrep, and bin/ugrep+ to a bin/ on your PATH"
 echo
-echo "Or install ugrep and ug on your system by executing:"
+echo "Or install the ugrep tools on your system by executing:"
 echo "sudo make install"
 echo
-
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/include/reflex/abslexer.h 
new/ugrep-3.12.1/include/reflex/abslexer.h
--- old/ugrep-3.11.2/include/reflex/abslexer.h  2023-04-07 23:00:04.000000000 
+0200
+++ new/ugrep-3.12.1/include/reflex/abslexer.h  2023-06-04 19:47:56.000000000 
+0200
@@ -130,7 +130,7 @@
   {
     in_ = input;
     if (has_matcher())
-      matcher().input(input); // reset and assign new input
+      matcher().input(in_); // reset and assign new input
     return *this;
   }
   /// Reset the matcher and start scanning from the given byte sequence.
@@ -346,7 +346,7 @@
   {
     return matcher().columno();
   }
-#if defined(WITH_SPAN)
+#if WITH_SPAN
   /// Returns the number of bytes from the begin of line of the match.
   inline size_t border() const
     /// @returns border offset
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/include/reflex/pattern.h 
new/ugrep-3.12.1/include/reflex/pattern.h
--- old/ugrep-3.11.2/include/reflex/pattern.h   2023-04-07 23:00:04.000000000 
+0200
+++ new/ugrep-3.12.1/include/reflex/pattern.h   2023-06-04 19:47:56.000000000 
+0200
@@ -363,7 +363,7 @@
     }
     return true;
   }
-  /// Returns zero when match is predicted or nonzero shift value, based on 
s[0..3].
+  /// Returns zero when match is predicted (removed shift distance return code)
   static inline size_t predict_match(const Pred pma[], const char *s)
   {
     uint8_t b0 = s[0];
@@ -379,15 +379,7 @@
     Pred a3 = pma[h3];
     Pred p = (a0 & 0xc0) | (a1 & 0x30) | (a2 & 0x0c) | (a3 & 0x03);
     Pred m = ((((((p >> 2) | p) >> 2) | p) >> 1) | p);
-    if (m != 0xff)
-      return 0;
-    if ((pma[b1] & 0xc0) != 0xc0)
-      return 1;
-    if ((pma[b2] & 0xc0) != 0xc0)
-      return 2;
-    if ((pma[b3] & 0xc0) != 0xc0)
-      return 3;
-    return 4;
+    return m == 0xff;
   }
  protected:
   /// Throw an error.
@@ -1109,6 +1101,7 @@
   float                 vms_; ///< ms elapsed time to compile DFA vertices
   float                 ems_; ///< ms elapsed time to compile DFA edges
   float                 wms_; ///< ms elapsed time to assemble code words
+  size_t                npy_; ///< entropy derived from bitap array
   bool                  one_; ///< true if matching one string in pre_[] without meta/anchors
 };
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/lib/matcher.cpp 
new/ugrep-3.12.1/lib/matcher.cpp
--- old/ugrep-3.11.2/lib/matcher.cpp    2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/lib/matcher.cpp    2023-06-04 19:47:56.000000000 +0200
@@ -640,150 +640,140 @@
       if (loc + min > end_)
         return false;
     }
-    if (min >= 4)
+    const Pattern::Pred *pma = pat_->pma_;
+    if (min >= 2 && pat_->npy_ <= 16)
     {
-      const Pattern::Pred *bit = pat_->bit_;
-      Pattern::Pred state = ~0;
-      Pattern::Pred mask = (1 << (min - 1));
-      while (true)
+      if (min >= 4)
       {
-        const char *s = buf_ + loc;
-        const char *e = buf_ + end_;
-        while (s < e)
-        {
-          state = (state << 1) | bit[static_cast<uint8_t>(*s)];
-          if ((state & mask) == 0)
-            break;
-          ++s;
-        }
-        if (s < e)
-        {
-          s -= min - 1;
-          loc = s - buf_;
-          if (Pattern::predict_match(pat_->pmh_, s, min))
+        const Pattern::Pred *bit = pat_->bit_;
+        Pattern::Pred state = ~0;
+        Pattern::Pred mask = (1 << (min - 1));
+        while (true)
+        {
+          const char *s = buf_ + loc;
+          const char *e = buf_ + end_;
+          while (s < e)
           {
-            set_current(loc);
-            return true;
+            state = (state << 1) | bit[static_cast<uint8_t>(*s)];
+            if ((state & mask) == 0)
+              break;
+            ++s;
+          }
+          if (s < e)
+          {
+            s -= min - 1;
+            loc = s - buf_;
+            if (Pattern::predict_match(pat_->pmh_, s, min))
+            {
+              set_current(loc);
+              return true;
+            }
+            loc += min;
+          }
+          else
+          {
+            loc = s - buf_;
+            set_current_match(loc - min);
+            (void)peek_more();
+            loc = cur_ + min;
+            if (loc >= end_)
+              return false;
           }
-          loc += min;
-        }
-        else
-        {
-          loc = s - buf_;
-          set_current_match(loc - min);
-          (void)peek_more();
-          loc = cur_ + min;
-          if (loc >= end_)
-            return false;
         }
       }
-    }
-    const Pattern::Pred *pma = pat_->pma_;
-    if (min == 3)
-    {
-      const Pattern::Pred *bit = pat_->bit_;
-      Pattern::Pred state = ~0;
-      while (true)
+      if (min == 3)
       {
-        const char *s = buf_ + loc;
-        const char *e = buf_ + end_;
-        while (s < e)
-        {
-          state = (state << 1) | bit[static_cast<uint8_t>(*s)];
-          if ((state & 4) == 0)
-            break;
-          ++s;
-        }
-        if (s < e)
-        {
-          s -= 2;
-          loc = s - buf_;
-          if (s + 4 > e || Pattern::predict_match(pma, s) == 0)
+        const Pattern::Pred *bit = pat_->bit_;
+        Pattern::Pred state = ~0;
+        while (true)
+        {
+          const char *s = buf_ + loc;
+          const char *e = buf_ + end_;
+          while (s < e)
           {
-            set_current(loc);
-            return true;
+            state = (state << 1) | bit[static_cast<uint8_t>(*s)];
+            if ((state & 4) == 0)
+              break;
+            ++s;
+          }
+          if (s < e)
+          {
+            s -= 2;
+            loc = s - buf_;
+            if (s + 4 > e || Pattern::predict_match(pma, s) == 0)
+            {
+              set_current(loc);
+              return true;
+            }
+            loc += 3;
+          }
+          else
+          {
+            loc = s - buf_;
+            set_current_match(loc - 3);
+            (void)peek_more();
+            loc = cur_ + 3;
+            if (loc >= end_)
+              return false;
           }
-          loc += 3;
-        }
-        else
-        {
-          loc = s - buf_;
-          set_current_match(loc - 3);
-          (void)peek_more();
-          loc = cur_ + 3;
-          if (loc >= end_)
-            return false;
         }
       }
-    }
-    if (min == 2)
-    {
-      const Pattern::Pred *bit = pat_->bit_;
-      Pattern::Pred state = ~0;
-      while (true)
+      if (min == 2)
       {
-        const char *s = buf_ + loc;
-        const char *e = buf_ + end_;
-        while (s < e)
-        {
-          state = (state << 1) | bit[static_cast<uint8_t>(*s)];
-          if ((state & 2) == 0)
-            break;
-          ++s;
-        }
-        if (s < e)
-        {
-          s -= 1;
-          loc = s - buf_;
-          if (s + 4 > e || Pattern::predict_match(pma, s) == 0)
+        const Pattern::Pred *bit = pat_->bit_;
+        Pattern::Pred state = ~0;
+        while (true)
+        {
+          const char *s = buf_ + loc;
+          const char *e = buf_ + end_;
+          while (s < e)
           {
-            set_current(loc);
-            return true;
+            state = (state << 1) | bit[static_cast<uint8_t>(*s)];
+            if ((state & 2) == 0)
+              break;
+            ++s;
+          }
+          if (s < e)
+          {
+            s -= 1;
+            loc = s - buf_;
+            if (s + 4 > e || Pattern::predict_match(pma, s) == 0)
+            {
+              set_current(loc);
+              return true;
+            }
+            loc += 2;
+          }
+          else
+          {
+            loc = s - buf_;
+            set_current_match(loc - 2);
+            (void)peek_more();
+            loc = cur_ + 2;
+            if (loc >= end_)
+              return false;
           }
-          loc += 2;
-        }
-        else
-        {
-          loc = s - buf_;
-          set_current_match(loc - 2);
-          (void)peek_more();
-          loc = cur_ + 2;
-          if (loc >= end_)
-            return false;
         }
       }
     }
     while (true)
     {
       const char *s = buf_ + loc;
-      const char *e = buf_ + end_;
-      while (s < e && (pma[static_cast<uint8_t>(*s)] & 0xc0) == 0xc0)
+      const char *e = buf_ + end_ - 3;
+      while (s < e && Pattern::predict_match(pma, s))
         ++s;
-      if (s < e)
+      if (s < e + 3)
       {
         loc = s - buf_;
-        if (s + 4 > e)
-        {
-          set_current(loc);
-          return true;
-        }
-        size_t k = Pattern::predict_match(pma, s);
-        if (k == 0)
-        {
-          set_current(loc);
-          return true;
-        }
-        loc += k;
-      }
-      else
-      {
-        loc = s - buf_;
-        set_current_match(loc - 1);
-        (void)peek_more();
-        loc = cur_ + 1;
-        if (loc >= end_)
-          return false;
+        set_current(loc);
+        return true;
       }
+      loc = s - buf_;
+      set_current_match(loc - 1);
+      (void)peek_more();
+      loc = cur_ + 1;
+      if (loc >= end_)
+        return false;
     }
   }
   const char *pre = pat_->pre_;
@@ -799,19 +789,34 @@
       {
         loc = s - buf_;
         set_current(loc);
-        return true;
+        if (min == 0)
+          return true;
+        if (min >= 4)
+        {
+          if (s + 1 + min > e || Pattern::predict_match(pat_->pmh_, s + 1, min))
+            return true;
+        }
+        else
+        {
+          if (s + 5 > e || Pattern::predict_match(pat_->pma_, s + 1) == 0)
+            return true;
+        }
+        ++loc;
+      }
+      else
+      {
+        loc = e - buf_;
+        set_current_match(loc - 1);
+        (void)peek_more();
+        loc = cur_ + 1;
+        if (loc + len > end_)
+          return false;
       }
-      loc = e - buf_;
-      set_current_match(loc - 1);
-      (void)peek_more();
-      loc = cur_ + 1;
-      if (loc + len > end_)
-        return false;
     }
   }
   if (bmd_ == 0)
   {
-    // Boyer-Moore preprocessing of the given pattern pat of length len, 
generates bmd_ > 0 and bms_[] shifts.
+    // Boyer-Moore preprocessing of the given string pattern pat of length 
len, generates bmd_ > 0 and bms_[] shifts.
     // updated relative frequency table of English letters (with 
upper/lower-case ratio = 0.0563), punctuation and UTF-8 bytes
     static unsigned char freq[256] =
       // x64 binary ugrep.exe frequencies combined with ASCII TAB/LF/CR 
control code frequencies
@@ -867,7 +872,7 @@
       if (pre[j - 1] == pre[i])
         break;
     bmd_ = i - j + 1;
-#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) || 
defined(__SSE2__) || defined(__x86_64__) || _M_IX86_FP == 2 || 
defined(HAVE_NEON)
+#if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2) || 
defined(__SSE2__) || defined(__x86_64__) || _M_IX86_FP == 2 || 
!defined(HAVE_NEON)
     size_t score = 0;
     for (i = 0; i < n; ++i)
       score += bms_[static_cast<uint8_t>(pre[i])];
@@ -876,21 +881,18 @@
 #if defined(HAVE_AVX512BW) || defined(HAVE_AVX2) || defined(HAVE_SSE2)
     if (!have_HW_SSE2() && !have_HW_AVX2() && !have_HW_AVX512BW())
     {
-      // if scoring is high and freq is high, then use our improved 
Boyer-Moore instead
-#if defined(__SSE2__) || defined(__x86_64__) || _M_IX86_FP == 2
-      // SSE2 is available, expect fast memchr()
-      if (score > 1 && fch > 35 && (score > 3 || fch > 50) && fch + score > 52)
-        lcs_ = 0xffff;
-#else
-      // no SSE2 available, expect slow memchr()
-      if (fch > 37 || (fch > 8 && score > 0))
-        lcs_ = 0xffff;
-#endif
+      // SSE2/AVX2 not available: if B-M scoring is high and freq is high, 
then use our improved Boyer-Moore
+      if (score > 1 && fch > 35 && (score > 4 || fch > 50) && fch + score > 52)
+        lcs_ = 0xffff; // force B-M
     }
-#elif defined(__SSE2__) || defined(__x86_64__) || _M_IX86_FP == 2 || 
defined(HAVE_NEON)
-    // SIMD is available, if scoring is high and freq is high, then use our 
improved Boyer-Moore
+#elif defined(__SSE2__) || defined(__x86_64__) || _M_IX86_FP == 2
+    // SSE2 is available: only if B-M scoring is high and freq is high, then 
use our improved Boyer-Moore
+    if (score > 1 && fch > 35 && (score > 4 || fch > 50) && fch + score > 52)
+      lcs_ = 0xffff; // force B-M
+#elif !defined(HAVE_NEON)
+    // no SIMD available: if B-M scoring is high and freq is high, then use 
our improved Boyer-Moore
     if (score > 1 && fch > 35 && (score > 3 || fch > 50) && fch + score > 52)
-      lcs_ = 0xffff;
+      lcs_ = 0xffff; // force B-M
 #endif
 #endif
   }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/lib/pattern.cpp 
new/ugrep-3.12.1/lib/pattern.cpp
--- old/ugrep-3.11.2/lib/pattern.cpp    2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/lib/pattern.cpp    2023-06-04 19:47:56.000000000 +0200
@@ -168,6 +168,7 @@
   nop_ = 0;
   len_ = 0;
   min_ = 0;
+  npy_ = 0;
   one_ = false;
   vno_ = 0;
   eno_ = 0;
@@ -191,6 +192,21 @@
           for (size_t i = 0; i < 256; ++i)
             bit_[i] = ~pred[i + n];
           n += 256;
+          npy_ = 0;
+          for (Char i = 0; i < 256; ++i)
+          {
+            bit_[i] |= ~((1 << min_) - 1);
+            npy_ += (bit_[i] & 0x01) == 0;
+            npy_ += (bit_[i] & 0x02) == 0;
+            npy_ += (bit_[i] & 0x04) == 0;
+            npy_ += (bit_[i] & 0x08) == 0;
+            npy_ += (bit_[i] & 0x10) == 0;
+            npy_ += (bit_[i] & 0x20) == 0;
+            npy_ += (bit_[i] & 0x40) == 0;
+            npy_ += (bit_[i] & 0x80) == 0;
+          }
+          // bitap entropy to estimate false positive rate, we don't use bitap 
when entropy is too high
+          npy_ /= min_;
         }
         if (min_ >= 4)
         {
@@ -3512,8 +3528,6 @@
     Char lo = state->edges.begin()->first;
     if (!is_meta(lo) && lo == state->edges.begin()->second.first)
     {
-      if (lo != state->edges.begin()->second.first)
-        break;
       if (len_ >= 255)
       {
         one_ = false;
@@ -3588,8 +3602,23 @@
   for (int level = 1; level < 8; ++level)
     for (std::map<DFA::State*,ORanges<Hash> >::iterator from = states[level - 
1].begin(); from != states[level - 1].end(); ++from)
       gen_predict_match_transitions(level, from->first, from->second, 
states[level]);
-  for (Char i = 0; i < 256; ++i)
-    bit_[i] &= (1 << min_) - 1;
+  if (min_ > 0)
+  {
+    for (Char i = 0; i < 256; ++i)
+    {
+      bit_[i] |= ~((1 << min_) - 1);
+      npy_ += (bit_[i] & 0x01) == 0;
+      npy_ += (bit_[i] & 0x02) == 0;
+      npy_ += (bit_[i] & 0x04) == 0;
+      npy_ += (bit_[i] & 0x08) == 0;
+      npy_ += (bit_[i] & 0x10) == 0;
+      npy_ += (bit_[i] & 0x20) == 0;
+      npy_ += (bit_[i] & 0x40) == 0;
+      npy_ += (bit_[i] & 0x80) == 0;
+    }
+    // bitap entropy to estimate false positive rate, we don't use bitap when 
entropy is too high
+    npy_ /= min_;
+  }
 }
 
 void Pattern::gen_predict_match_transitions(DFA::State *state, 
std::map<DFA::State*,ORanges<Hash> >& states)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/man/ugrep.1 new/ugrep-3.12.1/man/ugrep.1
--- old/ugrep-3.11.2/man/ugrep.1        2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/man/ugrep.1        2023-06-04 19:47:56.000000000 +0200
@@ -1,4 +1,4 @@
-.TH UGREP "1" "April 07, 2023" "ugrep 3.11.2" "User Commands"
+.TH UGREP "1" "June 03, 2023" "ugrep 3.12.0" "User Commands"
 .SH NAME
 \fBugrep\fR, \fBug\fR -- file pattern searcher
 .SH SYNOPSIS
@@ -19,6 +19,10 @@
 CONFIGURATION.  \fBug\fR is equivalent to \fBugrep --config\fR and sorts files
 by name by default.
 .PP
+The \fBugrep+\fR and \fBug+\fR commands are the same as the \fBugrep\fR and
+\fBug\fR commands, but also use filters to search pdfs, documents, e-books,
+and image metadata, when the corresponding filter tools are installed.
+.PP
 \fBugrep\fR accepts input of various encoding formats and normalizes the output
 to UTF-8.  When a UTF byte order mark is present in the input, the input is
 automatically normalized; otherwise, \fBugrep\fR assumes the input is ASCII,
@@ -292,15 +296,16 @@
 Filter files through the specified COMMANDS first before searching.
 COMMANDS is a comma\-separated list of `exts:command [option ...]',
 where `exts' is a comma\-separated list of filename extensions and
-`command' is a filter utility.  The filter utility should read from
-standard input and write to standard output.  Files matching one of
-`exts' are filtered.  When `exts' is `*', files with non\-matching
-extensions are filtered.  One or more `option' separated by spacing
-may be specified, which are passed verbatim to the command.  A `%'
-as `option' expands into the pathname to search.  For example,
-\fB\-\-filter\fR='pdf:pdftotext % \-' searches PDF files.  The `%' expands
-into a `\-' when searching standard input.  Option \fB\-\-label\fR=.ext may
-be used to specify extension `ext' when searching standard input.
+`command' is a filter utility.  Files matching one of `exts' are
+filtered.  When `exts' is a `*', all files are filtered.  One or
+more `option' separated by spacing may be specified, which are
+passed verbatim to the command.  A `%' as `option' expands into the
+pathname to search.  For example, \fB\-\-filter\fR='pdf:pdftotext % \-'
+searches PDF files.  The `%' expands into a `\-' when searching
+standard input.  When a `%' is not specified, a filter utility
+should read from standard input and write to standard output.
+Option \fB\-\-label\fR=.ext may be used to specify extension `ext' when
+searching standard input.  This option may be repeated.
 .TP
 \fB\-\-filter\-magic\-label\fR=[+]LABEL:MAGIC
 Associate LABEL with files whose signature "magic bytes" match the
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/man.sh new/ugrep-3.12.1/man.sh
--- old/ugrep-3.11.2/man.sh     2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/man.sh     2023-06-04 19:47:56.000000000 +0200
@@ -34,6 +34,10 @@
 CONFIGURATION.  \fBug\fR is equivalent to \fBugrep --config\fR and sorts files
 by name by default.
 .PP
+The \fBugrep+\fR and \fBug+\fR commands are the same as the \fBugrep\fR and
+\fBug\fR commands, but also use filters to search pdfs, documents, e-books,
+and image metadata, when the corresponding filter tools are installed.
+.PP
 \fBugrep\fR accepts input of various encoding formats and normalizes the output
 to UTF-8.  When a UTF byte order mark is present in the input, the input is
 automatically normalized; otherwise, \fBugrep\fR assumes the input is ASCII,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/src/flag.hpp 
new/ugrep-3.12.1/src/flag.hpp
--- old/ugrep-3.11.2/src/flag.hpp       2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/src/flag.hpp       2023-06-04 19:47:56.000000000 +0200
@@ -163,7 +163,6 @@
 extern const char *flag_devices;
 extern const char *flag_directories;
 extern const char *flag_encoding;
-extern const char *flag_filter;
 extern const char *flag_format;
 extern const char *flag_format_begin;
 extern const char *flag_format_close;
@@ -185,6 +184,7 @@
 extern const char *flag_tag;
 extern const char *flag_view;
 extern std::string flag_config_file;
+extern std::string flag_filter;
 extern std::string flag_hyperlink_prefix;
 extern std::string flag_hyperlink_path;
 extern std::set<std::string> flag_config_options;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/src/query.cpp 
new/ugrep-3.12.1/src/query.cpp
--- old/ugrep-3.11.2/src/query.cpp      2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/src/query.cpp      2023-06-04 19:47:56.000000000 +0200
@@ -47,7 +47,7 @@
 
 #include <direct.h>
 
-// non-blocking pipe (Windows named pipe)
+// create a non-blocking pipe (Windows named pipe)
 inline HANDLE nonblocking_pipe(int fd[2])
 {
   DWORD pid = GetCurrentProcessId();
@@ -91,6 +91,7 @@
   return -1;
 }
 
+// make a pipe block
 inline void set_blocking(int fd0)
 {
   fcntl(fd0, F_SETFL, fcntl(fd0, F_GETFL) & ~O_NONBLOCK);
@@ -103,7 +104,7 @@
 static constexpr const char *LARROW = "«";             // left arrow
 static constexpr const char *RARROW = "»";             // right arrow
 
-// return pointer to character at screen col, taking UTF-8 double wide 
characters into account
+// return pointer to character in the query search line at screen col, taking 
UTF-8 double wide characters into account
 char *Query::line_ptr(int col)
 {
   char *ptr = line_;
@@ -117,13 +118,13 @@
   return ptr;
 }
 
-// return pointer to the character at pos distance after the screen col
+// return pointer to the character in the query search line at pos distance 
after the screen col
 char *Query::line_ptr(int col, int pos)
 {
   return Screen::mbstring_pos(line_ptr(col), pos);
 }
 
-// return pointer to the end of the line
+// return pointer to the end of the query search line
 char *Query::line_end()
 {
   char *ptr = line_;
@@ -132,7 +133,7 @@
   return ptr;
 }
 
-// return number of character positions on the line up to the current screen 
Query::col_, taking UTF-8 double wide characters into account
+// return number of character positions on the query search line up to the 
current screen Query::col_, taking UTF-8 double wide characters into account
 int Query::line_pos()
 {
   char *ptr = line_;
@@ -146,13 +147,13 @@
   return pos;
 }
 
-// return the length of the line as a number of screen columns displayed
+// return the length of the query search line as a number of screen columns 
displayed
 int Query::line_len()
 {
   return Screen::mbstring_width(line_);
 }
 
-// return the length of the line as the number of wide characters
+// return the length of the query search line as the number of wide characters
 int Query::line_wsize()
 {
   int num = 0;
@@ -165,7 +166,7 @@
   return num;
 }
 
-// draw a textual part of the query search line
+// draw a textual part of the query search line, this function is called by 
draw()
 void Query::display(int col, int len)
 {
   char *ptr = line_ptr(col);
@@ -227,6 +228,7 @@
     if (select_ == -1)
     {
       start_ = 0;
+
       Screen::home();
 
       if (row_ > 0)
@@ -516,15 +518,20 @@
       row_ = select_ - Screen::rows + 3;
     else if (select_ >= 0 && select_ < row_)
       row_ = select_ - 1;
+
     if (row_ >= rows_)
       row_ = rows_ - 1;
     if (row_ < 0)
       row_ = 0;
+
     int end = rows_;
+
     if (end > row_ + Screen::rows - 1)
       end = row_ + Screen::rows - 1;
+
     for (int i = row_; i < end; ++i)
       disp(i);
+
     if (!message_)
       draw();
   }
@@ -567,7 +574,7 @@
 
 #endif
 
-// move the cursor to a column
+// move the cursor to a column in the query search line
 void Query::move(int col)
 {
   int dir = 0;
@@ -583,16 +590,11 @@
     col += dir; // direction is -1 or 1 to jump at or after full width char
   col_ = col;
   if (len_ >= Screen::cols - start_ && col >= Screen::cols - start_ - shift_)
-  {
     draw();
-  }
+  else if (offset_ > 0)
+    draw();
   else
-  {
-    if (offset_ > 0)
-      draw();
-    else
-      Screen::setpos(0, start_ + col_ - offset_);
-  }
+    Screen::setpos(0, start_ + col_ - offset_);
 }
 
 // insert text to line at cursor
@@ -634,9 +636,13 @@
 {
   char *ptr = line_ptr(col_);
   char *next = line_ptr(col_, num);
+  char *skip = next;
+  while (*skip != '\0' && Screen::mbchar_width(next, const_cast<const 
char**>(&skip)) == 0)
+    next = skip;
   if (next > ptr)
   {
-    memmove(ptr, next, line_end() - next + 1);
+    const char *end = line_end();
+    memmove(ptr, next, end - next + 1);
     updated_ = true;
     error_ = -1;
     len_ = line_len();
@@ -1171,7 +1177,7 @@
             Screen::alert();
           break;
 
-        case VKey::CTRL_R: // CTRL-R: jump to bookmark
+        case VKey::CTRL_R: // CTRL-R: restore bookmarked state
         case VKey::FN(4):
           if (mark_.row >= 0)
           {
@@ -1854,7 +1860,7 @@
   }
 }
 
-// scroll back one file
+// move back up one file
 void Query::back()
 {
   if (rows_ <= 0)
@@ -1914,7 +1920,7 @@
   redraw();
 }
 
-// scroll to next file
+// move down to the next file
 void Query::next()
 {
   // if output is not suitable to scroll by filename, then PGDN
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/src/screen.cpp 
new/ugrep-3.12.1/src/screen.cpp
--- old/ugrep-3.11.2/src/screen.cpp     2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/src/screen.cpp     2023-06-04 19:47:56.000000000 +0200
@@ -30,7 +30,7 @@
 @file      screen.cpp
 @brief     ANSI SGR code controlled screen API - static, not thread safe
 @author    Robert van Engelen - enge...@genivia.com
-@copyright (c) 2019-2022, Robert van Engelen, Genivia Inc. All rights reserved.
+@copyright (c) 2019-2023, Robert van Engelen, Genivia Inc. All rights reserved.
 @copyright (c) BSD-3 License - see LICENSE.txt
 */
 
@@ -46,7 +46,7 @@
 // max collective length of ANSI CSI escape sequences collected when skipping 
lead text with skip>0
 #define SCREEN_MAX_CODELEN 256
 
-// enable to interpret backspace (CTRL-H), not recommended because search 
results may not match what is shown
+// enable to perform backspace (CTRL-H), not recommended because search 
results may not match what is shown
 // #define WITH_BACKSPACE
 
 // emit ANSI SGR CSI sequence with one numeric parameter
@@ -341,7 +341,7 @@
 #endif
 }
 
-// return character width, 0 (invalid character), 1 (single width) or 2 
(double width)
+// return character width, 0 (non-spacing or invalid character), 1 (single 
width) or 2 (double width)
 int Screen::wchar_width(uint32_t wc)
 {
   /* based on https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c with full table
@@ -421,7 +421,7 @@
         (wc >= 0x30000 && wc <= 0x3fffd)))));
 }
 
-// return UCS-4 code of the specified UTF-8 sequence, or 0 for invalid UTF-8
+// return UCS-4 code of the specified UTF-8 sequence, or 0 for invalid UTF-8, 
set endptr after the sequence
 uint32_t Screen::wchar(const char *ptr, const char **endptr)
 {
   uint32_t c1, c2, c3, c4;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/src/screen.hpp 
new/ugrep-3.12.1/src/screen.hpp
--- old/ugrep-3.11.2/src/screen.hpp     2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/src/screen.hpp     2023-06-04 19:47:56.000000000 +0200
@@ -30,7 +30,7 @@
 @file      screen.hpp
 @brief     ANSI SGR code controlled screen API - static, not thread safe
 @author    Robert van Engelen - enge...@genivia.com
-@copyright (c) 2019-2022, Robert van Engelen, Genivia Inc. All rights reserved.
+@copyright (c) 2019-2023, Robert van Engelen, Genivia Inc. All rights reserved.
 @copyright (c) BSD-3 License - see LICENSE.txt
 */
 
@@ -210,7 +210,7 @@
   // return UCS-4 code of the specified UTF-8 sequence, or 0 for invalid UTF-8
   static uint32_t wchar(const char *ptr, const char **endptr);
 
-  // return character width of the specified UTF-8 sequence, 0 (invalid 
character), 1 (single width) or 2 (double width)
+  // return character width of the specified UTF-8 sequence, 0 (invalid or 
non-spacing character), 1 (single width) or 2 (double width)
   static int mbchar_width(const char *ptr, const char **endptr)
   {
     return wchar_width(wchar(ptr, endptr));
@@ -228,16 +228,24 @@
   // return pointer to string after pos screen columns
   static const char *mbstring_pos(const char *ptr, int pos)
   {
-    while (--pos >= 0 && *ptr != '\0')
-      wchar(ptr, &ptr);
+    while (pos > 0 && *ptr != '\0')
+      if (wchar(ptr, &ptr) > 0)
+        --pos;
+    const char *skp = ptr;
+    while (*skp != '\0' && wchar(skp, &skp) == 0)
+      ptr = skp;
     return ptr;
   }
 
   // return pointer to string after pos screen columns
   static char *mbstring_pos(char *ptr, int pos)
   {
-    while (--pos >= 0 && *ptr != '\0')
-      wchar(ptr, const_cast<const char**>(&ptr));
+    while (pos > 0 && *ptr != '\0')
+      if (wchar(ptr, const_cast<const char**>(&ptr)) > 0)
+        --pos;
+    char *skp = ptr;
+    while (*skp != '\0' && wchar(skp, const_cast<const char**>(&skp)) == 0)
+      ptr = skp;
     return ptr;
   }
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/ugrep-3.11.2/src/ugrep.cpp 
new/ugrep-3.12.1/src/ugrep.cpp
--- old/ugrep-3.11.2/src/ugrep.cpp      2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/src/ugrep.cpp      2023-06-04 19:47:56.000000000 +0200
@@ -404,7 +404,6 @@
 const char *flag_devices           = NULL;
 const char *flag_directories       = NULL;
 const char *flag_encoding          = NULL;
-const char *flag_filter            = NULL;
 const char *flag_format            = NULL;
 const char *flag_format_begin      = NULL;
 const char *flag_format_close      = NULL;
@@ -426,6 +425,7 @@
 const char *flag_tag               = NULL;
 const char *flag_view              = "";
 std::string              flag_config_file;
+std::string              flag_filter;
 std::string              flag_hyperlink_prefix;
 std::string              flag_hyperlink_path;
 std::set<std::string>    flag_config_options;
@@ -1381,7 +1381,7 @@
           int header_len = is_odc ? 76 : 110;
 
           char tmp[16];
-          char *rest;
+          char *rest = NULL;
 
           // get the namesize
           size_t namesize;
@@ -3103,7 +3103,7 @@
 #ifndef OS_WIN
 
     // --filter
-    if (flag_filter != NULL && in != NULL)
+    if (!flag_filter.empty() && in != NULL)
     {
       const char *basename = strrchr(pathname, PATHSEPCHR);
       if (basename == NULL)
@@ -3164,7 +3164,7 @@
 
       size_t sep = strlen(suffix);
 
-      const char *command = flag_filter;
+      const char *command = flag_filter.c_str();
       const char *default_command = NULL;
 
       // find the command corresponding to the suffix
@@ -3290,7 +3290,7 @@
             fclose(in);
           in = NULL;
 
-          warning("--filter: cannot create pipe", flag_filter);
+          warning("--filter: cannot create pipe", flag_filter.c_str());
 
           return false;
         }
@@ -4330,9 +4330,9 @@
       fprintf(file, "ignore-files=%s\n", ignore.c_str());
     fprintf(file, "\n");
   }
-  if (flag_filter != NULL)
+  if (!flag_filter.empty())
   {
-    fprintf(file, "# Filtering\nfilter=%s\n\n", flag_filter);
+    fprintf(file, "# Filter search with file format conversion 
tools\nfilter=%s\n\n", flag_filter.c_str());
     if (!flag_filter_magic_label.empty())
     {
       fprintf(file, "# Filter by file signature magic bytes\n");
@@ -4536,7 +4536,7 @@
                 else if (strcmp(arg, "fixed-strings") == 0)
                   flag_fixed_strings = true;
                 else if (strncmp(arg, "filter=", 7) == 0)
-                  flag_filter = arg + 7;
+                  flag_filter.append(flag_filter.empty() ? "" : ",").append(arg + 7);
                 else if (strncmp(arg, "filter-magic-label=", 19) == 0)
                   flag_filter_magic_label.emplace_back(arg + 19);
                 else if (strncmp(arg, "format=", 7) == 0)
@@ -4733,6 +4733,8 @@
                   flag_empty = false;
                 else if (strcmp(arg, "no-filename") == 0)
                   flag_no_filename = true;
+                else if (strcmp(arg, "no-filter") == 0)
+                  flag_filter.clear();
                 else if (strcmp(arg, "no-group-separator") == 0)
                   flag_group_separator = NULL;
                 else if (strcmp(arg, "no-heading") == 0)
@@ -4782,7 +4784,7 @@
                 else if (strcmp(arg, "neg-regexp") == 0)
                   usage("missing argument for --", arg);
                 else
-                  usage("invalid option --", arg, "--neg-regexp, --not, 
--no-any-line, --no-binary, --no-bool, --no-break, --no-byte-offset, 
--no-color, --no-confirm, --no-decompress, --no-dereference, --no-dotall, 
--no-empty, --no-filename, --no-group-separator, --no-heading, --no-hidden, 
--no-hyperlink, --no-ignore-binary, --no-ignore-case, --no-ignore-files 
--no-initial-tab, --no-invert-match, --no-line-number, --no-only-line-number, 
--no-only-matching, --no-messages, --no-mmap, --no-pager, --no-pretty, 
--no-smart-case, --no-sort, --no-stats, --no-tree, --no-ungroup, --no-view or 
--null");
+                  usage("invalid option --", arg, "--neg-regexp, --not, 
--no-any-line, --no-binary, --no-bool, --no-break, --no-byte-offset, 
--no-color, --no-confirm, --no-decompress, --no-dereference, --no-dotall, 
--no-empty, --no-filename, --no-filter, --no-group-separator, --no-heading, 
--no-hidden, --no-hyperlink, --no-ignore-binary, --no-ignore-case, 
--no-ignore-files --no-initial-tab, --no-invert-match, --no-line-number, 
--no-only-line-number, --no-only-matching, --no-messages, --no-mmap, 
--no-pager, --no-pretty, --no-smart-case, --no-sort, --no-stats, --no-tree, 
--no-ungroup, --no-view or --null");
                 break;
 
               case 'o':
@@ -6956,7 +6958,7 @@
         else if (!flag_quiet && !flag_files_with_matches && 
!flag_files_without_match)
         {
           if (flag_hex)
-            regex = ".*\\n?";
+            regex = ".*\\n?"; // include trailing \n of a line when outputting 
hex
           else
             regex = "^.*"; // use ^.* to prevent -o from reporting an extra 
empty match
         }
@@ -7175,14 +7177,18 @@
     }
   }
 
-  // -y: disable -A, -B and -C
-  if (flag_any_line)
-    flag_after_context = flag_before_context = 0;
-
   // -v or -y: disable -o and -u
   if (flag_invert_match || flag_any_line)
     flag_only_matching = flag_ungroup = false;
 
+  // --match: when matching everything disable -A, -B and -C unless -o
+  if (flag_match && !flag_only_matching)
+    flag_after_context = flag_before_context = 0;
+
+  // -y: disable -A, -B and -C
+  if (flag_any_line)
+    flag_after_context = flag_before_context = 0;
+
   // --depth: if -R or -r is not specified then enable -r
   if ((flag_min_depth > 0 || flag_max_depth > 0) && flag_directories_action == Action::UNSP)
     flag_directories_action = Action::RECURSE;
@@ -12006,15 +12012,16 @@
             Filter files through the specified COMMANDS first before searching.\n\
             COMMANDS is a comma-separated list of `exts:command [option ...]',\n\
             where `exts' is a comma-separated list of filename extensions and\n\
-            `command' is a filter utility.  The filter utility should read from\n\
-            standard input and write to standard output.  Files matching one of\n\
-            `exts' are filtered.  When `exts' is `*', files with non-matching\n\
-            extensions are filtered.  One or more `option' separated by spacing\n\
-            may be specified, which are passed verbatim to the command.  A `%'\n\
-            as `option' expands into the pathname to search.  For example,\n\
-            --filter='pdf:pdftotext % -' searches PDF files.  The `%' expands\n\
-            into a `-' when searching standard input.  Option --label=.ext may\n\
-            be used to specify extension `ext' when searching standard input.\n\
+            `command' is a filter utility.  Files matching one of `exts' are\n\
+            filtered.  When `exts' is a `*', all files are filtered.  One or\n\
+            more `option' separated by spacing may be specified, which are\n\
+            passed verbatim to the command.  A `%' as `option' expands into the\n\
+            pathname to search.  For example, --filter='pdf:pdftotext % -'\n\
+            searches PDF files.  The `%' expands into a `-' when searching\n\
+            standard input.  When a `%' is not specified, a filter utility\n\
+            should read from standard input and write to standard output.\n\
+            Option --label=.ext may be used to specify extension `ext' when\n\
+            searching standard input.  This option may be repeated.\n\
     --filter-magic-label=[+]LABEL:MAGIC\n\
             Associate LABEL with files whose signature \"magic bytes\" match the\n\
             MAGIC regex pattern.  Only files that have no filename extension\n\
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ugrep-3.11.2/src/ugrep.hpp new/ugrep-3.12.1/src/ugrep.hpp
--- old/ugrep-3.11.2/src/ugrep.hpp      2023-04-07 23:00:04.000000000 +0200
+++ new/ugrep-3.12.1/src/ugrep.hpp      2023-06-04 19:47:56.000000000 +0200
@@ -38,7 +38,7 @@
 #define UGREP_HPP
 
 // ugrep version
-#define UGREP_VERSION "3.11.2"
+#define UGREP_VERSION "3.12.1"
 
 // disable mmap because mmap is almost always slower than the file reading speed improvements since 3.0.0
 #define WITH_NO_MMAP

commit ugrep for openSUSE:Factory

Reply via email to