Add --fuzzy-search option, and --search-similarity option to adjust
the minimum similarity for search results (defaults to 80%).

X-Gentoo-bug: 65566
X-Gentoo-bug-url: https://bugs.gentoo.org/show_bug.cgi?id=65566
---
 man/emerge.1           | 14 ++++++++++++++
 pym/_emerge/actions.py |  7 +++++--
 pym/_emerge/main.py    | 32 +++++++++++++++++++++++++++++++-
 pym/_emerge/search.py  | 26 ++++++++++++++++++++++++--
 4 files changed, 74 insertions(+), 5 deletions(-)

diff --git a/man/emerge.1 b/man/emerge.1
index da1d852..7442220 100644
--- a/man/emerge.1
+++ b/man/emerge.1
@@ -565,6 +565,14 @@ packages (fetch things from SRC_URI based upon USE setting).
 Instead of doing any package building, just perform fetches for all
 packages (fetch everything in SRC_URI regardless of USE setting).
 .TP
+.BR "\-\-fuzzy\-search [ y | n ]"
+Enable or disable fuzzy search for search actions. When fuzzy search
+is enabled, a result is returned if it is sufficiently similar to the
+search string, without requiring an exact match. This option is enabled
+by default. Fuzzy search does not support regular expressions, therefore
+it is automatically disabled for regular expression searches. Fuzzy
+search is slightly slower than non\-fuzzy search.
+.TP
 .BR "\-\-getbinpkg [ y | n ] (\-g short option)"
 Using the server and location defined in \fIPORTAGE_BINHOST\fR (see
 \fBmake.conf\fR(5)), portage will download the information from each binary
@@ -874,6 +882,12 @@ enabled by default. The search index needs to be regenerated by
 to \fBEMERGE_DEFAULT_OPTS\fR (see \fBmake.conf\fR(5)) and later
 overridden via the command line.
 .TP
+.BR "\-\-search\-similarity PERCENTAGE"
+Set the minimum similarity percentage (a floating-point number between
+0 and 100). Search results with similarity percentages lower than this
+are discarded (default: \'80\'). This option has no effect unless the
+\fB\-\-fuzzy\-search\fR option is enabled.
+.TP
 .BR "\-\-select [ y | n ] (\-w short option)"
 Add specified packages to the world set (inverse of
 \fB\-\-oneshot\fR). This is useful if you want to
diff --git a/pym/_emerge/actions.py b/pym/_emerge/actions.py
index 1dc2b0d..6704afc 100644
--- a/pym/_emerge/actions.py
+++ b/pym/_emerge/actions.py
@@ -1,4 +1,4 @@
-# Copyright 1999-2015 Gentoo Foundation
+# Copyright 1999-2016 Gentoo Foundation
 # Distributed under the terms of the GNU General Public License v2
 
 from __future__ import division, print_function, unicode_literals
@@ -1974,7 +1974,10 @@ def action_search(root_config, myopts, myfiles, spinner):
                        spinner, "--searchdesc" in myopts,
                        "--quiet" not in myopts, "--usepkg" in myopts,
                        "--usepkgonly" in myopts,
-                       search_index = myopts.get("--search-index", "y") != "n")
+                       search_index=myopts.get("--search-index", "y") != "n",
+                       search_similarity=myopts.get("--search-similarity"),
+                       fuzzy=myopts.get("--fuzzy-search") != "n",
+                       )
                for mysearch in myfiles:
                        try:
                                searchinstance.execute(mysearch)
diff --git a/pym/_emerge/main.py b/pym/_emerge/main.py
index 0e672a2..eae1954 100644
--- a/pym/_emerge/main.py
+++ b/pym/_emerge/main.py
@@ -1,4 +1,4 @@
-# Copyright 1999-2015 Gentoo Foundation
+# Copyright 1999-2016 Gentoo Foundation
 # Distributed under the terms of the GNU General Public License v2
 
 from __future__ import print_function
@@ -141,6 +141,7 @@ def insert_optional_args(args):
                '--deselect'             : y_or_n,
                '--binpkg-respect-use'   : y_or_n,
                '--fail-clean'           : y_or_n,
+               '--fuzzy-search'         : y_or_n,
                '--getbinpkg'            : y_or_n,
                '--getbinpkgonly'        : y_or_n,
                '--jobs'       : valid_integers,
@@ -458,6 +459,11 @@ def parse_opts(tmpcmdline, silent=False):
                        "choices" : true_y_or_n
                },
 
+               "--fuzzy-search": {
+                       "help": "Enable or disable fuzzy search",
+                       "choices": true_y_or_n
+               },
+
                "--ignore-built-slot-operator-deps": {
                        "help": "Ignore the slot/sub-slot := operator parts of dependencies that have "
                                "been recorded when packages where built. This option is intended "
@@ -658,6 +664,12 @@ def parse_opts(tmpcmdline, silent=False):
                        "choices": y_or_n
                },
 
+               "--search-similarity": {
+                       "help": ("Set minimum similarity percentage for fuzzy search "
+                               "(a floating-point number between 0 and 100)"),
+                       "action": "store"
+               },
+
                "--select": {
                        "shortopt" : "-w",
                        "help"    : "add specified packages to the world set " + \
@@ -855,6 +867,9 @@ def parse_opts(tmpcmdline, silent=False):
        if myoptions.fail_clean in true_y:
                myoptions.fail_clean = True
 
+       if myoptions.fuzzy_search in true_y:
+               myoptions.fuzzy_search = True
+
        if myoptions.getbinpkg in true_y:
                myoptions.getbinpkg = True
        else:
@@ -1009,6 +1024,21 @@ def parse_opts(tmpcmdline, silent=False):
 
                myoptions.rebuilt_binaries_timestamp = rebuilt_binaries_timestamp
 
+       if myoptions.search_similarity:
+               try:
+                       search_similarity = float(myoptions.search_similarity)
+               except ValueError:
+                       parser.error("Invalid --search-similarity parameter "
+                               "(not a number): '{}'\n".format(
+                               myoptions.search_similarity))
+
+               if search_similarity < 0 or search_similarity > 100:
+                       parser.error("Invalid --search-similarity parameter "
+                               "(not between 0 and 100): '{}'\n".format(
+                               myoptions.search_similarity))
+
+               myoptions.search_similarity = search_similarity
+
        if myoptions.use_ebuild_visibility in true_y:
                myoptions.use_ebuild_visibility = True
        else:
diff --git a/pym/_emerge/search.py b/pym/_emerge/search.py
index 32d326e..20a0c02 100644
--- a/pym/_emerge/search.py
+++ b/pym/_emerge/search.py
@@ -1,8 +1,9 @@
-# Copyright 1999-2015 Gentoo Foundation
+# Copyright 1999-2016 Gentoo Foundation
 # Distributed under the terms of the GNU General Public License v2
 
 from __future__ import unicode_literals
 
+import difflib
 import re
 import portage
 from portage import os
@@ -28,7 +29,8 @@ class search(object):
        # public interface
        #
        def __init__(self, root_config, spinner, searchdesc,
-               verbose, usepkg, usepkgonly, search_index=True):
+               verbose, usepkg, usepkgonly, search_index=True,
+               search_similarity=None, fuzzy=True):
                """Searches the available and installed packages for the supplied search key.
                The list of available and installed packages is created at object instantiation.
                This makes successive searches faster."""
@@ -42,6 +44,9 @@ class search(object):
                self.spinner = None
                self.root_config = root_config
                self.setconfig = root_config.setconfig
+               self.fuzzy = fuzzy
+               self.search_similarity = (80 if search_similarity is None
+                       else search_similarity)
                self.matches = {"pkg" : []}
                self.mlen = 0
 
@@ -248,11 +253,26 @@ class search(object):
                if self.searchkey.startswith('@'):
                        match_category = 1
                        self.searchkey = self.searchkey[1:]
+               fuzzy = False
                if regexsearch:
                        self.searchre=re.compile(self.searchkey,re.I)
                else:
                        self.searchre=re.compile(re.escape(self.searchkey), re.I)
 
+                       # Fuzzy search does not support regular expressions, therefore
+                       # it is disabled for regular expression searches.
+                       if self.fuzzy:
+                               fuzzy = True
+                               cutoff = float(self.search_similarity) / 100
+                               seq_match = difflib.SequenceMatcher()
+                               seq_match.set_seq2(self.searchkey.lower())
+
+                               def fuzzy_search(match_string):
+                                       seq_match.set_seq1(match_string.lower())
+                                       return (seq_match.real_quick_ratio() >= cutoff and
+                                               seq_match.quick_ratio() >= cutoff and
+                                               seq_match.ratio() >= cutoff)
+
                for package in self._cp_all():
                        self._spinner_update()
 
@@ -263,6 +283,8 @@ class search(object):
 
                        if self.searchre.search(match_string):
                                yield ("pkg", package)
+                       elif fuzzy and fuzzy_search(match_string):
+                               yield ("pkg", package)
                        elif self.searchdesc: # DESCRIPTION searching
                                # Use _first_cp to avoid an expensive visibility check,
                                # since the visibility check can be avoided entirely
-- 
2.7.4


Reply via email to