Revision: 77171
          http://sourceforge.net/p/brlcad/code/77171
Author:   starseeker
Date:     2020-09-17 19:43:34 +0000 (Thu, 17 Sep 2020)
Log Message:
-----------
Checkpoint an experiment to use Damerau-Levenshtein edit distance lookup to 
suggest corrections to potentially mistyped ged commands.

Modified Paths:
--------------
    brlcad/trunk/include/bu/str.h
    brlcad/trunk/include/ged/commands.h
    brlcad/trunk/src/gtools/gsh.cpp
    brlcad/trunk/src/libbu/CMakeLists.txt
    brlcad/trunk/src/libged/ged_init.cpp

Added Paths:
-----------
    brlcad/trunk/src/libbu/damlevlim.cpp

Modified: brlcad/trunk/include/bu/str.h
===================================================================
--- brlcad/trunk/include/bu/str.h       2020-09-17 13:29:57 UTC (rev 77170)
+++ brlcad/trunk/include/bu/str.h       2020-09-17 19:43:34 UTC (rev 77171)
@@ -346,7 +346,15 @@
  */
 BU_EXPORT extern char **bu_argv_dupinsert(int insert, size_t insertArgc, const 
char *insertArgv[], size_t argc, const char *argv[]);
 
+/**
+ * Calculate the Damarau Levenshtein edit distance between two strings.  When
+ * max_dist is defined, calculation will terminate once that distance is 
reached
+ * and max_dist will be returned.  When max_dist is 0, the calculation will
+ * proceed up to an internally defined calculation limit. */
+BU_EXPORT unsigned long
+bu_editdist(const char *s1, const char *s2, unsigned long max_dist);
 
+
 __END_DECLS
 
 /** @} */

Modified: brlcad/trunk/include/ged/commands.h
===================================================================
--- brlcad/trunk/include/ged/commands.h 2020-09-17 13:29:57 UTC (rev 77170)
+++ brlcad/trunk/include/ged/commands.h 2020-09-17 19:43:34 UTC (rev 77171)
@@ -41,6 +41,9 @@
 GED_EXPORT extern int ged_exec(struct ged *gedp, int argc, const char *argv[]);
 /** @} */
 
+GED_EXPORT extern int
+ged_cmd_lookup(const char **ncmd, const char *cmd);
+
 /** @addtogroup ged_objects */
 /** @{ */
 /**

Modified: brlcad/trunk/src/gtools/gsh.cpp
===================================================================
--- brlcad/trunk/src/gtools/gsh.cpp     2020-09-17 13:29:57 UTC (rev 77170)
+++ brlcad/trunk/src/gtools/gsh.cpp     2020-09-17 19:43:34 UTC (rev 77171)
@@ -249,9 +249,17 @@
 
        /* If we're not opening or closing, and we have an active gedp,
         * make a standard libged call */
-       ged_exec(gedp, ac, (const char **)av);
-       printf("%s\n", bu_vls_cstr(gedp->ged_result_str));
-       bu_vls_trunc(gedp->ged_result_str, 0);
+       if (ged_cmd_valid(av[0], NULL)) {
+           const char *ccmd = NULL;
+           int edist = ged_cmd_lookup(&ccmd, av[0]);
+           if (edist) {
+               printf("Command %s not found, did you mean %s (edit distance 
%d)?\n", av[0], ccmd, edist);
+           }
+       } else {
+           ged_exec(gedp, ac, (const char **)av);
+           printf("%s\n", bu_vls_cstr(gedp->ged_result_str));
+           bu_vls_trunc(gedp->ged_result_str, 0);
+       }
 #if 0
        int (*func)(struct ged *, int, char *[]);
        *(void **)(&func) = bu_dlsym(libged, av[0]);

Modified: brlcad/trunk/src/libbu/CMakeLists.txt
===================================================================
--- brlcad/trunk/src/libbu/CMakeLists.txt       2020-09-17 13:29:57 UTC (rev 
77170)
+++ brlcad/trunk/src/libbu/CMakeLists.txt       2020-09-17 19:43:34 UTC (rev 
77171)
@@ -62,6 +62,7 @@
   ctype.c
   dir.c
   dirent.c
+  damlevlim.cpp
   datetime.c
   dylib.c
   encode.c

Added: brlcad/trunk/src/libbu/damlevlim.cpp
===================================================================
--- brlcad/trunk/src/libbu/damlevlim.cpp                                (rev 0)
+++ brlcad/trunk/src/libbu/damlevlim.cpp        2020-09-17 19:43:34 UTC (rev 
77171)
@@ -0,0 +1,187 @@
+// Copyright (C) 2019 Robert Jacobson. Released under the MIT license.
+//
+// Based on "Iosifovich", Copyright (C) 2019 Frederik Hertzum, which is
+// licensed under the MIT license: https://bitbucket.org/clearer/iosifovich.
+//
+// The MIT License
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+
+/*
+ * Compute the Damarau Levenshtein edit distance between two strings when the
+ * edit distance is less than a given number.
+ *
+ * https://github.com/rljacobson/Levenshtein
+ */
+
+#include "common.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <vector>
+#include <string>
+#include <numeric>
+
+#include "bu/str.h"
+
+// Limits
+#define DAMLEVLIM_MAX_EDIT_DIST 16384
+
+unsigned long
+bu_editdist(const char *s1, const char *s2, unsigned long max_dist)
+{
+    // Cap at the defined maximum edit distance.
+    unsigned long max = (max_dist < DAMLEVLIM_MAX_EDIT_DIST) ? max_dist : 
DAMLEVLIM_MAX_EDIT_DIST;
+
+    // If we don't have both strings, trivial equality
+    if (!s1 || !s2) {
+       return 0;
+    }
+
+    // If we have equal strings, trivial equality
+    if (BU_STR_EQUAL(s1, s2)) {
+       return 0;
+    }
+
+    // If max == 0, go the limit
+    if (max == 0) {
+       max = DAMLEVLIM_MAX_EDIT_DIST;
+    }
+
+    // If one of either of the strings doesn't exist, the length of the other
+    // string is our edit distance
+    if (!s1 || !s2) {
+       if (!s1)
+           return strlen(s2);
+       if (!s2)
+           return strlen(s1);
+    }
+
+    // Set up buffer
+    char *ptr = (char *)new(std::nothrow) 
std::vector<size_t>(DAMLEVLIM_MAX_EDIT_DIST);
+    std::vector<size_t> &buffer = *(std::vector<size_t> *)ptr;
+
+    // Let's make some string views so we can use the STL.
+    std::string subject(s1);
+    std::string query(s2);
+
+    // Skip any common prefix.
+    auto mb = std::mismatch(subject.begin(), subject.end(), query.begin());
+    auto start_offset = (size_t)std::distance(subject.begin(), mb.first);
+
+    // If one of the strings is a prefix of the other, done.
+    if (subject.length() == start_offset) {
+       delete ptr;
+       return (size_t)(query.length() - start_offset);
+    } else if (query.length() == start_offset) {
+       delete ptr;
+       return (size_t)(subject.length() - start_offset);
+    }
+
+    // Skip any common suffix.
+    auto me = std::mismatch(
+                 subject.rbegin(), 
static_cast<decltype(subject.rend())>(mb.first - 1),
+                 query.rbegin());
+    auto end_offset = std::min((size_t)std::distance(subject.rbegin(), 
me.first),
+                              (size_t)(subject.size() - start_offset));
+
+    // Take the different part.
+    subject = subject.substr(start_offset, subject.size() - end_offset - 
start_offset + 1);
+    query = query.substr(start_offset, query.size() - end_offset - 
start_offset + 1);
+
+    // Make "subject" the smaller one.
+    if (query.length() < subject.length()) {
+       std::swap(subject, query);
+    }
+    // If one of the strings is a suffix of the other.
+    if (subject.length() == 0) {
+       delete ptr;
+       return query.length();
+    }
+
+    // Init buffer.
+    std::iota(buffer.begin(), buffer.begin() + query.length() + 1, 0);
+
+    size_t end_j = 0;
+    for (size_t i = 1; i < subject.length() + 1; ++i) {
+       // temp = i - 1;
+       size_t temp = buffer[0]++;
+       size_t prior_temp = 0;
+
+       // Setup for max distance, which only needs to look in the window
+       // between i-max <= j <= i+max.
+       // The result of the max is positive, but we need the second argument
+       // to be allowed to be negative.
+       const size_t start_j = static_cast<size_t>(std::max(1ll, 
static_cast<long long>(i) -
+                              DAMLEVLIM_MAX_EDIT_DIST/2));
+       end_j = std::min(static_cast<size_t>(query.length() + 1),
+                        static_cast<size_t >(i + DAMLEVLIM_MAX_EDIT_DIST/2));
+       size_t column_min = DAMLEVLIM_MAX_EDIT_DIST;     // Sentinels
+       for (size_t j = start_j; j < end_j; ++j) {
+           const size_t p = temp; // p = buffer[j - 1];
+           const size_t r = buffer[j];
+           /*
+              auto min = r;
+              if (p < min) min = p;
+              if (prior_temp < min) min = prior_temp;
+              min++;
+              temp = temp + (subject[i - 1] == query[j - 1] ? 0 : 1);
+              if (min < temp) temp = min;
+              */
+           temp = std::min(std::min(r,  // Insertion.
+                                    p   // Deletion.
+                                   ) + 1,
+
+                           std::min(
+                               // Transposition.
+                               prior_temp + 1,
+                               // Substitution.
+                               temp + (subject[i - 1] == query[j - 1] ? 0 : 1)
+                           )
+                          );
+           // Keep track of column minimum.
+           if (temp < column_min) {
+               column_min = temp;
+           }
+           // Record matrix value mat[i-2][j-2].
+           prior_temp = temp;
+           std::swap(buffer[j], temp);
+       }
+       if (column_min >= DAMLEVLIM_MAX_EDIT_DIST) {
+           // There is no way to get an edit distance > column_min.
+           // We can bail out early.
+           delete ptr;
+           return DAMLEVLIM_MAX_EDIT_DIST;
+       }
+    }
+
+    unsigned long ret = buffer[end_j-1];
+    delete ptr;
+    return ret;
+}
+
+// Local Variables:
+// tab-width: 8
+// mode: C++
+// c-basic-offset: 4
+// indent-tabs-mode: t
+// c-file-style: "stroustrup"
+// End:
+// ex: shiftwidth=4 tabstop=8


Property changes on: brlcad/trunk/src/libbu/damlevlim.cpp
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: brlcad/trunk/src/libged/ged_init.cpp
===================================================================
--- brlcad/trunk/src/libged/ged_init.cpp        2020-09-17 13:29:57 UTC (rev 
77170)
+++ brlcad/trunk/src/libged/ged_init.cpp        2020-09-17 19:43:34 UTC (rev 
77171)
@@ -88,18 +88,59 @@
     if (cmd_invalid) {
        return cmd_invalid;
     }
-    ged_func_ptr c1 = cmd_it->second->i->cmd;
-    std::map<std::string, const struct ged_cmd *>::iterator func_it = 
ged_cmd_map.find(std::string(func));
-    if (func_it == ged_cmd_map.end()) {
-       // func not in table, nothing to validate against - return invalid
-       return 1;
+
+    if (func) {
+       ged_func_ptr c1 = cmd_it->second->i->cmd;
+       std::map<std::string, const struct ged_cmd *>::iterator func_it = 
ged_cmd_map.find(std::string(func));
+       if (func_it == ged_cmd_map.end()) {
+           // func not in table, nothing to validate against - return invalid
+           return 1;
+       }
+       ged_func_ptr c2 = func_it->second->i->cmd;
+       int mismatched_functions = 2;
+       if (c1 == c2) {
+           mismatched_functions = 0;
+       }
+       return mismatched_functions;
     }
-    ged_func_ptr c2 = func_it->second->i->cmd;
-    int mismatched_functions = 2;
-    if (c1 == c2) {
-       mismatched_functions = 0;
+
+    return 0;
+}
+
+/* Use bu_editdist to see if there is a command name similar to cmd
+ * defined.  Return the closest match and the edit distance.  (0 indicates
+ * an exact match, -1 an error) */
+int
+ged_cmd_lookup(const char **ncmd, const char *cmd)
+{
+    if (!cmd || !ncmd) {
+       return -1;
     }
-    return mismatched_functions;
+    unsigned long min_dist = LONG_MAX;
+
+    // On OpenBSD, if the executable was launched in a way that requires
+    // bu_setprogname to find the BRL-CAD root directory the iniital libged
+    // initialization would have failed.  If we have no ged_cmds at all this is
+    // probably what happened, so call libged_init again here.  By the time we
+    // are calling ged_cmd_valid bu_setprogname should be set and we should be
+    // ready to actually find the commands.
+    if (!ged_cmd_map.size()) {
+       libged_init();
+    }
+
+    const char *ccmd = NULL;
+    std::string scmd(cmd);
+    std::map<std::string, const struct ged_cmd *>::iterator cmd_it;
+    for (cmd_it = ged_cmd_map.begin(); cmd_it != ged_cmd_map.end(); cmd_it++) {
+       unsigned long edist = bu_editdist(cmd, cmd_it->first.c_str(), 0);
+       if (edist < min_dist) {
+           ccmd = (*cmd_it).first.c_str();
+           min_dist = edist;
+       }
+    }
+
+    (*ncmd) = ccmd;
+    return (int)min_dist;
 }
 
 size_t

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.



_______________________________________________
BRL-CAD Source Commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/brlcad-commits

Reply via email to