Revision: 77171
http://sourceforge.net/p/brlcad/code/77171
Author: starseeker
Date: 2020-09-17 19:43:34 +0000 (Thu, 17 Sep 2020)
Log Message:
-----------
Checkpoint an experiment to use Damerau-Levenshtein edit distance lookup to
suggest corrections to potentially mistyped ged commands.
Modified Paths:
--------------
brlcad/trunk/include/bu/str.h
brlcad/trunk/include/ged/commands.h
brlcad/trunk/src/gtools/gsh.cpp
brlcad/trunk/src/libbu/CMakeLists.txt
brlcad/trunk/src/libged/ged_init.cpp
Added Paths:
-----------
brlcad/trunk/src/libbu/damlevlim.cpp
Modified: brlcad/trunk/include/bu/str.h
===================================================================
--- brlcad/trunk/include/bu/str.h 2020-09-17 13:29:57 UTC (rev 77170)
+++ brlcad/trunk/include/bu/str.h 2020-09-17 19:43:34 UTC (rev 77171)
@@ -346,7 +346,15 @@
*/
BU_EXPORT extern char **bu_argv_dupinsert(int insert, size_t insertArgc, const
char *insertArgv[], size_t argc, const char *argv[]);
+/**
+ * Calculate the Damarau Levenshtein edit distance between two strings. When
+ * max_dist is defined, calculation will terminate once that distance is
reached
+ * and max_dist will be returned. When max_dist is 0, the calculation will
+ * proceed up to an internally defined calculation limit. */
+BU_EXPORT unsigned long
+bu_editdist(const char *s1, const char *s2, unsigned long max_dist);
+
__END_DECLS
/** @} */
Modified: brlcad/trunk/include/ged/commands.h
===================================================================
--- brlcad/trunk/include/ged/commands.h 2020-09-17 13:29:57 UTC (rev 77170)
+++ brlcad/trunk/include/ged/commands.h 2020-09-17 19:43:34 UTC (rev 77171)
@@ -41,6 +41,9 @@
GED_EXPORT extern int ged_exec(struct ged *gedp, int argc, const char *argv[]);
/** @} */
+GED_EXPORT extern int
+ged_cmd_lookup(const char **ncmd, const char *cmd);
+
/** @addtogroup ged_objects */
/** @{ */
/**
Modified: brlcad/trunk/src/gtools/gsh.cpp
===================================================================
--- brlcad/trunk/src/gtools/gsh.cpp 2020-09-17 13:29:57 UTC (rev 77170)
+++ brlcad/trunk/src/gtools/gsh.cpp 2020-09-17 19:43:34 UTC (rev 77171)
@@ -249,9 +249,17 @@
/* If we're not opening or closing, and we have an active gedp,
* make a standard libged call */
- ged_exec(gedp, ac, (const char **)av);
- printf("%s\n", bu_vls_cstr(gedp->ged_result_str));
- bu_vls_trunc(gedp->ged_result_str, 0);
+ if (ged_cmd_valid(av[0], NULL)) {
+ const char *ccmd = NULL;
+ int edist = ged_cmd_lookup(&ccmd, av[0]);
+ if (edist) {
+ printf("Command %s not found, did you mean %s (edit distance
%d)?\n", av[0], ccmd, edist);
+ }
+ } else {
+ ged_exec(gedp, ac, (const char **)av);
+ printf("%s\n", bu_vls_cstr(gedp->ged_result_str));
+ bu_vls_trunc(gedp->ged_result_str, 0);
+ }
#if 0
int (*func)(struct ged *, int, char *[]);
*(void **)(&func) = bu_dlsym(libged, av[0]);
Modified: brlcad/trunk/src/libbu/CMakeLists.txt
===================================================================
--- brlcad/trunk/src/libbu/CMakeLists.txt 2020-09-17 13:29:57 UTC (rev
77170)
+++ brlcad/trunk/src/libbu/CMakeLists.txt 2020-09-17 19:43:34 UTC (rev
77171)
@@ -62,6 +62,7 @@
ctype.c
dir.c
dirent.c
+ damlevlim.cpp
datetime.c
dylib.c
encode.c
Added: brlcad/trunk/src/libbu/damlevlim.cpp
===================================================================
--- brlcad/trunk/src/libbu/damlevlim.cpp (rev 0)
+++ brlcad/trunk/src/libbu/damlevlim.cpp 2020-09-17 19:43:34 UTC (rev
77171)
@@ -0,0 +1,187 @@
+// Copyright (C) 2019 Robert Jacobson. Released under the MIT license.
+//
+// Based on "Iosifovich", Copyright (C) 2019 Frederik Hertzum, which is
+// licensed under the MIT license: https://bitbucket.org/clearer/iosifovich.
+//
+// The MIT License
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+
+/*
+ * Compute the Damarau Levenshtein edit distance between two strings when the
+ * edit distance is less than a given number.
+ *
+ * https://github.com/rljacobson/Levenshtein
+ */
+
+#include "common.h"
+
+#include <string.h>
+
+#include <algorithm>
+#include <vector>
+#include <string>
+#include <numeric>
+
+#include "bu/str.h"
+
+// Limits
+#define DAMLEVLIM_MAX_EDIT_DIST 16384
+
+unsigned long
+bu_editdist(const char *s1, const char *s2, unsigned long max_dist)
+{
+ // Cap at the defined maximum edit distance.
+ unsigned long max = (max_dist < DAMLEVLIM_MAX_EDIT_DIST) ? max_dist :
DAMLEVLIM_MAX_EDIT_DIST;
+
+ // If we don't have both strings, trivial equality
+ if (!s1 || !s2) {
+ return 0;
+ }
+
+ // If we have equal strings, trivial equality
+ if (BU_STR_EQUAL(s1, s2)) {
+ return 0;
+ }
+
+ // If max == 0, go the limit
+ if (max == 0) {
+ max = DAMLEVLIM_MAX_EDIT_DIST;
+ }
+
+ // If one of either of the strings doesn't exist, the length of the other
+ // string is our edit distance
+ if (!s1 || !s2) {
+ if (!s1)
+ return strlen(s2);
+ if (!s2)
+ return strlen(s1);
+ }
+
+ // Set up buffer
+ char *ptr = (char *)new(std::nothrow)
std::vector<size_t>(DAMLEVLIM_MAX_EDIT_DIST);
+ std::vector<size_t> &buffer = *(std::vector<size_t> *)ptr;
+
+ // Let's make some string views so we can use the STL.
+ std::string subject(s1);
+ std::string query(s2);
+
+ // Skip any common prefix.
+ auto mb = std::mismatch(subject.begin(), subject.end(), query.begin());
+ auto start_offset = (size_t)std::distance(subject.begin(), mb.first);
+
+ // If one of the strings is a prefix of the other, done.
+ if (subject.length() == start_offset) {
+ delete ptr;
+ return (size_t)(query.length() - start_offset);
+ } else if (query.length() == start_offset) {
+ delete ptr;
+ return (size_t)(subject.length() - start_offset);
+ }
+
+ // Skip any common suffix.
+ auto me = std::mismatch(
+ subject.rbegin(),
static_cast<decltype(subject.rend())>(mb.first - 1),
+ query.rbegin());
+ auto end_offset = std::min((size_t)std::distance(subject.rbegin(),
me.first),
+ (size_t)(subject.size() - start_offset));
+
+ // Take the different part.
+ subject = subject.substr(start_offset, subject.size() - end_offset -
start_offset + 1);
+ query = query.substr(start_offset, query.size() - end_offset -
start_offset + 1);
+
+ // Make "subject" the smaller one.
+ if (query.length() < subject.length()) {
+ std::swap(subject, query);
+ }
+ // If one of the strings is a suffix of the other.
+ if (subject.length() == 0) {
+ delete ptr;
+ return query.length();
+ }
+
+ // Init buffer.
+ std::iota(buffer.begin(), buffer.begin() + query.length() + 1, 0);
+
+ size_t end_j = 0;
+ for (size_t i = 1; i < subject.length() + 1; ++i) {
+ // temp = i - 1;
+ size_t temp = buffer[0]++;
+ size_t prior_temp = 0;
+
+ // Setup for max distance, which only needs to look in the window
+ // between i-max <= j <= i+max.
+ // The result of the max is positive, but we need the second argument
+ // to be allowed to be negative.
+ const size_t start_j = static_cast<size_t>(std::max(1ll,
static_cast<long long>(i) -
+ DAMLEVLIM_MAX_EDIT_DIST/2));
+ end_j = std::min(static_cast<size_t>(query.length() + 1),
+ static_cast<size_t >(i + DAMLEVLIM_MAX_EDIT_DIST/2));
+ size_t column_min = DAMLEVLIM_MAX_EDIT_DIST; // Sentinels
+ for (size_t j = start_j; j < end_j; ++j) {
+ const size_t p = temp; // p = buffer[j - 1];
+ const size_t r = buffer[j];
+ /*
+ auto min = r;
+ if (p < min) min = p;
+ if (prior_temp < min) min = prior_temp;
+ min++;
+ temp = temp + (subject[i - 1] == query[j - 1] ? 0 : 1);
+ if (min < temp) temp = min;
+ */
+ temp = std::min(std::min(r, // Insertion.
+ p // Deletion.
+ ) + 1,
+
+ std::min(
+ // Transposition.
+ prior_temp + 1,
+ // Substitution.
+ temp + (subject[i - 1] == query[j - 1] ? 0 : 1)
+ )
+ );
+ // Keep track of column minimum.
+ if (temp < column_min) {
+ column_min = temp;
+ }
+ // Record matrix value mat[i-2][j-2].
+ prior_temp = temp;
+ std::swap(buffer[j], temp);
+ }
+ if (column_min >= DAMLEVLIM_MAX_EDIT_DIST) {
+ // There is no way to get an edit distance > column_min.
+ // We can bail out early.
+ delete ptr;
+ return DAMLEVLIM_MAX_EDIT_DIST;
+ }
+ }
+
+ unsigned long ret = buffer[end_j-1];
+ delete ptr;
+ return ret;
+}
+
+// Local Variables:
+// tab-width: 8
+// mode: C++
+// c-basic-offset: 4
+// indent-tabs-mode: t
+// c-file-style: "stroustrup"
+// End:
+// ex: shiftwidth=4 tabstop=8
Property changes on: brlcad/trunk/src/libbu/damlevlim.cpp
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified: brlcad/trunk/src/libged/ged_init.cpp
===================================================================
--- brlcad/trunk/src/libged/ged_init.cpp 2020-09-17 13:29:57 UTC (rev
77170)
+++ brlcad/trunk/src/libged/ged_init.cpp 2020-09-17 19:43:34 UTC (rev
77171)
@@ -88,18 +88,59 @@
if (cmd_invalid) {
return cmd_invalid;
}
- ged_func_ptr c1 = cmd_it->second->i->cmd;
- std::map<std::string, const struct ged_cmd *>::iterator func_it =
ged_cmd_map.find(std::string(func));
- if (func_it == ged_cmd_map.end()) {
- // func not in table, nothing to validate against - return invalid
- return 1;
+
+ if (func) {
+ ged_func_ptr c1 = cmd_it->second->i->cmd;
+ std::map<std::string, const struct ged_cmd *>::iterator func_it =
ged_cmd_map.find(std::string(func));
+ if (func_it == ged_cmd_map.end()) {
+ // func not in table, nothing to validate against - return invalid
+ return 1;
+ }
+ ged_func_ptr c2 = func_it->second->i->cmd;
+ int mismatched_functions = 2;
+ if (c1 == c2) {
+ mismatched_functions = 0;
+ }
+ return mismatched_functions;
}
- ged_func_ptr c2 = func_it->second->i->cmd;
- int mismatched_functions = 2;
- if (c1 == c2) {
- mismatched_functions = 0;
+
+ return 0;
+}
+
+/* Use bu_editdist to see if there is a command name similar to cmd
+ * defined. Return the closest match and the edit distance. (0 indicates
+ * an exact match, -1 an error) */
+int
+ged_cmd_lookup(const char **ncmd, const char *cmd)
+{
+ if (!cmd || !ncmd) {
+ return -1;
}
- return mismatched_functions;
+ unsigned long min_dist = LONG_MAX;
+
+ // On OpenBSD, if the executable was launched in a way that requires
+ // bu_setprogname to find the BRL-CAD root directory the iniital libged
+ // initialization would have failed. If we have no ged_cmds at all this is
+ // probably what happened, so call libged_init again here. By the time we
+ // are calling ged_cmd_valid bu_setprogname should be set and we should be
+ // ready to actually find the commands.
+ if (!ged_cmd_map.size()) {
+ libged_init();
+ }
+
+ const char *ccmd = NULL;
+ std::string scmd(cmd);
+ std::map<std::string, const struct ged_cmd *>::iterator cmd_it;
+ for (cmd_it = ged_cmd_map.begin(); cmd_it != ged_cmd_map.end(); cmd_it++) {
+ unsigned long edist = bu_editdist(cmd, cmd_it->first.c_str(), 0);
+ if (edist < min_dist) {
+ ccmd = (*cmd_it).first.c_str();
+ min_dist = edist;
+ }
+ }
+
+ (*ncmd) = ccmd;
+ return (int)min_dist;
}
size_t
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
_______________________________________________
BRL-CAD Source Commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/brlcad-commits