This patch updates the edit-distance algorithm in spellcheck.c to
support transpositions as well as additions/deletions/substitutions,
so that a transposition error counts as a distance of 1 rather than 2.
This leads to saner suggestions for such cases.
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
OK for trunk?
gcc/fortran/ChangeLog:
PR other/69968
* misc.c (gfc_closest_fuzzy_match): Update for renaming of
levenshtein_distance to get_edit_distance.
gcc/ChangeLog:
PR other/69968
* spellcheck-tree.c (levenshtein_distance): Rename to...
(get_edit_distance): ...this, and update for underlying renaming.
* spellcheck-tree.h (levenshtein_distance): Rename to...
(get_edit_distance): ...this.
* spellcheck.c (levenshtein_distance): Rename to...
(get_edit_distance): ...this. Convert from Levenshtein distance
to Damerau-Levenshtein distance by supporting transpositions of
adjacent characters. Rename "v1" to "v_next" and "v0" to
"v_one_ago".
(selftest::levenshtein_distance_unit_test_oneway): Rename to...
(selftest::test_edit_distance_unit_test_oneway): ...this, and
update for underlying renaming.
(selftest::levenshtein_distance_unit_test): Rename to...
(selftest::test_get_edit_distance_unit): ...this, and update for
underlying renaming.
(selftest::test_find_closest_string): Add example from PR 69968
where transposition helps
(selftest::test_metric_conditions): Update for renaming.
(selftest::test_metric_conditions): Likewise.
(selftest::spellcheck_c_tests): Likewise.
* spellcheck.h (levenshtein_distance): Rename both overloads to...
(get_edit_distance): ...this.
(best_match::consider): Update for renaming.
gcc/testsuite/ChangeLog:
PR other/69968
* gcc.dg/spellcheck-transposition.c: New test.
---
gcc/fortran/misc.c | 4 +-
gcc/spellcheck-tree.c | 12 +-
gcc/spellcheck-tree.h | 2 +-
gcc/spellcheck.c| 143 +++-
gcc/spellcheck.h| 14 +--
gcc/testsuite/gcc.dg/spellcheck-transposition.c | 20
6 files changed, 124 insertions(+), 71 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/spellcheck-transposition.c
diff --git a/gcc/fortran/misc.c b/gcc/fortran/misc.c
index ec1f548..fb18c5c 100644
--- a/gcc/fortran/misc.c
+++ b/gcc/fortran/misc.c
@@ -286,7 +286,7 @@ get_c_kind(const char *c_kind_name, CInteropKind_t
kinds_table[])
/* For a given name TYPO, determine the best candidate from CANDIDATES
- perusing Levenshtein distance. Frees CANDIDATES before returning. */
+ using get_edit_distance. Frees CANDIDATES before returning. */
const char *
gfc_closest_fuzzy_match (const char *typo, char **candidates)
@@ -299,7 +299,7 @@ gfc_closest_fuzzy_match (const char *typo, char
**candidates)
while (cand && *cand)
{
- edit_distance_t dist = levenshtein_distance (typo, tl, *cand,
+ edit_distance_t dist = get_edit_distance (typo, tl, *cand,
strlen (*cand));
if (dist < best_distance)
{
diff --git a/gcc/spellcheck-tree.c b/gcc/spellcheck-tree.c
index 2a66649..596293e 100644
--- a/gcc/spellcheck-tree.c
+++ b/gcc/spellcheck-tree.c
@@ -27,18 +27,18 @@ along with GCC; see the file COPYING3. If not see
#include "selftest.h"
#include "stringpool.h"
-/* Calculate Levenshtein distance between two identifiers. */
+/* Calculate edit distance between two identifiers. */
edit_distance_t
-levenshtein_distance (tree ident_s, tree ident_t)
+get_edit_distance (tree ident_s, tree ident_t)
{
gcc_assert (TREE_CODE (ident_s) == IDENTIFIER_NODE);
gcc_assert (TREE_CODE (ident_t) == IDENTIFIER_NODE);
- return levenshtein_distance (IDENTIFIER_POINTER (ident_s),
- IDENTIFIER_LENGTH (ident_s),
- IDENTIFIER_POINTER (ident_t),
- IDENTIFIER_LENGTH (ident_t));
+ return get_edit_distance (IDENTIFIER_POINTER (ident_s),
+ IDENTIFIER_LENGTH (ident_s),
+ IDENTIFIER_POINTER (ident_t),
+ IDENTIFIER_LENGTH (ident_t));
}
/* Given TARGET, an identifier, and CANDIDATES, a vec of identifiers,
diff --git a/gcc/spellcheck-tree.h b/gcc/spellcheck-tree.h
index 1debef5..4324bd2 100644
--- a/gcc/spellcheck-tree.h
+++ b/gcc/spellcheck-tree.h
@@ -25,7 +25,7 @@ along with GCC; see the file COPYING3. If not see
/* spellcheck-tree.c */
extern edit_distance_t
-levenshtein_distance (tree ident_s, tree ident_t);
+get_edit_distance (tree ident_s, tree ident_t);
extern tree
find_closest_identifier (tree target, const auto_vec *candidates);
diff --git a/gcc/spellcheck.c b/gcc/spellcheck.c
index