On Fri, 2015-11-13 at 07:57 +0100, Marek Polacek wrote: > Probably coming too late, sorry.
> On Thu, Nov 12, 2015 at 09:08:36PM -0500, David Malcolm wrote: > > index 4335a87..eb4e1fc 100644 > > --- a/gcc/c/c-typeck.c > > +++ b/gcc/c/c-typeck.c > > @@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. If not see > > #include "c-family/c-ubsan.h" > > #include "cilk.h" > > #include "gomp-constants.h" > > +#include "spellcheck.h" > > > > /* Possible cases of implicit bad conversions. Used to select > > diagnostic messages in convert_for_assignment. */ > > @@ -2242,6 +2243,72 @@ lookup_field (tree type, tree component) > > return tree_cons (NULL_TREE, field, NULL_TREE); > > } > > > > +/* Recursively append candidate IDENTIFIER_NODEs to CANDIDATES. */ > > + > > +static void > > +lookup_field_fuzzy_find_candidates (tree type, tree component, > > + vec<tree> *candidates) > > +{ > > + tree field; > > + for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) > > I'd prefer declaring field in the for loop, so > for (tree field = TYPE_FIELDS... > > > + && (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE > > + || TREE_CODE (TREE_TYPE (field)) == UNION_TYPE)) > > This is RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)). I based this code on the code in lookup_field right above it; I copied-and-pasted that conditional, so presumably it should also be changed in lookup_field (which has the condition twice)? FWIW I notice RECORD_OR_UNION_TYPE_P also covers QUAL_UNION_TYPE. /* Nonzero if TYPE is a record or union type. */ #define RECORD_OR_UNION_TYPE_P(TYPE) \ (TREE_CODE (TYPE) == RECORD_TYPE \ || TREE_CODE (TYPE) == UNION_TYPE \ || TREE_CODE (TYPE) == QUAL_UNION_TYPE) FWIW I've made the change in the attached patch (both to the new function, and to lookup_field). > > + { > > + lookup_field_fuzzy_find_candidates (TREE_TYPE (field), > > + component, > > + candidates); > > + } > > Lose the brackets around a single statement. Done. > > + if (DECL_NAME (field)) > > + candidates->safe_push (DECL_NAME (field)); > > + } > > +} > > + > > +/* Like "lookup_field", but find the closest matching IDENTIFIER_NODE, > > + rather than returning a TREE_LIST for an exact match. */ > > + > > +static tree > > +lookup_field_fuzzy (tree type, tree component) > > +{ > > + gcc_assert (TREE_CODE (component) == IDENTIFIER_NODE); > > + > > + /* First, gather a list of candidates. */ > > + auto_vec <tree> candidates; > > + > > + lookup_field_fuzzy_find_candidates (type, component, > > + &candidates); > > + > > + /* Now determine which is closest. */ > > + int i; > > + tree identifier; > > + tree best_identifier = NULL; > > NULL_TREE Fixed. > > + edit_distance_t best_distance = MAX_EDIT_DISTANCE; > > + FOR_EACH_VEC_ELT (candidates, i, identifier) > > + { > > + gcc_assert (TREE_CODE (identifier) == IDENTIFIER_NODE); > > + edit_distance_t dist = levenshtein_distance (component, identifier); > > + if (dist < best_distance) > > + { > > + best_distance = dist; > > + best_identifier = identifier; > > + } > > + } > > + > > + /* If more than half of the letters were misspelled, the suggestion is > > + likely to be meaningless. */ > > + if (best_identifier) > > + { > > + unsigned int cutoff = MAX (IDENTIFIER_LENGTH (component), > > + IDENTIFIER_LENGTH (best_identifier)) / 2; > > + if (best_distance > cutoff) > > + return NULL; > > NULL_TREE Fixed. > > +/* The Levenshtein distance is an "edit-distance": the minimal > > + number of one-character insertions, removals or substitutions > > + that are needed to change one string into another. > > + > > + This implementation uses the Wagner-Fischer algorithm. */ > > + > > +static edit_distance_t > > +levenshtein_distance (const char *s, int len_s, > > + const char *t, int len_t) > > +{ > > + const bool debug = false; > > + > > + if (debug) > > + { > > + printf ("s: \"%s\" (len_s=%i)\n", s, len_s); > > + printf ("t: \"%s\" (len_t=%i)\n", t, len_t); > > + } > > Did you leave this debug stuff here intentionally? I find it useful, but I believe it's against our policy, so I've deleted it in the attached patch. > > + /* Build the rest of the row by considering neighbours to > > + the north, west and northwest. */ > > + for (int j = 0; j < len_s; j++) > > + { > > + edit_distance_t cost = (s[j] == t[i] ? 0 : 1); > > + edit_distance_t deletion = v1[j] + 1; > > + edit_distance_t insertion = v0[j + 1] + 1; > > The formatting doesn't look right here. It's correct; it's "diff" inserting two spaces before a tab combined with our mixed spaces+tab convention: the "for" is at column 6 (6 spaces), whereas the other lines are at column 8 (1 tab), which looks weird in a diff. Patch attached; only tested lightly so far (compiles, and passes spellcheck subset of tests). OK for trunk if it passes bootstrap®rtest?
>From b8ed3cbe9cc000416941e0108036f24f4483cdb0 Mon Sep 17 00:00:00 2001 From: David Malcolm <dmalc...@redhat.com> Date: Fri, 13 Nov 2015 07:22:16 -0500 Subject: [PATCH] Cleanups of spellchecking code gcc/c/ChangeLog: * c-typeck.c (lookup_field): Use RECORD_OR_UNION_TYPE_P in two places. (lookup_field_fuzzy_find_candidates): Use RECORD_OR_UNION_TYPE_P; formatting cleanups. (lookup_field_fuzzy): Use NULL_TREE rather than NULL. gcc/ChangeLog: * spellcheck.c (levenshtein_distance): Remove debug code. --- gcc/c/c-typeck.c | 24 +++++++++--------------- gcc/spellcheck.c | 24 ------------------------ 2 files changed, 9 insertions(+), 39 deletions(-) diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c index eb4e1fc..b084ca5 100644 --- a/gcc/c/c-typeck.c +++ b/gcc/c/c-typeck.c @@ -2166,8 +2166,7 @@ lookup_field (tree type, tree component) while (DECL_NAME (field_array[bot]) == NULL_TREE) { field = field_array[bot++]; - if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE - || TREE_CODE (TREE_TYPE (field)) == UNION_TYPE) + if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field))) { tree anon = lookup_field (TREE_TYPE (field), component); @@ -2213,8 +2212,7 @@ lookup_field (tree type, tree component) for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) { if (DECL_NAME (field) == NULL_TREE - && (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE - || TREE_CODE (TREE_TYPE (field)) == UNION_TYPE)) + && RECORD_OR_UNION_TYPE_P (TREE_TYPE (field))) { tree anon = lookup_field (TREE_TYPE (field), component); @@ -2249,17 +2247,13 @@ static void lookup_field_fuzzy_find_candidates (tree type, tree component, vec<tree> *candidates) { - tree field; - for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) + for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) { if (DECL_NAME (field) == NULL_TREE - && (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE - || TREE_CODE (TREE_TYPE (field)) == UNION_TYPE)) - { - lookup_field_fuzzy_find_candidates (TREE_TYPE (field), - component, - candidates); - } + && RECORD_OR_UNION_TYPE_P (TREE_TYPE (field))) + lookup_field_fuzzy_find_candidates (TREE_TYPE (field), + component, + candidates); if (DECL_NAME (field)) candidates->safe_push (DECL_NAME (field)); @@ -2283,7 +2277,7 @@ lookup_field_fuzzy (tree type, tree component) /* Now determine which is closest. */ int i; tree identifier; - tree best_identifier = NULL; + tree best_identifier = NULL_TREE; edit_distance_t best_distance = MAX_EDIT_DISTANCE; FOR_EACH_VEC_ELT (candidates, i, identifier) { @@ -2303,7 +2297,7 @@ lookup_field_fuzzy (tree type, tree component) unsigned int cutoff = MAX (IDENTIFIER_LENGTH (component), IDENTIFIER_LENGTH (best_identifier)) / 2; if (best_distance > cutoff) - return NULL; + return NULL_TREE; } return best_identifier; diff --git a/gcc/spellcheck.c b/gcc/spellcheck.c index 32854cf..6432e385 100644 --- a/gcc/spellcheck.c +++ b/gcc/spellcheck.c @@ -34,14 +34,6 @@ edit_distance_t levenshtein_distance (const char *s, int len_s, const char *t, int len_t) { - const bool debug = false; - - if (debug) - { - printf ("s: \"%s\" (len_s=%i)\n", s, len_s); - printf ("t: \"%s\" (len_t=%i)\n", t, len_t); - } - if (len_s == 0) return len_t; if (len_t == 0) @@ -67,14 +59,6 @@ levenshtein_distance (const char *s, int len_s, /* Build successive rows. */ for (int i = 0; i < len_t; i++) { - if (debug) - { - printf ("i:%i v0 = ", i); - for (int j = 0; j < len_s + 1; j++) - printf ("%i ", v0[j]); - printf ("\n"); - } - /* The initial column is for the case of an empty source string; we can reach prefixes of the target string of length i by inserting i characters. */ @@ -98,14 +82,6 @@ levenshtein_distance (const char *s, int len_s, v0[j] = v1[j]; } - if (debug) - { - printf ("final v1 = "); - for (int j = 0; j < len_s + 1; j++) - printf ("%i ", v1[j]); - printf ("\n"); - } - edit_distance_t result = v1[len_s]; delete[] v0; delete[] v1; -- 1.8.5.3