Re: [HACKERS] Doing better at HINTing an appropriate column within errorMissingColumn()

Peter Geoghegan Sat, 20 Dec 2014 15:19:15 -0800

On Tue, Dec 16, 2014 at 12:18 PM, Stephen Frost <[email protected]> wrote:
> * Tom Lane ([email protected]) wrote:
>> So my two cents is that when considering a qualified name, this patch
>> should take levenshtein distance across the two components equally.
>> There's no good reason to suppose that typos will attack one name
>> component more (nor less) than the other.
>
> Agreed (since it seems like folks are curious for the opinion's of
> mostly bystanders).
>
> +1 to the above for my part.


Okay, then. Attached patch implements this scheme. It is identical to
the previous revision, except that iff there was an alias specified
and that alias does not match the correct name (alias/table name) of
the RTE currently under consideration, we charge the distance between
the differing aliases rather than a fixed distance of 1.

-- 
Peter Geoghegan

From 91087191ba49e5fed7fdfa43f98deb009c2b3e0e Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <[email protected]>
Date: Wed, 12 Nov 2014 15:31:37 -0800
Subject: [PATCH] Levenshtein distance column HINT

Add a new HINT -- a guess as to what column the user might have intended
to reference, to be shown in various contexts where an
ERRCODE_UNDEFINED_COLUMN error is raised.  The user will see this HINT
when he or she fat-fingers a column reference in an ad-hoc SQL query, or
incorrectly pluralizes or fails to pluralize a column reference, or
incorrectly omits or includes an underscore or other punctuation
character.

The HINT suggests a column in the range table with the lowest
Levenshtein distance, or the tied-for-best pair of matching columns in
the event of there being exactly two equally likely candidates (these
may come from multiple RTEs, or the same RTE).  Limiting to two the
number of cases where multiple equally likely suggestions are all
offered at once (i.e.  giving no hint when the number of equally likely
candidates exceeds two) is a measure against suggestions that are of low
quality in an absolute sense.

A further, final measure is taken against suggestions that are of low
absolute quality:  If the distance exceeds a normalized distance
threshold, no suggestion is given.
---
 src/backend/parser/parse_expr.c           |   9 +-
 src/backend/parser/parse_func.c           |   2 +-
 src/backend/parser/parse_relation.c       | 325 +++++++++++++++++++++++++++---
 src/backend/utils/adt/levenshtein.c       |   9 +
 src/include/parser/parse_relation.h       |  20 +-
 src/test/regress/expected/alter_table.out |   3 +
 src/test/regress/expected/join.out        |  38 ++++
 src/test/regress/sql/join.sql             |  24 +++
 8 files changed, 392 insertions(+), 38 deletions(-)

diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 4a8aaf6..a77a3a0 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -621,7 +621,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 				colname = strVal(field2);
 
 				/* Try to identify as a column of the RTE */
-				node = scanRTEForColumn(pstate, rte, colname, cref->location);
+				node = scanRTEForColumn(pstate, rte, colname, cref->location,
+										NULL);
 				if (node == NULL)
 				{
 					/* Try it as a function call on the whole row */
@@ -666,7 +667,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 				colname = strVal(field3);
 
 				/* Try to identify as a column of the RTE */
-				node = scanRTEForColumn(pstate, rte, colname, cref->location);
+				node = scanRTEForColumn(pstate, rte, colname, cref->location,
+										NULL);
 				if (node == NULL)
 				{
 					/* Try it as a function call on the whole row */
@@ -724,7 +726,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
 				colname = strVal(field4);
 
 				/* Try to identify as a column of the RTE */
-				node = scanRTEForColumn(pstate, rte, colname, cref->location);
+				node = scanRTEForColumn(pstate, rte, colname, cref->location,
+										NULL);
 				if (node == NULL)
 				{
 					/* Try it as a function call on the whole row */
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index 9ebd3fd..472e15e 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -1779,7 +1779,7 @@ ParseComplexProjection(ParseState *pstate, char *funcname, Node *first_arg,
 									 ((Var *) first_arg)->varno,
 									 ((Var *) first_arg)->varlevelsup);
 		/* Return a Var if funcname matches a column, else NULL */
-		return scanRTEForColumn(pstate, rte, funcname, location);
+		return scanRTEForColumn(pstate, rte, funcname, location, NULL);
 	}
 
 	/*
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index 478584d..e6adee1 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -15,6 +15,7 @@
 #include "postgres.h"
 
 #include <ctype.h>
+#include <limits.h>
 
 #include "access/htup_details.h"
 #include "access/sysattr.h"
@@ -520,6 +521,69 @@ GetCTEForRTE(ParseState *pstate, RangeTblEntry *rte, int rtelevelsup)
 }
 
 /*
+ * updateFuzzyAttrMatchState
+ *	  Using Levenshtein distance, consider if column is best fuzzy match.
+ */
+static void
+updateFuzzyAttrMatchState(FuzzyAttrMatchState *fuzzystate, const char *actual,
+						  const char *match, int attnum)
+{
+	int		columndistance;
+
+	/*
+	 * Outright reject dropped columns, which can appear here with apparent
+	 * empty actual names, per remarks within scanRTEForColumn().
+	 */
+	if (strcmp(actual, "") == 0)
+		columndistance = INT_MAX;
+	else
+		/* Use standard costs for Levenshtein distance */
+		columndistance = varstr_levenshtein_less_equal(actual, strlen(actual),
+													   match, strlen(match),
+													   1, 1, 1,
+													   fuzzystate->distance);
+
+	if (columndistance < fuzzystate->distance)
+	{
+		/* Store new lowest observed distance for RTE */
+		fuzzystate->distance = columndistance;
+		fuzzystate->first = attnum;
+		fuzzystate->second = InvalidAttrNumber;
+	}
+	else if (columndistance == fuzzystate->distance)
+	{
+		/*
+		 * This match distance may equal a prior match within this same
+		 * range table.  When that happens, the prior match may also be
+		 * given, but only if there is no more than two equally distant
+		 * matches from the RTE (in turn, our caller will only accept
+		 * two equally distant matches overall).
+		 */
+		if (AttributeNumberIsValid(fuzzystate->second))
+		{
+			/* Too many RTE-level matches */
+			fuzzystate->first = fuzzystate->second = InvalidAttrNumber;
+			/* Clearly, distance is too low a bar (for *any* RTE) */
+			fuzzystate->distance = columndistance - 1;
+		}
+		else if (AttributeNumberIsValid(fuzzystate->first))
+		{
+			/* Record as provisional second match for RTE */
+			fuzzystate->second = attnum;
+		}
+		else
+		{
+			/*
+			 * Record as provisional first match (this can occasionally
+			 * occur because previous lowest distance was "too low a
+			 * bar", rather than being associated with a real match)
+			 */
+			fuzzystate->first = attnum;
+		}
+	}
+}
+
+/*
  * scanRTEForColumn
  *	  Search the column names of a single RTE for the given name.
  *	  If found, return an appropriate Var node, else return NULL.
@@ -527,10 +591,22 @@ GetCTEForRTE(ParseState *pstate, RangeTblEntry *rte, int rtelevelsup)
  *
  * Side effect: if we find a match, mark the RTE as requiring read access
  * for the column.
+ *
+ * For those callers that will settle for a fuzzy match (for the purposes of
+ * building diagnostic messages), we match the column attribute whose name has
+ * the lowest Levenshtein distance from colname.  Such callers should not rely
+ * on the return value (even when there is an exact match), nor should they
+ * expect the usual side effect (unless there is an exact match).  This hardly
+ * matters in practice, since an error is imminent.
+ *
+ * If there are two or more attributes in the range table entry tied for
+ * closest, or if there are no matches, accurately report the shortest distance
+ * found overall while not setting a closest attribute.  Note that we never
+ * consider system column names when performing fuzzy matching.
  */
 Node *
 scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte, char *colname,
-				 int location)
+				 int location, FuzzyAttrMatchState *fuzzystate)
 {
 	Node	   *result = NULL;
 	int			attnum = 0;
@@ -548,12 +624,16 @@ scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte, char *colname,
 	 * Should this somehow go wrong and we try to access a dropped column,
 	 * we'll still catch it by virtue of the checks in
 	 * get_rte_attribute_type(), which is called by make_var().  That routine
-	 * has to do a cache lookup anyway, so the check there is cheap.
+	 * has to do a cache lookup anyway, so the check there is cheap.  Callers
+	 * interested in finding match with shortest distance need to defend
+	 * against this directly, though.
 	 */
 	foreach(c, rte->eref->colnames)
 	{
+		const char *attcolname = strVal(lfirst(c));
+
 		attnum++;
-		if (strcmp(strVal(lfirst(c)), colname) == 0)
+		if (strcmp(attcolname, colname) == 0)
 		{
 			if (result)
 				ereport(ERROR,
@@ -566,6 +646,14 @@ scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte, char *colname,
 			markVarForSelectPriv(pstate, var, rte);
 			result = (Node *) var;
 		}
+
+		/*
+		 * Consider updating fuzzy state passed by callers concerned with
+		 * diagnostic messages.  Fuzzy state will be set for the best (or joint
+		 * best) matching colname observed so far.
+		 */
+		if (fuzzystate != NULL)
+			updateFuzzyAttrMatchState(fuzzystate, attcolname, colname, attnum);
 	}
 
 	/*
@@ -642,7 +730,8 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly,
 				continue;
 
 			/* use orig_pstate here to get the right sublevels_up */
-			newresult = scanRTEForColumn(orig_pstate, rte, colname, location);
+			newresult = scanRTEForColumn(orig_pstate, rte, colname, location,
+										 NULL);
 
 			if (newresult)
 			{
@@ -668,8 +757,15 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly,
 
 /*
  * searchRangeTableForCol
- *	  See if any RangeTblEntry could possibly provide the given column name.
- *	  If so, return a pointer to the RangeTblEntry; else return NULL.
+ *	  See if any RangeTblEntry could possibly provide the given column name (or
+ *	  find the best match available).  Returns state with relevant details.
+ *
+ * Column name may be matched fuzzily;  we provide the closet column(s) if
+ * there was not an exact match.  Caller can depend on returned state to find
+ * right attribute.  If first attribute is InvalidAttrNumber, but corresponding
+ * RTE is set, that indicates an exact match (i.e. column name is present, but
+ * presumably not visible).  However, if the wrong alias was specified by user,
+ * the first match attribute *is* set.
  *
  * This is different from colNameToVar in that it considers every entry in
  * the ParseState's rangetable(s), not only those that are currently visible
@@ -678,10 +774,16 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly,
  * matches, but only one will be returned).  This must be used ONLY as a
  * heuristic in giving suitable error messages.  See errorMissingColumn.
  */
-static RangeTblEntry *
-searchRangeTableForCol(ParseState *pstate, char *colname, int location)
+static FuzzyAttrMatchState *
+searchRangeTableForCol(ParseState *pstate, const char *alias, char *colname,
+					   int location)
 {
 	ParseState *orig_pstate = pstate;
+	FuzzyAttrMatchState *state = palloc(sizeof(FuzzyAttrMatchState));
+
+	state->distance = INT_MAX;
+	state->rsecond = state->rfirst = NULL;
+	state->second = state->first = InvalidAttrNumber;
 
 	while (pstate != NULL)
 	{
@@ -689,15 +791,136 @@ searchRangeTableForCol(ParseState *pstate, char *colname, int location)
 
 		foreach(l, pstate->p_rtable)
 		{
-			RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
+			RangeTblEntry	   *rte = (RangeTblEntry *) lfirst(l);
+			FuzzyAttrMatchState	rtestate;
+			bool				wrongalias;
 
-			if (scanRTEForColumn(orig_pstate, rte, colname, location))
-				return rte;
+			/*
+			 * Typically, it is not useful to look for matches within join
+			 * RTEs;  they effectively duplicate other RTEs for our purposes,
+			 * and if a match is chosen from a join RTE, an unhelpful alias is
+			 * displayed in the final diagnostic message.
+			 */
+			if (rte->rtekind == RTE_JOIN)
+				continue;
+
+			/*
+			 * Get single best match (or pair of joint best matches, or no
+			 * match) from each RTE -- the best two columns ultimately
+			 * suggested may or may not both be from the same RTE.
+			 *
+			 * Initialize RTE's distance to INT_MAX (and not RT state's current
+			 * lowest distance) to ensure that per-RTE penalties do not distort
+			 * per-RT costing.
+			 */
+			rtestate.distance = INT_MAX;
+			rtestate.rsecond = rtestate.rfirst = NULL;
+			rtestate.second = rtestate.first = InvalidAttrNumber;
+			scanRTEForColumn(orig_pstate, rte, colname, location, &rtestate);
+
+			/* Avoid totally non-matching RTEs (e.g. no RTE attributes) */
+			if (!AttributeNumberIsValid(rtestate.first))
+				continue;
+
+			/* Was alias provided by user that does not match entry's alias? */
+			wrongalias = (alias && strcmp(alias, rte->eref->aliasname) != 0);
+
+			if (rtestate.distance == 0)
+			{
+				/*
+				 * Exact match (for "wrong alias" or "wrong level" cases).
+				 *
+				 * Only consider first element for RTE, because there can only
+				 * be one exact match -- it doesn't seem worth considering the
+				 * case where there are multiple exact matches, so we're done.
+				 */
+				state->rfirst = rte;
+				state->first = wrongalias? rtestate.first : InvalidAttrNumber;
+				state->rsecond = NULL;
+				state->second = InvalidAttrNumber;
+
+				return state;
+			}
+
+			/*
+			 * Charge extra (for inexact matches only) when an alias was
+			 * specified that differs from what might have been used to
+			 * correctly qualify this RTE's closest column
+			 */
+			if (wrongalias)
+				rtestate.distance += varstr_levenshtein(alias,
+														strlen(alias),
+														rte->eref->aliasname,
+														strlen(rte->eref->aliasname),
+														1, 1, 1);
+
+			if (rtestate.distance < state->distance)
+			{
+				/*
+				 * New, uncontested best match RTE, with 1 or 2 best match
+				 * columns
+				 */
+				state->distance = rtestate.distance;
+
+				state->rfirst = rte;
+				state->first = rtestate.first;
+				state->rsecond =
+					AttributeNumberIsValid(rtestate.second)? rte: NULL;
+				state->second = rtestate.second;
+			}
+			else if (rtestate.distance == state->distance)
+			{
+				/*
+				 * Can't have 3 or more matches at same distance.
+				 *
+				 * It's useful to provide two matches for the common case where
+				 * two range tables have single equidistant candidates, as when
+				 * an unqualified (and therefore would-be ambiguous) column
+				 * name is specified which is also misspelled by the user --
+				 * there is probably a foreign key relationship between
+				 * tables/RTEs.  It's also possible to usefully give two column
+				 * suggestions originating from the same RTE, which may be
+				 * useful when an alias strongly suggests that RTE, while there
+				 * are 2 somewhat close matches.
+				 *
+				 * However, when there are more than 2 equally distant matches,
+				 * that's probably because the matches are not useful at all,
+				 * so don't suggest anything.
+				 */
+				if (AttributeNumberIsValid(state->second) ||
+					AttributeNumberIsValid(rtestate.second))
+				{
+					/* 3 or more equidistant matches -- RTE is uninteresting */
+					state->rsecond = state->rfirst = NULL;
+					state->second = state->first = InvalidAttrNumber;
+				}
+				else
+				{
+					/* Record as provisional second match for RT */
+					Assert(state->rfirst != NULL &&
+						   AttributeNumberIsValid(state->first));
+					Assert(state->rsecond == NULL &&
+						   !AttributeNumberIsValid(state->second));
+					state->rsecond = rte;
+					state->second = rtestate.first;
+				}
+			}
 		}
 
 		pstate = pstate->parentParseState;
 	}
-	return NULL;
+
+	/*
+	 * Final, absolute quality test:  distance must be less than a normalized
+	 * threshold in order to avoid completely ludicrous suggestions
+	 */
+	if (state->distance > strlen(colname) / 2)
+	{
+		state->rsecond = state->rfirst = NULL;
+		state->second = state->first = InvalidAttrNumber;
+	}
+
+	return state;
 }
 
 /*
@@ -2862,34 +3085,70 @@ void
 errorMissingColumn(ParseState *pstate,
 				   char *relname, char *colname, int location)
 {
-	RangeTblEntry *rte;
+	FuzzyAttrMatchState	   *state;
+	char				   *closestfirst = NULL;
 
 	/*
-	 * If relname was given, just play dumb and report it.  (In practice, a
-	 * bad qualification name should end up at errorMissingRTE, not here, so
-	 * no need to work hard on this case.)
+	 * Search the entire rtable looking for possible matches.  If we find one,
+	 * emit a hint about it.
+	 *
+	 * TODO: improve this code (and also errorMissingRTE) to mention using
+	 * LATERAL if appropriate.
 	 */
-	if (relname)
-		ereport(ERROR,
-				(errcode(ERRCODE_UNDEFINED_COLUMN),
-				 errmsg("column %s.%s does not exist", relname, colname),
-				 parser_errposition(pstate, location)));
+	state = searchRangeTableForCol(pstate, relname, colname, location);
 
 	/*
-	 * Otherwise, search the entire rtable looking for possible matches.  If
-	 * we find one, emit a hint about it.
+	 * In practice a bad qualification name should end up at errorMissingRTE,
+	 * not here, so no need to work hard on this case.
 	 *
-	 * TODO: improve this code (and also errorMissingRTE) to mention using
-	 * LATERAL if appropriate.
+	 * Extract closest col string for best match, if any.
+	 *
+	 * Infer an exact match referenced despite not being visible from the fact
+	 * that an attribute number was not present in state passed back -- this is
+	 * what is reported when !closestfirst.  There might also be an exact match
+	 * that was qualified with an incorrect alias, in which case closestfirst
+	 * will be set (so hint is the same as generic fuzzy case).
 	 */
-	rte = searchRangeTableForCol(pstate, colname, location);
-
-	ereport(ERROR,
-			(errcode(ERRCODE_UNDEFINED_COLUMN),
-			 errmsg("column \"%s\" does not exist", colname),
-			 rte ? errhint("There is a column named \"%s\" in table \"%s\", but it cannot be referenced from this part of the query.",
-						   colname, rte->eref->aliasname) : 0,
-			 parser_errposition(pstate, location)));
+	if (state->rfirst && AttributeNumberIsValid(state->first))
+		closestfirst = strVal(list_nth(state->rfirst->eref->colnames,
+									   state->first - 1));
+
+	if (!state->rsecond)
+	{
+		/*
+		 * Handle case where there is zero or one column suggestions to hint,
+		 * including exact matches referenced but not visible.
+		 */
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 relname?
+				 errmsg("column %s.%s does not exist", relname, colname):
+				 errmsg("column \"%s\" does not exist", colname),
+				 state->rfirst? closestfirst?
+				 errhint("Perhaps you meant to reference the column \"%s\".\"%s\".",
+						 state->rfirst->eref->aliasname, closestfirst):
+				 errhint("There is a column named \"%s\" in table \"%s\", but it cannot be referenced from this part of the query.",
+						 colname, state->rfirst->eref->aliasname): 0,
+				 parser_errposition(pstate, location)));
+	}
+	else
+	{
+		/* Handle case where there are two equally useful column hints */
+		char				   *closestsecond;
+
+		closestsecond = strVal(list_nth(state->rsecond->eref->colnames,
+										state->second - 1));
+
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_COLUMN),
+				 relname?
+				 errmsg("column %s.%s does not exist", relname, colname):
+				 errmsg("column \"%s\" does not exist", colname),
+				 errhint("Perhaps you meant to reference the column \"%s\".\"%s\" or the column \"%s\".\"%s\".",
+						 state->rfirst->eref->aliasname, closestfirst,
+						 state->rsecond->eref->aliasname, closestsecond),
+				 parser_errposition(pstate, location)));
+	}
 }
 
 
diff --git a/src/backend/utils/adt/levenshtein.c b/src/backend/utils/adt/levenshtein.c
index a8670e9..8d565c6 100644
--- a/src/backend/utils/adt/levenshtein.c
+++ b/src/backend/utils/adt/levenshtein.c
@@ -95,6 +95,15 @@ varstr_levenshtein(const char *source, int slen, const char *target, int tlen,
 #define STOP_COLUMN m
 #endif
 
+	/*
+	 * A common use for Levenshtein distance is to match attributes when building
+	 * diagnostic, user-visible messages.  Restrict the size of
+	 * MAX_LEVENSHTEIN_STRLEN at compile time so that this is guaranteed to
+	 * work.
+	 */
+	StaticAssertStmt(NAMEDATALEN <= MAX_LEVENSHTEIN_STRLEN,
+					 "Levenshtein hinting mechanism restricts NAMEDATALEN");
+
 	m = pg_mbstrlen_with_len(source, slen);
 	n = pg_mbstrlen_with_len(target, tlen);
 
diff --git a/src/include/parser/parse_relation.h b/src/include/parser/parse_relation.h
index d8b9493..b587abc 100644
--- a/src/include/parser/parse_relation.h
+++ b/src/include/parser/parse_relation.h
@@ -16,6 +16,24 @@
 
 #include "parser/parse_node.h"
 
+
+/*
+ * Support for fuzzily matching column.
+ *
+ * This is for building diagnostic messages, where non-exact matching
+ * attributes are suggested to the user.  The struct's fields may be facets of
+ * a particular RTE, or of an entire range table, depending on context.
+ */
+typedef struct
+{
+	int				distance;	/* Weighted distance (lowest so far) */
+	RangeTblEntry  *rfirst;		/* RTE of first */
+	AttrNumber		first;		/* Closest attribute so far */
+	RangeTblEntry  *rsecond;	/* RTE of second */
+	AttrNumber		second;		/* Second closest attribute so far */
+} FuzzyAttrMatchState;
+
+
 extern RangeTblEntry *refnameRangeTblEntry(ParseState *pstate,
 					 const char *schemaname,
 					 const char *refname,
@@ -35,7 +53,7 @@ extern RangeTblEntry *GetRTEByRangeTablePosn(ParseState *pstate,
 extern CommonTableExpr *GetCTEForRTE(ParseState *pstate, RangeTblEntry *rte,
 			 int rtelevelsup);
 extern Node *scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte,
-				 char *colname, int location);
+				 char *colname, int location, FuzzyAttrMatchState *fuzzystate);
 extern Node *colNameToVar(ParseState *pstate, char *colname, bool localonly,
 			 int location);
 extern void markVarForSelectPriv(ParseState *pstate, Var *var,
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out
index d233710..51db1b6 100644
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -536,6 +536,7 @@ create table atacc1 ( test int );
 -- add a check constraint (fails)
 alter table atacc1 add constraint atacc_test1 check (test1>3);
 ERROR:  column "test1" does not exist
+HINT:  Perhaps you meant to reference the column "atacc1"."test".
 drop table atacc1;
 -- something a little more complicated
 create table atacc1 ( test int, test2 int, test3 int);
@@ -1342,6 +1343,7 @@ select f1 from c1;
 ERROR:  column "f1" does not exist
 LINE 1: select f1 from c1;
                ^
+HINT:  Perhaps you meant to reference the column "c1"."f2".
 drop table p1 cascade;
 NOTICE:  drop cascades to table c1
 create table p1 (f1 int, f2 int);
@@ -1355,6 +1357,7 @@ select f1 from c1;
 ERROR:  column "f1" does not exist
 LINE 1: select f1 from c1;
                ^
+HINT:  Perhaps you meant to reference the column "c1"."f2".
 drop table p1 cascade;
 NOTICE:  drop cascades to table c1
 create table p1 (f1 int, f2 int);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 2501184..1bb810d 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2222,6 +2222,12 @@ select * from t1 left join t2 on (t1.a = t2.a);
  200 | 1000 | 200 | 2001
 (5 rows)
 
+-- Test matching of column name with wrong alias
+select t1.x from t1 join t3 on (t1.a = t3.x);
+ERROR:  column t1.x does not exist
+LINE 1: select t1.x from t1 join t3 on (t1.a = t3.x);
+               ^
+HINT:  Perhaps you meant to reference the column "t3"."x".
 --
 -- regression test for 8.1 merge right join bug
 --
@@ -3415,6 +3421,38 @@ select * from
 (0 rows)
 
 --
+-- Test hints given on incorrect column references are useful
+--
+select t1.uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t1" suggestipn
+ERROR:  column t1.uunique1 does not exist
+LINE 1: select t1.uunique1 from
+               ^
+HINT:  Perhaps you meant to reference the column "t1"."unique1".
+select t2.uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t2" suggestion
+ERROR:  column t2.uunique1 does not exist
+LINE 1: select t2.uunique1 from
+               ^
+HINT:  Perhaps you meant to reference the column "t2"."unique1".
+select uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, suggest both at once
+ERROR:  column "uunique1" does not exist
+LINE 1: select uunique1 from
+               ^
+HINT:  Perhaps you meant to reference the column "t1"."unique1" or the column "t2"."unique1".
+--
+-- Take care to reference the correct RTE
+--
+select atts.relid::regclass, s.* from pg_stats s join
+    pg_attribute a on s.attname = a.attname and s.tablename =
+    a.attrelid::regclass::text join (select unnest(indkey) attnum,
+    indexrelid from pg_index i) atts on atts.attnum = a.attnum where
+    schemaname != 'pg_catalog';
+ERROR:  column atts.relid does not exist
+LINE 1: select atts.relid::regclass, s.* from pg_stats s join
+               ^
+--
 -- Test LATERAL
 --
 select unique2, x.*
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 718e1d9..ca7f966 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -397,6 +397,10 @@ insert into t2a values (200, 2001);
 
 select * from t1 left join t2 on (t1.a = t2.a);
 
+-- Test matching of column name with wrong alias
+
+select t1.x from t1 join t3 on (t1.a = t3.x);
+
 --
 -- regression test for 8.1 merge right join bug
 --
@@ -1051,6 +1055,26 @@ select * from
   int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok
 
 --
+-- Test hints given on incorrect column references are useful
+--
+
+select t1.uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t1" suggestipn
+select t2.uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t2" suggestion
+select uunique1 from
+  tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, suggest both at once
+
+--
+-- Take care to reference the correct RTE
+--
+
+select atts.relid::regclass, s.* from pg_stats s join
+    pg_attribute a on s.attname = a.attname and s.tablename =
+    a.attrelid::regclass::text join (select unnest(indkey) attnum,
+    indexrelid from pg_index i) atts on atts.attnum = a.attnum where
+    schemaname != 'pg_catalog';
+--
 -- Test LATERAL
 --
 
-- 
1.9.1

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] Doing better at HINTing an appropriate column within errorMissingColumn()

Reply via email to