This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 6277fc783f5 [SPARK-39374][SQL] Improve error message for user specified column list 6277fc783f5 is described below commit 6277fc783f517656be66da76200b332807fd0595 Author: Yuming Wang <yumw...@ebay.com> AuthorDate: Sun Jun 5 11:26:55 2022 +0300 [SPARK-39374][SQL] Improve error message for user specified column list ### What changes were proposed in this pull request? This PR improves the error message for a user-specified column list. For example: ```sql create table t1(c1 int, c2 bigint, c3 string) using parquet; insert into t1(c1, c2, c4) values(1, 2, 3); ``` Before this PR: ``` Cannot resolve column name c4; line 1 pos 0 org.apache.spark.sql.AnalysisException: Cannot resolve column name c4; line 1 pos 0 ``` After this PR: ``` [MISSING_COLUMN] Column 'c4' does not exist. Did you mean one of the following? [c1, c2, c3]; line 1 pos 0 org.apache.spark.sql.AnalysisException: [MISSING_COLUMN] Column 'c4' does not exist. Did you mean one of the following? [c1, c2, c3]; line 1 pos 0 ``` ### Why are the changes needed? It improves the error message by reporting the existing columns alongside the unresolved one. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test. Closes #36760 from wangyum/SPARK-39374. 
Authored-by: Yuming Wang <yumw...@ebay.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 7 ++++--- .../scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala | 4 ---- .../src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala | 4 +++- .../src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala | 3 ++- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 3017fc10dfd..087582a924b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3424,9 +3424,10 @@ class Analyzer(override val catalogManager: CatalogManager) i.userSpecifiedCols, "in the column list", resolver) i.userSpecifiedCols.map { col => - i.table.resolve(Seq(col), resolver) - .getOrElse(throw QueryCompilationErrors.cannotResolveUserSpecifiedColumnsError( - col, i.table)) + i.table.resolve(Seq(col), resolver) + .getOrElse(i.failAnalysis( + errorClass = "MISSING_COLUMN", + messageParameters = Array(col, i.table.output.map(_.name).mkString(", ")))) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 49f979c0639..27ca00b489d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -402,10 +402,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { "ORDER BY window_ordering) from table") } - def cannotResolveUserSpecifiedColumnsError(col: String, t: TreeNode[_]): Throwable = { - new AnalysisException(s"Cannot resolve column 
name $col", t.origin.line, t.origin.startPosition) - } - def writeTableWithMismatchedColumnsError( columnSize: Int, outputSize: Int, t: TreeNode[_]): Throwable = { new AnalysisException("Cannot write to table due to mismatched user specified column " + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala index 5bbd74464ca..11abca1ddc7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala @@ -166,7 +166,9 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { val cols = Seq("c1", "c2", "c3") createTable("t1", cols, Seq("int", "long", "string")) val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1, c2, c4) values(1, 2, 3)")) - assert(e1.getMessage.contains("Cannot resolve column name c4")) + assert(e1.getMessage.contains( + "[MISSING_COLUMN] Column 'c4' does not exist. Did you mean one of the following? " + + "[c1, c2, c3]")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 1b70998c642..938d9b87d7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -1277,7 +1277,8 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { sql("create table t(i boolean default true, s bigint default 42) using parquet") assert(intercept[AnalysisException] { sql("insert into t (I) select true from (select 1)") - }.getMessage.contains("Cannot resolve column name I")) + }.getMessage.contains( + "[MISSING_COLUMN] Column 'I' does not exist. Did you mean one of the following? 
[i, s]")) } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org