This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6277fc783f5 [SPARK-39374][SQL] Improve error message for user specified column list
6277fc783f5 is described below

commit 6277fc783f517656be66da76200b332807fd0595
Author: Yuming Wang <yumw...@ebay.com>
AuthorDate: Sun Jun 5 11:26:55 2022 +0300

    [SPARK-39374][SQL] Improve error message for user specified column list
    
    ### What changes were proposed in this pull request?
    
    This PR improves error message for user specified column list. For example:
    ```sql
    create table t1(c1 int, c2 bigint, c3 string) using parquet;
    insert into t1(c1, c2, c4) values(1, 2, 3);
    ```
    Before this PR:
    ```
    Cannot resolve column name c4; line 1 pos 0
    org.apache.spark.sql.AnalysisException: Cannot resolve column name c4; line 1 pos 0
    ```
    After this PR:
    ```
    [MISSING_COLUMN] Column 'c4' does not exist. Did you mean one of the following? [c1, c2, c3]; line 1 pos 0
    org.apache.spark.sql.AnalysisException: [MISSING_COLUMN] Column 'c4' does not exist. Did you mean one of the following? [c1, c2, c3]; line 1 pos 0
    ```
    
    ### Why are the changes needed?
    
    Improve error message.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing test.
    
    Closes #36760 from wangyum/SPARK-39374.
    
    Authored-by: Yuming Wang <yumw...@ebay.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala    | 7 ++++---
 .../scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala | 4 ----
 .../src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala   | 4 +++-
 .../src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala  | 3 ++-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 3017fc10dfd..087582a924b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -3424,9 +3424,10 @@ class Analyzer(override val catalogManager: CatalogManager)
         i.userSpecifiedCols, "in the column list", resolver)
 
       i.userSpecifiedCols.map { col =>
-          i.table.resolve(Seq(col), resolver)
-            .getOrElse(throw QueryCompilationErrors.cannotResolveUserSpecifiedColumnsError(
-              col, i.table))
+        i.table.resolve(Seq(col), resolver)
+          .getOrElse(i.failAnalysis(
+            errorClass = "MISSING_COLUMN",
+            messageParameters = Array(col, i.table.output.map(_.name).mkString(", "))))
       }
     }
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 49f979c0639..27ca00b489d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -402,10 +402,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
       "ORDER BY window_ordering) from table")
   }
 
-  def cannotResolveUserSpecifiedColumnsError(col: String, t: TreeNode[_]): Throwable = {
-    new AnalysisException(s"Cannot resolve column name $col", t.origin.line, t.origin.startPosition)
-  }
-
   def writeTableWithMismatchedColumnsError(
       columnSize: Int, outputSize: Int, t: TreeNode[_]): Throwable = {
     new AnalysisException("Cannot write to table due to mismatched user specified column " +
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala
index 5bbd74464ca..11abca1ddc7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala
@@ -166,7 +166,9 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils {
       val cols = Seq("c1", "c2", "c3")
       createTable("t1", cols, Seq("int", "long", "string"))
       val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1, c2, c4) values(1, 2, 3)"))
-      assert(e1.getMessage.contains("Cannot resolve column name c4"))
+      assert(e1.getMessage.contains(
+        "[MISSING_COLUMN] Column 'c4' does not exist. Did you mean one of the following? " +
+          "[c1, c2, c3]"))
     }
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index 1b70998c642..938d9b87d7c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -1277,7 +1277,8 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
         sql("create table t(i boolean default true, s bigint default 42) using parquet")
         assert(intercept[AnalysisException] {
           sql("insert into t (I) select true from (select 1)")
-        }.getMessage.contains("Cannot resolve column name I"))
+        }.getMessage.contains(
+          "[MISSING_COLUMN] Column 'I' does not exist. Did you mean one of the following? [i, s]"))
       }
     }
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to