This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 0ef7afe0dc3 [SPARK-41931][SQL] Better error message for incomplete 
complex type definition
0ef7afe0dc3 is described below

commit 0ef7afe0dc3723b97b750c071a908f363e514a26
Author: Runyao Chen <runyao.c...@databricks.com>
AuthorDate: Fri Jan 27 18:06:32 2023 +0300

    [SPARK-41931][SQL] Better error message for incomplete complex type 
definition
    
    ### What changes were proposed in this pull request?
    
    This PR improves error messages for `ARRAY` / `MAP` / `STRUCT` types 
without element type specification. A new error class 
`INCOMPLETE_TYPE_DEFINITION` with subclasses (`ARRAY`, `MAP`, and `STRUCT`) is 
introduced.
    
    **Details**
    
    In the case where we `CAST AS` or `CREATE` a complex type without 
specifying its element type,
    e.g.
    ```
    CREATE TABLE t (col ARRAY)
    ```
    `[UNSUPPORTED_DATATYPE] Unsupported data type "ARRAY"` error would be 
thrown, while we do support the `ARRAY` type and just require it to be typed.
    
    This PR proposes a better error message like
    ```
    The definition of `ARRAY` type is incomplete. You must provide an element 
type. For example: `ARRAY<elementType>`.
    ```
    
    ### Why are the changes needed?
    
    The previous error message for incomplete complex types is confusing. An 
`UNSUPPORTED_DATATYPE` error would be thrown, while we do support complex 
types. We just require complex types to have their element types specified. We 
need a clear error message with an example in this case.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, this PR changes the error message which is user-facing.
    
    Error message before this PR:
    ```
    spark-sql> SELECT CAST(array(1, 2, 3) AS ARRAY);
    
    [UNSUPPORTED_DATATYPE] Unsupported data type "ARRAY"(line 1, pos 30)
    ```
    
    Error message after this PR:
    ```
    [INCOMPLETE_TYPE_DEFINITION.ARRAY] Incomplete complex type: The definition 
of `ARRAY` type is incomplete. You must provide an element type. For example: 
`ARRAY<elementType>`.
    ```
    Similarly for MAP and STRUCT types.
    
    ### How was this patch tested?
    
    Added unit tests covering CAST and CREATE with ARRAY / STRUCT / MAP types 
and their nested combinations.
    
    Closes #39711 from RunyaoChen/better_error_msg_nested_type.
    
    Lead-authored-by: Runyao Chen <runyao.c...@databricks.com>
    Co-authored-by: RunyaoChen <runyao.c...@databricks.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 core/src/main/resources/error/error-classes.json   | 23 +++++++
 .../spark/sql/catalyst/parser/AstBuilder.scala     |  2 +
 .../spark/sql/errors/QueryParsingErrors.scala      | 21 +++++++
 .../spark/sql/errors/QueryParsingErrorsSuite.scala | 72 ++++++++++++++++++++++
 4 files changed, 118 insertions(+)

diff --git a/core/src/main/resources/error/error-classes.json 
b/core/src/main/resources/error/error-classes.json
index e6876751a22..ae766de3e20 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -592,6 +592,29 @@
       "Detected an incompatible DataSourceRegister. Please remove the 
incompatible library from classpath or upgrade it. Error: <message>"
     ]
   },
+  "INCOMPLETE_TYPE_DEFINITION" : {
+    "message" : [
+      "Incomplete complex type:"
+    ],
+    "subClass" : {
+      "ARRAY" : {
+        "message" : [
+          "The definition of \"ARRAY\" type is incomplete. You must provide an 
element type. For example: \"ARRAY<elementType>\"."
+        ]
+      },
+      "MAP" : {
+        "message" : [
+          "The definition of \"MAP\" type is incomplete. You must provide a 
key type and a value type. For example: \"MAP<TIMESTAMP, INT>\"."
+        ]
+      },
+      "STRUCT" : {
+        "message" : [
+          "The definition of \"STRUCT\" type is incomplete. You must provide 
at least one field type. For example: \"STRUCT<Field1: INT>\"."
+        ]
+      }
+    },
+    "sqlState" : "42K01"
+  },
   "INCONSISTENT_BEHAVIOR_CROSS_VERSION" : {
     "message" : [
       "You may get a different result due to the upgrading to"
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index c6e50f3f514..d2a1cb1eb16 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2889,6 +2889,8 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] 
with SQLConfHelper wit
       case ("interval", Nil) => CalendarIntervalType
       case (dt @ ("character" | "char" | "varchar"), Nil) =>
         throw QueryParsingErrors.charTypeMissingLengthError(dt, ctx)
+      case (dt @ ("array" | "struct" | "map"), Nil) =>
+        throw QueryParsingErrors.nestedTypeMissingElementTypeError(dt, ctx)
       case (dt, params) =>
         val dtStr = if (params.nonEmpty) s"$dt(${params.mkString(",")})" else 
dt
         throw QueryParsingErrors.dataTypeUnsupportedError(dtStr, ctx)
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index 29766251abd..e54bbb9c9d1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -292,6 +292,27 @@ private[sql] object QueryParsingErrors extends 
QueryErrorsBase {
       ctx)
   }
 
+  def nestedTypeMissingElementTypeError(
+      dataType: String, ctx: PrimitiveDataTypeContext): Throwable = {
+    dataType match {
+      case "array" =>
+        new ParseException(
+          errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY",
+          messageParameters = Map("elementType" -> "<INT>"),
+          ctx)
+      case "struct" =>
+        new ParseException(
+          errorClass = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+          messageParameters = Map.empty,
+          ctx)
+      case "map" =>
+        new ParseException(
+          errorClass = "INCOMPLETE_TYPE_DEFINITION.MAP",
+          messageParameters = Map.empty,
+          ctx)
+    }
+  }
+
   def partitionTransformNotExpectedError(
       name: String, describe: String, ctx: ApplyTransformContext): Throwable = 
{
     new ParseException(
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
index 71483534d40..b30998b6aa0 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
@@ -546,4 +546,76 @@ class QueryParsingErrorsSuite extends QueryTest with 
SharedSparkSession {
         start = 0,
         stop = 124))
   }
+
+  test("INCOMPLETE_TYPE_DEFINITION: array type definition is incomplete") {
+    // Cast simple array without specifying element type
+    checkError(
+      exception = parseException("SELECT CAST(array(1,2,3) AS ARRAY)"),
+      errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY",
+      sqlState = "42K01",
+      parameters = Map("elementType" -> "<INT>"),
+      context = ExpectedContext(fragment = "ARRAY", start = 28, stop = 32))
+    // Cast array of array without specifying element type for inner array
+    checkError(
+      exception = parseException("SELECT CAST(array(array(3)) AS 
ARRAY<ARRAY>)"),
+      errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY",
+      sqlState = "42K01",
+      parameters = Map("elementType" -> "<INT>"),
+      context = ExpectedContext(fragment = "ARRAY", start = 37, stop = 41))
+    // Create column of array type without specifying element type
+    checkError(
+      exception = parseException("CREATE TABLE tbl_120691 (col1 ARRAY)"),
+      errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY",
+      sqlState = "42K01",
+      parameters = Map("elementType" -> "<INT>"),
+      context = ExpectedContext(fragment = "ARRAY", start = 30, stop = 34))
+  }
+
+  test("INCOMPLETE_TYPE_DEFINITION: struct type definition is incomplete") {
+    // Cast simple struct without specifying field type
+    checkError(
+      exception = parseException("SELECT CAST(struct(1,2,3) AS STRUCT)"),
+      errorClass = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+      sqlState = "42K01",
+      context = ExpectedContext(fragment = "STRUCT", start = 29, stop = 34))
+    // Cast array of struct without specifying field type in struct
+    checkError(
+      exception = parseException("SELECT CAST(array(struct(1,2)) AS 
ARRAY<STRUCT>)"),
+      errorClass = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+      sqlState = "42K01",
+      context = ExpectedContext(fragment = "STRUCT", start = 40, stop = 45))
+    // Create column of struct type without specifying field type
+    checkError(
+      exception = parseException("CREATE TABLE tbl_120691 (col1 STRUCT)"),
+      errorClass = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+      sqlState = "42K01",
+      context = ExpectedContext(fragment = "STRUCT", start = 30, stop = 35))
+    // Invalid syntax `STRUCT<INT>` without field name
+    checkError(
+      exception = parseException("SELECT CAST(struct(1,2,3) AS STRUCT<INT>)"),
+      errorClass = "PARSE_SYNTAX_ERROR",
+      sqlState = "42601",
+      parameters = Map("error" -> "'>'", "hint" -> ""))
+  }
+
+  test("INCOMPLETE_TYPE_DEFINITION: map type definition is incomplete") {
+    // Cast simple map without specifying element type
+    checkError(
+      exception = parseException("SELECT CAST(map(1,'2') AS MAP)"),
+      errorClass = "INCOMPLETE_TYPE_DEFINITION.MAP",
+      sqlState = "42K01",
+      context = ExpectedContext(fragment = "MAP", start = 26, stop = 28))
+    // Create column of map type without specifying key/value types
+    checkError(
+      exception = parseException("CREATE TABLE tbl_120691 (col1 MAP)"),
+      errorClass = "INCOMPLETE_TYPE_DEFINITION.MAP",
+      sqlState = "42K01",
+      context = ExpectedContext(fragment = "MAP", start = 30, stop = 32))
+    // Invalid syntax `MAP<String>` with only key type
+    checkError(
+      exception = parseException("SELECT CAST(map('1',2) AS MAP<STRING>)"),
+      errorClass = "PARSE_SYNTAX_ERROR",
+      sqlState = "42601",
+      parameters = Map("error" -> "'>'", "hint" -> ""))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to