allisonport-db commented on code in PR #38823:
URL: https://github.com/apache/spark/pull/38823#discussion_r1116482692


##########
sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala:
##########
@@ -1422,6 +1422,97 @@ class DataSourceV2SQLSuiteV1Filter
     }
   }
 
+  test("SPARK-41290: Generated columns only allowed with TableCatalogs that " +
+    "SUPPORTS_CREATE_TABLE_WITH_GENERATED_COLUMNS") {
+    val tblName = "my_tab"
+    val tableDefinition =
+      s"$tblName(eventDate DATE, eventYear INT GENERATED ALWAYS AS (year(eventDate)))"
+    for (statement <- Seq("CREATE TABLE", "REPLACE TABLE")) {
+      // InMemoryTableCatalog.capabilities() = {SUPPORTS_CREATE_TABLE_WITH_GENERATED_COLUMNS}
+      withTable(s"testcat.$tblName") {
+        if (statement == "REPLACE TABLE") {
+          spark.sql(s"CREATE TABLE testcat.$tblName(a INT) USING foo")
+        }
+        // Can create table with a generated column
+        spark.sql(s"$statement testcat.$tableDefinition USING foo")
+      }
+      // BasicInMemoryTableCatalog.capabilities() = {}
+      withSQLConf("spark.sql.catalog.dummy" -> classOf[BasicInMemoryTableCatalog].getName) {
+        val e = intercept[AnalysisException] {
+          sql("USE dummy")
+          spark.sql(s"$statement dummy.$tableDefinition USING foo")
+        }
+        assert(e.getMessage.contains(
+          "does not support creating generated columns with GENERATED ALWAYS AS expressions"))
+        assert(e.getErrorClass == "UNSUPPORTED_FEATURE.TABLE_OPERATION")
+      }
+    }
+  }
+
+  test("SPARK-41290: Column cannot have both a generation expression and a default value") {
+    val tblName = "my_tab"
+    val tableDefinition =
+      s"$tblName(eventDate DATE, eventYear INT GENERATED ALWAYS AS (year(eventDate)) DEFAULT 0)"
+    withSQLConf(SQLConf.DEFAULT_COLUMN_ALLOWED_PROVIDERS.key -> "foo") {
+      for (statement <- Seq("CREATE TABLE", "REPLACE TABLE")) {
+        withTable(s"testcat.$tblName") {
+          if (statement == "REPLACE TABLE") {
+            spark.sql(s"CREATE TABLE testcat.$tblName(a INT) USING foo")
+          }
+          checkError(
+            exception = intercept[AnalysisException] {
+              spark.sql(s"$statement testcat.$tableDefinition USING foo")
+            },
+            errorClass = "GENERATED_COLUMN_WITH_DEFAULT_VALUE",
+            parameters = Map(
+              "colName" -> "eventYear",
+              "defaultValue" -> "0",
+              "genExpr" -> "year(eventDate)")
+          )
+        }
+      }
+    }
+  }
+
+  test("SPARK-41290: Generated column expression must be valid generation expression") {
+    // InMemoryTableCatalog.capabilities() = {SUPPORTS_CREATE_TABLE_WITH_GENERATED_COLUMNS}
+    val tblName = "my_tab"
+    withTable(s"testcat.$tblName") {
+      // Expression cannot be resolved since it doesn't exist
+      var e = intercept[AnalysisException] {
+        spark.sql(s"CREATE TABLE testcat.$tblName(a INT, " +
+          s"b DATE GENERATED ALWAYS AS (not_a_function(a))) USING foo")
+      }
+      assert(e.getErrorClass == "UNSUPPORTED_EXPRESSION_GENERATED_COLUMN")
+      assert(e.getMessage.contains("failed to resolve `not_a_function` to a built-in function"))
+
+      // Expression cannot be resolved since it's not a built-in function
+      spark.udf.register("timesTwo", (x: Int) => x * 2)
+      e = intercept[AnalysisException] {
+        spark.sql(s"CREATE TABLE testcat.$tblName(a INT, " +
+          s"b INT GENERATED ALWAYS AS (timesTwo(a))) USING foo")
+      }
+      assert(e.getErrorClass == "UNSUPPORTED_EXPRESSION_GENERATED_COLUMN")
+      assert(e.getMessage.contains("failed to resolve `timesTwo` to a built-in function"))
+
+      // Generated column can't reference itself
+      e = intercept[AnalysisException] {
+        spark.sql(s"CREATE TABLE testcat.$tblName(a INT, " +
+          s"b INT GENERATED ALWAYS AS (b + 1)) USING foo")
+      }
+      assert(e.getErrorClass == "UNSUPPORTED_EXPRESSION_GENERATED_COLUMN")
+      assert(e.getMessage.contains("generation expression cannot reference itself"))
+
+      // Generated column can't reference non-existent column

Review Comment:
   Added all except the last. See 
https://github.com/apache/spark/pull/38823#discussion_r1116470195



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to