rdblue commented on a change in pull request #25305: [SPARK-28572][SQL] Simple analyzer checks for CREATE TABLE v2
URL: https://github.com/apache/spark/pull/25305#discussion_r309465641
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala
##########
@@ -160,6 +163,68 @@ class V2SessionCatalogSuite
assert(catalog.tableExists(testIdent))
}
+ test("createTable: duplicate column names in the table definition") {
+ Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) =>
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+ val errMsg = intercept[AnalysisException] {
+ sql(s"CREATE TABLE t($c0 INT, $c1 INT) USING $v2Source")
+ }.getMessage
+ assert(errMsg.contains("Found duplicate column(s) in the table definition of t"))
+ }
+ }
+ }
+
+ test("createTable: partition column names not in table definition") {
+ val e = intercept[AnalysisException] {
+ sql(s"CREATE TABLE tbl(a int, b string) USING $v2Source PARTITIONED BY (c)")
+ }.getMessage
+ assert(e.contains("Couldn't find column c in"))
+ }
+
+ test("createTable: bucket column names not in table definition") {
+ val e = intercept[AnalysisException] {
+ sql(s"CREATE TABLE tbl(a int, b string) " +
+ s"USING $v2Source CLUSTERED BY (c) INTO 4 BUCKETS")
+ }.getMessage
+ assert(e.contains("Couldn't find column c in"))
+ }
+
+ test("createTable: column repeated in partition columns") {
+ Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) =>
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+ val e = intercept[AnalysisException] {
+ sql(s"CREATE TABLE t($c0 INT) USING $v2Source PARTITIONED BY ($c0, $c1)")
+ }.getMessage
+ assert(e.contains("Found duplicate column(s) in the partition schema"))
+ }
+ }
+ }
+
+ test("createTable: column repeated in bucket columns") {
+ Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) =>
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+ val e = intercept[AnalysisException] {
+ sql(s"CREATE TABLE t($c0 INT) USING $v2Source " +
+ s"CLUSTERED BY ($c0, $c1) INTO 2 BUCKETS")
+ }.getMessage
+ assert(e.contains("Found duplicate column(s) in the bucket definition"))
+ }
+ }
+ }
+
+ test("createTable: all columns used in partitioning") {
+ Seq(
+ "PARTITIONED BY (a, b)",
+ "CLUSTERED BY (a, b) INTO 2 BUCKETS",
+ "PARTITIONED BY (a) CLUSTERED BY (b) INTO 2 BUCKETS").foreach { partitioning =>
+
+ val e = intercept[AnalysisException] {
+ sql(s"CREATE TABLE t(a INT, b STRING) USING $v2Source $partitioning")
+ }.getMessage
+ assert(e.contains("Cannot use all columns for partitioning."))
Review comment:
Why not? I think this is perfectly reasonable if the data uses partition
transforms.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]