rdblue commented on a change in pull request #25305: [SPARK-28572][SQL] Simple analyzer checks for CREATE TABLE v2
URL: https://github.com/apache/spark/pull/25305#discussion_r309465641
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala
##########
@@ -160,6 +163,68 @@ class V2SessionCatalogSuite
assert(catalog.tableExists(testIdent))
}
+ test("createTable: duplicate column names in the table definition") {
+ Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) =>
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+ val errMsg = intercept[AnalysisException] {
+ sql(s"CREATE TABLE t($c0 INT, $c1 INT) USING $v2Source")
+ }.getMessage
+ assert(errMsg.contains("Found duplicate column(s) in the table definition of t"))
+ }
+ }
+ }
+
+ test("createTable: partition column names not in table definition") {
+ val e = intercept[AnalysisException] {
+ sql(s"CREATE TABLE tbl(a int, b string) USING $v2Source PARTITIONED BY (c)")
+ }.getMessage
+ assert(e.contains("Couldn't find column c in"))
+ }
+
+ test("createTable: bucket column names not in table definition") {
+ val e = intercept[AnalysisException] {
+ sql(s"CREATE TABLE tbl(a int, b string) " +
+ s"USING $v2Source CLUSTERED BY (c) INTO 4 BUCKETS")
+ }.getMessage
+ assert(e.contains("Couldn't find column c in"))
+ }
+
+ test("createTable: column repeated in partition columns") {
+ Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) =>
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+ val e = intercept[AnalysisException] {
+ sql(s"CREATE TABLE t($c0 INT) USING $v2Source PARTITIONED BY ($c0, $c1)")
+ }.getMessage
+ assert(e.contains("Found duplicate column(s) in the partition schema"))
+ }
+ }
+ }
+
+ test("createTable: column repeated in bucket columns") {
+ Seq((true, ("a", "a")), (false, ("aA", "Aa"))).foreach { case (caseSensitive, (c0, c1)) =>
+ withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+ val e = intercept[AnalysisException] {
+ sql(s"CREATE TABLE t($c0 INT) USING $v2Source " +
+ s"CLUSTERED BY ($c0, $c1) INTO 2 BUCKETS")
+ }.getMessage
+ assert(e.contains("Found duplicate column(s) in the bucket definition"))
+ }
+ }
+ }
+
+ test("createTable: all columns used in partitioning") {
+ Seq(
+ "PARTITIONED BY (a, b)",
+ "CLUSTERED BY (a, b) INTO 2 BUCKETS",
+ "PARTITIONED BY (a) CLUSTERED BY (b) INTO 2 BUCKETS").foreach { partitioning =>
+
+ val e = intercept[AnalysisException] {
+ sql(s"CREATE TABLE t(a INT, b STRING) USING $v2Source $partitioning")
+ }.getMessage
+ assert(e.contains("Cannot use all columns for partitioning."))
Review comment:
Why not? I think this is perfectly reasonable if the data uses partition
transforms.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]