This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 970fc64  [SPARK-33667][SQL] Respect the `spark.sql.caseSensitive` 
config while resolving partition spec in v1 `SHOW PARTITIONS`
970fc64 is described below

commit 970fc6406d190ecd4602e3e987339bc451aade7a
Author: Max Gekk <[email protected]>
AuthorDate: Sun Dec 6 02:56:08 2020 -0800

    [SPARK-33667][SQL] Respect the `spark.sql.caseSensitive` config while 
resolving partition spec in v1 `SHOW PARTITIONS`
    
    ### What changes were proposed in this pull request?
    Preprocess the partition spec passed to the V1 SHOW PARTITIONS 
implementation `ShowPartitionsCommand`, and normalize the passed spec according 
to the partition columns w.r.t. the case-sensitivity flag
**spark.sql.caseSensitive**.
    
    ### Why are the changes needed?
    In fact, V1 SHOW PARTITIONS is case sensitive and doesn't respect the SQL
config **spark.sql.caseSensitive**, which is false by default. For instance:
    ```sql
    spark-sql> CREATE TABLE tbl1 (price int, qty int, year int, month int)
             > USING parquet
             > PARTITIONED BY (year, month);
    spark-sql> INSERT INTO tbl1 PARTITION(year = 2015, month = 1) SELECT 1, 1;
    spark-sql> SHOW PARTITIONS tbl1 PARTITION(YEAR = 2015, Month = 1);
    Error in query: Non-partitioning column(s) [YEAR, Month] are specified for 
SHOW PARTITIONS;
    ```
    The `SHOW PARTITIONS` command must show the partition `year = 2015, month = 
1` specified by `YEAR = 2015, Month = 1`.
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. After the changes, the command above works as expected:
    ```sql
    spark-sql> SHOW PARTITIONS tbl1 PARTITION(YEAR = 2015, Month = 1);
    year=2015/month=1
    ```
    
    ### How was this patch tested?
    By running the affected test suites:
    - `v1/ShowPartitionsSuite`
    - `v2/ShowPartitionsSuite`
    
    Closes #30615 from MaxGekk/show-partitions-case-sensitivity-test.
    
    Authored-by: Max Gekk <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit 48297818f37a8e02cc02ba6fa9ec04fe37540aca)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../spark/sql/execution/command/tables.scala       | 18 +++++++-------
 .../command/ShowPartitionsSuiteBase.scala          | 28 ++++++++++++++++++++--
 .../execution/command/v1/ShowPartitionsSuite.scala |  4 ----
 .../execution/command/v2/ShowPartitionsSuite.scala |  4 ----
 4 files changed, 34 insertions(+), 20 deletions(-)

diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 9e3ca3c..59adb7d 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -1006,20 +1006,18 @@ case class ShowPartitionsCommand(
     DDLUtils.verifyPartitionProviderIsHive(sparkSession, table, "SHOW 
PARTITIONS")
 
     /**
-     * Validate the partitioning spec by making sure all the referenced 
columns are
+     * Normalizes the partition spec w.r.t the partition columns and case 
sensitivity settings,
+     * and validates the spec by making sure all the referenced columns are
      * defined as partitioning columns in table definition. An 
AnalysisException exception is
      * thrown if the partitioning spec is invalid.
      */
-    if (spec.isDefined) {
-      val badColumns = 
spec.get.keySet.filterNot(table.partitionColumnNames.contains)
-      if (badColumns.nonEmpty) {
-        val badCols = badColumns.mkString("[", ", ", "]")
-        throw new AnalysisException(
-          s"Non-partitioning column(s) $badCols are specified for SHOW 
PARTITIONS")
-      }
-    }
+    val normalizedSpec = spec.map(partitionSpec => 
PartitioningUtils.normalizePartitionSpec(
+      partitionSpec,
+      table.partitionColumnNames,
+      table.identifier.quotedString,
+      sparkSession.sessionState.conf.resolver))
 
-    val partNames = catalog.listPartitionNames(tableName, spec)
+    val partNames = catalog.listPartitionNames(tableName, normalizedSpec)
     partNames.map(Row(_))
   }
 }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala
index 82457f9..b695dec 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala
@@ -21,6 +21,7 @@ import org.scalactic.source.Position
 import org.scalatest.Tag
 
 import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types.{StringType, StructType}
 
@@ -28,7 +29,6 @@ trait ShowPartitionsSuiteBase extends QueryTest with 
SQLTestUtils {
   protected def version: String
   protected def catalog: String
   protected def defaultUsing: String
-  protected def wrongPartitionColumnsError(columns: String*): String
   // Gets the schema of `SHOW PARTITIONS`
   private val showSchema: StructType = new StructType().add("partition", 
StringType, false)
   protected def runShowPartitionsSql(sqlText: String, expected: Seq[Row]): 
Unit = {
@@ -94,7 +94,7 @@ trait ShowPartitionsSuiteBase extends QueryTest with 
SQLTestUtils {
         val errMsg = intercept[AnalysisException] {
           sql(s"SHOW PARTITIONS $table PARTITION(abcd=2015, xyz=1)")
         }.getMessage
-        assert(errMsg.contains(wrongPartitionColumnsError("abcd", "xyz")))
+        assert(errMsg.contains("abcd is not a valid partition column"))
       }
     }
   }
@@ -149,4 +149,28 @@ trait ShowPartitionsSuiteBase extends QueryTest with 
SQLTestUtils {
       }
     }
   }
+
+  test("SPARK-33667: case sensitivity of partition spec") {
+    withNamespace(s"$catalog.ns") {
+      sql(s"CREATE NAMESPACE $catalog.ns")
+      val t = s"$catalog.ns.part_table"
+      withTable(t) {
+        sql(s"""
+          |CREATE TABLE $t (price int, qty int, year int, month int)
+          |$defaultUsing
+          |PARTITIONED BY (year, month)""".stripMargin)
+        sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1")
+        Seq(
+          true -> "PARTITION(year = 2015, month = 1)",
+          false -> "PARTITION(YEAR = 2015, Month = 1)"
+        ).foreach { case (caseSensitive, partitionSpec) =>
+          withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) {
+            runShowPartitionsSql(
+              s"SHOW PARTITIONS $t $partitionSpec",
+              Row("year=2015/month=1") :: Nil)
+          }
+        }
+      }
+    }
+  }
 }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala
index 2b2bc9e..c752a5f 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala
@@ -27,10 +27,6 @@ trait ShowPartitionsSuiteBase extends 
command.ShowPartitionsSuiteBase {
   override def catalog: String = CatalogManager.SESSION_CATALOG_NAME
   override def defaultUsing: String = "USING parquet"
 
-  override protected def wrongPartitionColumnsError(columns: String*): String 
= {
-    s"Non-partitioning column(s) ${columns.mkString("[", ", ", "]")} are 
specified"
-  }
-
   test("show everything in the default database") {
     val table = "dateTable"
     withTable(table) {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala
index ca47a71..55985a3 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala
@@ -32,10 +32,6 @@ class ShowPartitionsSuite extends 
command.ShowPartitionsSuiteBase with SharedSpa
     .set(s"spark.sql.catalog.$catalog", 
classOf[InMemoryPartitionTableCatalog].getName)
     .set(s"spark.sql.catalog.non_part_$catalog", 
classOf[InMemoryTableCatalog].getName)
 
-  override protected def wrongPartitionColumnsError(columns: String*): String 
= {
-    s"${columns.head} is not a valid partition column"
-  }
-
   test("a table does not support partitioning") {
     val table = s"non_part_$catalog.tab1"
     withTable(table) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to