(hudi) branch master updated: ShowPartitionsCommand should consider lazy delete_partitions (#10019)

vbalaji Wed, 08 Nov 2023 11:08:35 -0800

This is an automated email from the ASF dual-hosted git repository.

vbalaji pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git



The following commit(s) were added to refs/heads/master by this push:
     new 43a39b907bc ShowPartitionsCommand should consider lazy delete_partitions (#10019)
43a39b907bc is described below

commit 43a39b907bc3e9c66c691f2aaf9c1ca7c8b1e0c6
Author: Rex(Hui) An <[email protected]>
AuthorDate: Thu Nov 9 03:07:50 2023 +0800

    ShowPartitionsCommand should consider lazy delete_partitions (#10019)
---
 .../command/ShowHoodieTablePartitionsCommand.scala | 13 ++++-----
 .../apache/spark/sql/hudi/TestShowPartitions.scala | 33 ++++++++++++++++++++++
 2 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/ShowHoodieTablePartitionsCommand.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/ShowHoodieTablePartitionsCommand.scala
index d896fecae0c..a2a35e35ec8 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/ShowHoodieTablePartitionsCommand.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/ShowHoodieTablePartitionsCommand.scala
@@ -17,14 +17,13 @@
 
 package org.apache.spark.sql.hudi.command
 
+import org.apache.hudi.common.table.timeline.TimelineUtils
 import org.apache.hudi.common.util.PartitionPathEncodeUtils
-
 import org.apache.spark.sql.{Row, SparkSession}
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable
 import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.datasources.PartitioningUtils
 import org.apache.spark.sql.types.StringType
 
@@ -47,17 +46,17 @@ case class ShowHoodieTablePartitionsCommand(
     val partitionColumnNamesOpt = hoodieCatalogTable.tableConfig.getPartitionFields
 
     if (partitionColumnNamesOpt.isPresent && partitionColumnNamesOpt.get.nonEmpty && schemaOpt.nonEmpty) {
-      if (specOpt.isEmpty) {
-        hoodieCatalogTable.getPartitionPaths.map(Row(_))
-      } else {
-        val spec = specOpt.get
+      specOpt.map { spec =>
         hoodieCatalogTable.getPartitionPaths.filter { partitionPath =>
           val part = PartitioningUtils.parsePathFragment(partitionPath)
           spec.forall { case (col, value) =>
             PartitionPathEncodeUtils.escapePartitionValue(value) == part.getOrElse(col, null)
           }
-        }.map(Row(_))
+        }
       }
+        .getOrElse(hoodieCatalogTable.getPartitionPaths)
+        .filter(!TimelineUtils.getDroppedPartitions(hoodieCatalogTable.metaClient.getActiveTimeline).contains(_))
+        .map(Row(_))
     } else {
       Seq.empty[Row]
     }
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestShowPartitions.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestShowPartitions.scala
index 59ee6428610..d3f42a4d6ac 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestShowPartitions.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestShowPartitions.scala
@@ -173,4 +173,37 @@ class TestShowPartitions extends HoodieSparkSqlTestBase {
       Seq("year=%s/month=%s/day=01".format(DEFAULT_PARTITION_PATH, DEFAULT_PARTITION_PATH))
     )
   }
+
+  test("Test alter table show partitions which are dropped before") {
+    Seq("true", "false").foreach { enableMetadata =>
+      withSQLConf("hoodie.metadata.enable" -> enableMetadata) {
+        withTable(generateTableName) { tableName =>
+          spark.sql(
+            s"""
+               | create table $tableName (
+               |   id int,
+               |   name string,
+               |   price double,
+               |   ts long,
+               |   year string,
+               |   month string,
+               |   day string
+               | ) using hudi
+               | partitioned by (year, month, day)
+               | tblproperties (
+               |   primaryKey = 'id',
+               |   preCombineField = 'ts'
+               | )
+             """.stripMargin)
+          spark.sql(s"alter table $tableName add partition(year='2023', month='06', day='06')")
+          checkAnswer(s"show partitions $tableName")(
+            Seq("year=2023/month=06/day=06")
+          )
+          // Lazily drop that partition
+          spark.sql(s"alter table $tableName drop partition(year='2023', month='06', day='06')")
+          checkAnswer(s"show partitions $tableName")(Seq.empty: _*)
+        }
+      }
+    }
+  }
 }

(hudi) branch master updated: ShowPartitionsCommand should consider lazy delete_partitions (#10019)

Reply via email to