This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 64a05bc0b87 [HUDI-6732] Allow wildcards from Spark-SQL entrypoints for
drop partition DDL (#9491)
64a05bc0b87 is described below
commit 64a05bc0b874fd2f3ce01c669840bb619550f033
Author: voonhous <[email protected]>
AuthorDate: Fri Sep 1 13:54:27 2023 +0800
[HUDI-6732] Allow wildcards from Spark-SQL entrypoints for drop partition
DDL (#9491)
---
.../org/apache/hudi/HoodieSparkSqlWriter.scala | 6 ++--
.../sql/hudi/TestAlterTableDropPartition.scala | 36 ++++++++++++++++++++++
2 files changed, 40 insertions(+), 2 deletions(-)
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
index cf78e514dda..6d0ce7d16bf 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala
@@ -606,7 +606,8 @@ object HoodieSparkSqlWriter {
*/
private def resolvePartitionWildcards(partitions: List[String], jsc:
JavaSparkContext, cfg: HoodieConfig, basePath: String): List[String] = {
//find out if any of the input partitions have wildcards
- var (wildcardPartitions, fullPartitions) = partitions.partition(partition
=> partition.contains("*"))
+ //note:spark-sql may url-encode special characters (* -> %2A)
+ var (wildcardPartitions, fullPartitions) = partitions.partition(partition
=> partition.matches(".*(\\*|%2A).*"))
if (wildcardPartitions.nonEmpty) {
//get list of all partitions
@@ -621,7 +622,8 @@ object HoodieSparkSqlWriter {
//prevent that from happening. Any text inbetween \\Q and \\E is
considered literal
//So we start the string with \\Q and end with \\E and then whenever
we find a * we add \\E before
//and \\Q after so all other characters besides .* will be enclosed
between a set of \\Q \\E
- val regexPartition = "^\\Q" + partition.replace("*", "\\E.*\\Q") +
"\\E$"
+ val wildcardToken: String = if (partition.contains("*")) "*" else "%2A"
+ val regexPartition = "^\\Q" + partition.replace(wildcardToken,
"\\E.*\\Q") + "\\E$"
//filter all partitions with the regex and append the result to the
list of full partitions
fullPartitions =
List.concat(fullPartitions,allPartitions.filter(_.matches(regexPartition)))
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala
index 2261e83f7f9..b421732d270 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTableDropPartition.scala
@@ -620,4 +620,40 @@ class TestAlterTableDropPartition extends
HoodieSparkSqlTestBase {
checkExceptionContain(s"ALTER TABLE $tableName DROP
PARTITION($partition)")(errMsg)
}
}
+
+ test("Test drop partition with wildcards") {
+ withRecordType()(withTempDir { tmp =>
+ Seq("cow", "mor").foreach { tableType =>
+ val tableName = generateTableName
+ spark.sql(
+ s"""
+ |create table $tableName (
+ | id int,
+ | name string,
+ | price double,
+ | ts long,
+ | partition_date_col string
+ |) using hudi
+ | location '${tmp.getCanonicalPath}/$tableName'
+ | tblproperties (
+ | primaryKey ='id',
+ | type = '$tableType',
+ | preCombineField = 'ts'
+ | ) partitioned by (partition_date_col)
+ """.stripMargin)
+ spark.sql(s"insert into $tableName values " +
+ s"(1, 'a1', 10, 1000, '2023-08-01'), (2, 'a2', 10, 1000,
'2023-08-02'), (3, 'a3', 10, 1000, '2023-09-01')")
+ checkAnswer(s"show partitions $tableName")(
+ Seq("partition_date_col=2023-08-01"),
+ Seq("partition_date_col=2023-08-02"),
+ Seq("partition_date_col=2023-09-01")
+ )
+ spark.sql(s"alter table $tableName drop
partition(partition_date_col='2023-08-*')")
+ // show partitions will still return all partitions for tests, use
select distinct as a stop-gap
+ checkAnswer(s"select distinct partition_date_col from $tableName")(
+ Seq("2023-09-01")
+ )
+ }
+ })
+ }
}