This is an automated email from the ASF dual-hosted git repository.
codope pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 657b837aaa6 [HUDI-6048] Check if partition exists before list
partition by path prefix (#8402)
657b837aaa6 is described below
commit 657b837aaa6fa825945625579c52ff7365b1ecfd
Author: Zouxxyy <[email protected]>
AuthorDate: Fri Apr 14 09:48:48 2023 +0800
[HUDI-6048] Check if partition exists before list partition by path prefix
(#8402)
---
.../src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala | 4 +++-
.../src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala | 7 ++++++-
2 files changed, 9 insertions(+), 2 deletions(-)
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
index a9a20057795..6459c967c56 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala
@@ -300,7 +300,9 @@ class SparkHoodieTableFileIndex(spark: SparkSession,
// prefix to try to reduce the scope of the required file-listing
val relativePartitionPathPrefix =
composeRelativePartitionPath(staticPartitionColumnNameValuePairs)
- if (staticPartitionColumnNameValuePairs.length ==
partitionColumnNames.length) {
+ if (!metaClient.getFs.exists(new Path(getBasePath,
relativePartitionPathPrefix))) {
+ Seq()
+ } else if (staticPartitionColumnNameValuePairs.length ==
partitionColumnNames.length) {
// In case composed partition path is complete, we can return it
directly avoiding extra listing operation
Seq(new PartitionPath(relativePartitionPathPrefix,
staticPartitionColumnNameValuePairs.map(_._2._2.asInstanceOf[AnyRef]).toArray))
} else {
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala
index e69819fb6f4..ed73940186d 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala
@@ -519,7 +519,12 @@ class TestHoodieFileIndex extends
HoodieSparkClientTestBase with ScalaAssertionS
EqualTo(attribute("region_code"), literal("1"))),
"dt = '2023/01/01' and region_code = '1'",
enablePartitionPathPrefixAnalysis,
- Seq(("1", "2023/01/01")))
+ Seq(("1", "2023/01/01"))),
+ // no partition matched
+ (Seq(EqualTo(attribute("region_code"), literal("0"))),
+ "region_code = '0'",
+ enablePartitionPathPrefixAnalysis,
+ Seq())
)
testCases.foreach(testCase => {