This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
     new 93d1d565076 [HUDI-8855] Add bucket properties for spark bucket index query pruning (#12614)
93d1d565076 is described below
commit 93d1d5650766fd61640d626cb235399162829e94
Author: Manu <[email protected]>
AuthorDate: Thu Jan 16 14:46:48 2025 +0800
    [HUDI-8855] Add bucket properties for spark bucket index query pruning (#12614)
---
.../java/org/apache/hudi/metadata/BaseTableMetadata.java | 2 +-
.../src/main/scala/org/apache/hudi/HoodieFileIndex.scala | 12 +++++++++++-
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java
index f285ca198e0..0ffbce2ff4f 100644
--- a/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java
+++ b/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java
@@ -96,7 +96,7 @@ public abstract class BaseTableMetadata extends AbstractHoodieTableMetadata {
     if (metadataConfig.isMetricsEnabled()) {
       this.metrics = Option.of(new HoodieMetadataMetrics(HoodieMetricsConfig.newBuilder()
-          .fromProperties(metadataConfig.getProps()).build(), dataMetaClient.getStorage()));
+          .fromProperties(metadataConfig.getProps()).withPath(dataBasePath).build(), dataMetaClient.getStorage()));
     } else {
       this.metrics = Option.empty();
     }
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
index 74d80390cfa..930bd7519b1 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
@@ -34,7 +34,8 @@ import org.apache.hudi.util.JFunction
 import org.apache.hadoop.fs.{FileStatus, Path}
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
+import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable
 import org.apache.spark.sql.catalyst.expressions.{Expression, Literal}
 import org.apache.spark.sql.execution.datasources.{FileIndex, FileStatusCache, NoopCache, PartitionDirectory}
 import org.apache.spark.sql.hudi.HoodieSqlCommonUtils
@@ -534,6 +535,15 @@ object HoodieFileIndex extends Logging {
       properties.setProperty(RECORDKEY_FIELD.key, tableConfig.getRecordKeyFields.orElse(Array.empty).mkString(","))
       properties.setProperty(PRECOMBINE_FIELD.key, Option(tableConfig.getPreCombineField).getOrElse(""))
       properties.setProperty(PARTITIONPATH_FIELD.key, HoodieTableConfig.getPartitionFieldPropForKeyGenerator(tableConfig).orElse(""))
+
+      // for simple bucket index, we need to set the INDEX_TYPE, BUCKET_INDEX_HASH_FIELD, BUCKET_INDEX_NUM_BUCKETS
+      val dataBase = Some(tableConfig.getDatabaseName)
+      val tableName = tableConfig.getTableName
+      if (spark.catalog.tableExists(dataBase.getOrElse("default"), tableName)) {
+        val tableIdentifier = TableIdentifier(tableName, dataBase)
+        val table = HoodieCatalogTable(spark, tableIdentifier)
+        table.catalogProperties.foreach(kv => properties.setProperty(kv._1, kv._2))
+      }
     }
     properties