This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new f2e276a38e5 [MINOR] Query index warning check (#11276)
f2e276a38e5 is described below
commit f2e276a38e5cf38d1c740a38e2c62fb4b2af1281
Author: KnightChess <[email protected]>
AuthorDate: Fri May 24 08:55:26 2024 +0800
[MINOR] Query index warning check (#11276)
---
.../src/main/scala/org/apache/hudi/BucketIndexSupport.scala | 6 +++++-
.../main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala | 4 ++--
.../src/main/scala/org/apache/hudi/HoodieFileIndex.scala | 9 ++++++++-
.../scala/org/apache/hudi/PartitionStatsIndexSupport.scala | 10 ++++++++--
4 files changed, 23 insertions(+), 6 deletions(-)
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BucketIndexSupport.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BucketIndexSupport.scala
index e1b555efb58..def32f9ee94 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BucketIndexSupport.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BucketIndexSupport.scala
@@ -50,7 +50,7 @@ class BucketIndexSupport(spark: SparkSession,
private lazy val avroSchema = new
TableSchemaResolver(metaClient).getTableAvroSchema(false)
- override def getIndexName: String = "BUCKET"
+ override def getIndexName: String = BucketIndexSupport.INDEX_NAME
/**
* Return true if table can use bucket index
@@ -217,3 +217,7 @@ class BucketIndexSupport(spark: SparkSession,
}
}
+object BucketIndexSupport {
+ val INDEX_NAME = "BUCKET"
+}
+
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala
index 6def93e65ea..9b423717404 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala
@@ -116,8 +116,8 @@ class ColumnStatsIndexSupport(spark: SparkSession,
* w/in the Metadata Table
*/
   def isIndexAvailable: Boolean = {
-    checkState(metadataConfig.isEnabled, "Metadata Table support has to be enabled")
-    metaClient.getTableConfig.getMetadataPartitions.contains(HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS)
+    metadataConfig.isEnabled &&
+      metaClient.getTableConfig.getMetadataPartitions.contains(HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS)
   }
 /**
/**
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
index c59eb68182a..416a7a95832 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala
@@ -401,13 +401,20 @@ case class HoodieFileIndex(spark: SparkSession,
   private def isFunctionalIndexEnabled: Boolean = indicesSupport.exists(idx =>
     idx.getIndexName == FunctionalIndexSupport.INDEX_NAME && idx.isIndexAvailable)
+  private def isBucketIndexEnabled: Boolean = indicesSupport.exists(idx =>
+    idx.getIndexName == BucketIndexSupport.INDEX_NAME && idx.isIndexAvailable)
+
+  private def isPartitionStatsIndexEnabled: Boolean = indicesSupport.exists(idx =>
+    idx.getIndexName == PartitionStatsIndexSupport.INDEX_NAME && idx.isIndexAvailable)
+
   private def isIndexEnabled: Boolean = indicesSupport.exists(idx => idx.isIndexAvailable)
private def validateConfig(): Unit = {
     if (isDataSkippingEnabled && (!isMetadataTableEnabled || !isIndexEnabled)) {
       logWarning("Data skipping requires both Metadata Table and at least one of Column Stats Index, Record Level Index, or Functional Index" +
         " to be enabled as well! " + s"(isMetadataTableEnabled = $isMetadataTableEnabled, isColumnStatsIndexEnabled = $isColumnStatsIndexEnabled"
-        + s", isRecordIndexApplicable = $isRecordIndexEnabled, isFunctionalIndexEnabled = $isFunctionalIndexEnabled)")
+        + s", isRecordIndexApplicable = $isRecordIndexEnabled, isFunctionalIndexEnabled = $isFunctionalIndexEnabled, " +
+        s"isBucketIndexEnable = $isBucketIndexEnabled, isPartitionStatsIndexEnabled = $isPartitionStatsIndexEnabled)")
}
}
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/PartitionStatsIndexSupport.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/PartitionStatsIndexSupport.scala
index 151fecfdc65..30e9b936aad 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/PartitionStatsIndexSupport.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/PartitionStatsIndexSupport.scala
@@ -41,9 +41,11 @@ class PartitionStatsIndexSupport(spark: SparkSession,
allowCaching: Boolean = false)
extends ColumnStatsIndexSupport(spark, tableSchema, metadataConfig,
metaClient, allowCaching) {
+ override def getIndexName: String = PartitionStatsIndexSupport.INDEX_NAME
+
   override def isIndexAvailable: Boolean = {
-    checkState(metadataConfig.isEnabled, "Metadata Table support has to be enabled")
-    metaClient.getTableConfig.getMetadataPartitions.contains(HoodieTableMetadataUtil.PARTITION_NAME_PARTITION_STATS)
+    metadataConfig.isEnabled &&
+      metaClient.getTableConfig.getMetadataPartitions.contains(HoodieTableMetadataUtil.PARTITION_NAME_PARTITION_STATS)
   }
override def loadColumnStatsIndexRecords(targetColumns: Seq[String],
shouldReadInMemory: Boolean): HoodieData[HoodieMetadataColumnStats] = {
@@ -63,3 +65,7 @@ class PartitionStatsIndexSupport(spark: SparkSession,
columnStatsRecords
}
}
+
+object PartitionStatsIndexSupport {
+ val INDEX_NAME = "PARTITION_STATS"
+}