This is an automated email from the ASF dual-hosted git repository. yihua pushed a commit to branch release-1.1.0 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit dfd26b46ffb6645f4ca42575a1cdb2d559138ad4 Author: Lokesh Jain <[email protected]> AuthorDate: Tue Oct 28 07:50:05 2025 +0530 fix: Partition stats should be controlled using column stats config (#14165) --------- Co-authored-by: Lokesh Jain <[email protected]> Co-authored-by: sivabalan <[email protected]> --- .../org/apache/hudi/config/HoodieWriteConfig.java | 5 +--- .../metadata/HoodieBackedTableMetadataWriter.java | 3 +-- .../table/functional/TestCleanPlanExecutor.java | 3 +-- .../hudi/common/config/HoodieMetadataConfig.java | 30 ++++++---------------- .../org/apache/hudi/sink/TestWriteCopyOnWrite.java | 2 -- .../hudi/sink/TestWriteMergeOnReadWithCompact.java | 3 --- .../java/org/apache/hudi/source/TestFileIndex.java | 1 - .../hudi/source/TestIncrementalInputSplits.java | 1 - .../apache/hudi/table/ITTestHoodieDataSource.java | 2 -- .../apache/hudi/table/ITTestSchemaEvolution.java | 3 +-- .../apache/hudi/table/TestHoodieTableSource.java | 1 - .../hudi/metadata/TestHoodieTableMetadataUtil.java | 2 -- .../TestDataSkippingWithMORColstats.java | 3 --- .../hudi/functional/TestHoodieBackedMetadata.java | 2 -- .../apache/hudi/io/TestMetadataWriterCommit.java | 6 ++--- .../org/apache/hudi/TestHoodieSparkSqlWriter.scala | 2 +- .../functional/PartitionStatsIndexTestBase.scala | 3 +-- .../hudi/functional/TestBasicSchemaEvolution.scala | 3 +-- .../hudi/functional/TestPartitionStatsIndex.scala | 22 ---------------- .../TestPartitionStatsIndexWithSql.scala | 2 +- .../functional/TestPartitionStatsPruning.scala | 19 ++++---------- .../hudi/feature/index/TestExpressionIndex.scala | 4 +-- .../TestHoodieMetadataTableValidator.java | 2 -- 23 files changed, 26 insertions(+), 98 deletions(-) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index ca36a5eb4b2e..c1ba48226bcb 100644 --- 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -2134,10 +2134,7 @@ public class HoodieWriteConfig extends HoodieConfig { * @return {@code true} if the partition stats index is enabled, {@code false} otherwise. */ public boolean isPartitionStatsIndexEnabled() { - if (isMetadataColumnStatsIndexEnabled()) { - return isMetadataTableEnabled() && getMetadataConfig().isPartitionStatsIndexEnabled(); - } - return false; + return isMetadataColumnStatsIndexEnabled(); } /** diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java index 51d5a9016081..20f9d1f6601a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java @@ -440,8 +440,7 @@ public abstract class HoodieBackedTableMetadataWriter<I, O> implements HoodieTab while (iterator.hasNext()) { MetadataPartitionType partitionType = iterator.next(); if (partitionType == PARTITION_STATS && !dataMetaClient.getTableConfig().isTablePartitioned()) { - LOG.debug("Partition stats index cannot be enabled for a non-partitioned table. Removing from initialization list. 
Please disable {}", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key()); + // Partition stats index cannot be enabled for a non-partitioned table iterator.remove(); this.enabledPartitionTypes.remove(partitionType); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java index 169c95e5911b..e572fef12260 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java @@ -341,8 +341,7 @@ public class TestCleanPlanExecutor extends HoodieCleanerTestBase { public void testKeepLatestFileVersionsWithBootstrapFileClean() throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().withMetadataIndexColumnStats(false) - .withMetadataIndexPartitionStats(false).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().withMetadataIndexColumnStats(false).build()) .withCleanConfig(HoodieCleanConfig.newBuilder() .withCleanBootstrapBaseFileEnabled(true) .withCleanerParallelism(1) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java index 96c95708ab75..ace77303d52c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java @@ -420,21 +420,6 @@ public final class HoodieMetadataConfig extends HoodieConfig { .sinceVersion("1.0.1") .withDocumentation("Options for the expression index, e.g. 
\"expr='from_unixtime', format='yyyy-MM-dd'\""); - public static final ConfigProperty<Boolean> ENABLE_METADATA_INDEX_PARTITION_STATS = ConfigProperty - .key(METADATA_PREFIX + ".index.partition.stats.enable") - // The defaultValue(false) here is the initial default, but it's overridden later based on - // column stats setting. - .defaultValue(false) - .sinceVersion("1.0.0") - .withDocumentation("Enable aggregating stats for each column at the storage partition level. " - + "Enabling this can improve query performance by leveraging partition and column stats " - + "for (partition) filtering. " - + "Important: The default value for this configuration is dynamically set based on the " - + "effective value of " + ENABLE_METADATA_INDEX_COLUMN_STATS.key() + ". If column stats " - + "index is enabled (default for Spark engine), partition stats indexing will also be " - + "enabled by default. Conversely, if column stats indexing is disabled (default for " - + "Flink and Java engines), partition stats indexing will also be disabled by default."); - public static final ConfigProperty<Integer> METADATA_INDEX_PARTITION_STATS_FILE_GROUP_COUNT = ConfigProperty .key(METADATA_PREFIX + ".index.partition.stats.file.group.count") .defaultValue(1) @@ -811,7 +796,7 @@ public final class HoodieMetadataConfig extends HoodieConfig { } public boolean isPartitionStatsIndexEnabled() { - return getBooleanOrDefault(ENABLE_METADATA_INDEX_PARTITION_STATS); + return getBooleanOrDefault(ENABLE_METADATA_INDEX_COLUMN_STATS); } public int getPartitionStatsIndexFileGroupCount() { @@ -1114,11 +1099,6 @@ public final class HoodieMetadataConfig extends HoodieConfig { return this; } - public Builder withMetadataIndexPartitionStats(boolean enable) { - metadataConfig.setValue(ENABLE_METADATA_INDEX_PARTITION_STATS, String.valueOf(enable)); - return this; - } - public Builder withMetadataIndexPartitionStatsFileGroupCount(int fileGroupCount) { 
metadataConfig.setValue(METADATA_INDEX_PARTITION_STATS_FILE_GROUP_COUNT, String.valueOf(fileGroupCount)); return this; @@ -1187,7 +1167,6 @@ public final class HoodieMetadataConfig extends HoodieConfig { public HoodieMetadataConfig build() { metadataConfig.setDefaultValue(ENABLE, getDefaultMetadataEnable(engineType)); metadataConfig.setDefaultValue(ENABLE_METADATA_INDEX_COLUMN_STATS, getDefaultColStatsEnable(engineType)); - metadataConfig.setDefaultValue(ENABLE_METADATA_INDEX_PARTITION_STATS, metadataConfig.isColumnStatsIndexEnabled()); metadataConfig.setDefaultValue(SECONDARY_INDEX_ENABLE_PROP, getDefaultSecondaryIndexEnable(engineType)); metadataConfig.setDefaultValue(STREAMING_WRITE_ENABLED, getDefaultForStreamingWriteEnabled(engineType)); // fix me: disable when schema on read is enabled. @@ -1244,6 +1223,13 @@ public final class HoodieMetadataConfig extends HoodieConfig { } } + /** + * The config is now deprecated. Partition stats are configured using the column stats config itself. + */ + @Deprecated + public static final String ENABLE_METADATA_INDEX_PARTITION_STATS = + METADATA_PREFIX + ".index.partition.stats.enable"; + /** * @deprecated Use {@link #ENABLE} and its methods. 
*/ diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java index d6b7108e3217..c8e4dd4c6c06 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java @@ -19,7 +19,6 @@ package org.apache.hudi.sink; import org.apache.hudi.client.HoodieFlinkWriteClient; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.WriteConcurrencyMode; @@ -747,7 +746,6 @@ public class TestWriteCopyOnWrite extends TestWriteBase { public void testReuseEmbeddedServer() throws IOException { conf.setString("hoodie.filesystem.view.remote.timeout.secs", "500"); conf.setString("hoodie.metadata.enable","true"); - conf.setString(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "false"); // HUDI-8814 HoodieFlinkWriteClient writeClient = null; HoodieFlinkWriteClient writeClient2 = null; diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteMergeOnReadWithCompact.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteMergeOnReadWithCompact.java index ee92a46fe681..7515f8904327 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteMergeOnReadWithCompact.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteMergeOnReadWithCompact.java @@ -19,7 +19,6 @@ package org.apache.hudi.sink; import org.apache.hudi.client.HoodieFlinkWriteClient; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.PartialUpdateAvroPayload; 
import org.apache.hudi.common.model.WriteConcurrencyMode; @@ -97,7 +96,6 @@ public class TestWriteMergeOnReadWithCompact extends TestWriteCopyOnWrite { // disable schedule compaction in writers conf.set(FlinkOptions.COMPACTION_SCHEDULE_ENABLED, false); conf.set(FlinkOptions.PRE_COMBINE, true); - conf.setString(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "false"); // HUDI-8814 // start pipeline1 and insert record: [id1,Danny,null,1,par1], suspend the tx commit List<RowData> dataset1 = Collections.singletonList( @@ -292,7 +290,6 @@ public class TestWriteMergeOnReadWithCompact extends TestWriteCopyOnWrite { // disable schedule compaction in writers conf.set(FlinkOptions.COMPACTION_SCHEDULE_ENABLED, false); conf.set(FlinkOptions.PRE_COMBINE, true); - conf.setString(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "false"); Configuration conf1 = conf.clone(); conf1.set(FlinkOptions.OPERATION, "BULK_INSERT"); diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestFileIndex.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestFileIndex.java index ef5d809996bf..a50f4a2a5243 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestFileIndex.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestFileIndex.java @@ -173,7 +173,6 @@ public class TestFileIndex { conf.set(READ_DATA_SKIPPING_ENABLED, true); conf.set(METADATA_ENABLED, true); conf.set(TABLE_TYPE, tableType.name()); - conf.setString(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "true"); conf.setString(HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key(), "true"); if (tableType == HoodieTableType.MERGE_ON_READ) { // enable CSI for MOR table to collect col stats for delta write stats, diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestIncrementalInputSplits.java 
b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestIncrementalInputSplits.java index 3595287b6c7d..fb9275c2fc35 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestIncrementalInputSplits.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/source/TestIncrementalInputSplits.java @@ -360,7 +360,6 @@ public class TestIncrementalInputSplits extends HoodieCommonTestHarness { conf.set(FlinkOptions.READ_AS_STREAMING, true); conf.set(FlinkOptions.READ_DATA_SKIPPING_ENABLED, true); conf.set(FlinkOptions.TABLE_TYPE, tableType.name()); - conf.setString(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "true"); conf.setString(HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key(), "true"); if (tableType == HoodieTableType.MERGE_ON_READ) { // enable CSI for MOR table to collect col stats for delta write stats, diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java index b08e772c198a..c89148173b10 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestHoodieDataSource.java @@ -530,7 +530,6 @@ public class ITTestHoodieDataSource { .option(FlinkOptions.PATH, tempFile.getAbsolutePath()) .option(FlinkOptions.METADATA_ENABLED, true) .option(FlinkOptions.READ_AS_STREAMING, true) - .option(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), true) .option(HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key(), false) .option(FlinkOptions.READ_DATA_SKIPPING_ENABLED, true) .option(FlinkOptions.TABLE_TYPE, tableType) @@ -538,7 +537,6 @@ public class ITTestHoodieDataSource { .end(); streamTableEnv.executeSql(hoodieTableDDL); Configuration conf = 
TestConfigurations.getDefaultConf(tempFile.getAbsolutePath()); - conf.setString(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "true"); conf.setString(HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key(), "true"); conf.set(FlinkOptions.READ_DATA_SKIPPING_ENABLED, true); conf.set(FlinkOptions.TABLE_TYPE, tableType.name()); diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestSchemaEvolution.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestSchemaEvolution.java index 895305db0f32..601625242991 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestSchemaEvolution.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/ITTestSchemaEvolution.java @@ -383,8 +383,7 @@ public class ITTestSchemaEvolution { FlinkOptions.COMPACTION_SCHEDULE_ENABLED.key(), false, HoodieWriteConfig.EMBEDDED_TIMELINE_SERVER_REUSE_ENABLED.key(), false, HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE.key(), true, - HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key(), "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "false"); + HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key(), "true"); } private void checkAnswerEvolved(String... 
expectedResult) throws Exception { diff --git a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableSource.java b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableSource.java index f12fc0b832f7..6d35ac1f1adf 100644 --- a/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableSource.java +++ b/hudi-flink-datasource/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableSource.java @@ -178,7 +178,6 @@ public class TestHoodieTableSource { void testDataSkippingWithPartitionStatsPruning(List<ResolvedExpression> filters, List<String> expectedPartitions) throws Exception { final String path = tempFile.getAbsolutePath(); conf = TestConfigurations.getDefaultConf(path); - conf.setString(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "true"); conf.setString(HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key(), "true"); conf.set(FlinkOptions.READ_DATA_SKIPPING_ENABLED, true); TestData.writeData(TestData.DATA_SET_INSERT, conf); diff --git a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java index 889eae6d32c0..643e51196ff9 100644 --- a/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java +++ b/hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java @@ -172,7 +172,6 @@ public class TestHoodieTableMetadataUtil extends HoodieCommonTestHarness { partitionFileSlicePairs, HoodieMetadataConfig.newBuilder().enable(true) .withMetadataIndexColumnStats(true) - .withMetadataIndexPartitionStats(true) .withColumnStatsIndexForColumns("rider,driver") .withPartitionStatsIndexParallelism(1) .build(), @@ -290,7 +289,6 @@ public class TestHoodieTableMetadataUtil extends HoodieCommonTestHarness { partitionFileSlicePairs, HoodieMetadataConfig.newBuilder().enable(true) 
.withMetadataIndexColumnStats(true) - .withMetadataIndexPartitionStats(true) .withColumnStatsIndexForColumns("rider,driver") .withPartitionStatsIndexParallelism(1) .build(), diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestDataSkippingWithMORColstats.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestDataSkippingWithMORColstats.java index 432d8e90c7f0..67e95008903b 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestDataSkippingWithMORColstats.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestDataSkippingWithMORColstats.java @@ -119,7 +119,6 @@ public class TestDataSkippingWithMORColstats extends HoodieSparkClientTestBase { // we have specified which columns to index options.put(HoodieMetadataConfig.COLUMN_STATS_INDEX_MAX_COLUMNS.key(), "1"); - options.put(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "false"); Dataset<Row> inserts = makeInsertDf("000", 100); Dataset<Row> batch1 = inserts.where(matchCond); Dataset<Row> batch2 = inserts.where(nonMatchCond); @@ -190,7 +189,6 @@ public class TestDataSkippingWithMORColstats extends HoodieSparkClientTestBase { */ @Test public void testBaseFileAndLogFileUpdateMatchesDeleteBlockCompact() { - options.put(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "false"); testBaseFileAndLogFileUpdateMatchesHelper(false, true,true, false); } @@ -204,7 +202,6 @@ public class TestDataSkippingWithMORColstats extends HoodieSparkClientTestBase { */ @Test public void testBaseFileAndLogFileUpdateMatchesAndRollBack() { - options.put(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "false"); testBaseFileAndLogFileUpdateMatchesHelper(false, false,false, true); } diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java 
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java index 1be6c3d1218f..e20bb31abfb9 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieBackedMetadata.java @@ -342,7 +342,6 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase { .withMetadataConfig(HoodieMetadataConfig.newBuilder() .withProperties(cfg.getMetadataConfig().getProps()) .withMetadataIndexColumnStats(false) - .withMetadataIndexPartitionStats(false) .build()) .build(); @@ -1145,7 +1144,6 @@ public class TestHoodieBackedMetadata extends TestHoodieMetadataBase { // Disable the other two default index for this test because the test orchestrates // the rollback with the assumption of init commits being in certain order .withMetadataIndexColumnStats(false) - .withMetadataIndexPartitionStats(false) .build()) .build(); diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/TestMetadataWriterCommit.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/TestMetadataWriterCommit.java index 6ad06102511b..48d01e13768a 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/TestMetadataWriterCommit.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/io/TestMetadataWriterCommit.java @@ -129,7 +129,7 @@ public class TestMetadataWriterCommit extends BaseTestHandle { assertEquals(10, mdtCommitMetadata.getPartitionToWriteStats().get(RECORD_INDEX.getPartitionPath()).size()); assertFalse(mdtCommitMetadata.getPartitionToWriteStats().containsKey(COLUMN_STATS.getPartitionPath())); - // Create commit in MDT with col stats enabled + // Create commit in MDT with col stats enabled (partition stats is enabled with column stats) config.getMetadataConfig().setValue(HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS, 
"true"); instantTime = InProcessTimeGenerator.createNewInstantTime(); mdtWriter = (HoodieBackedTableMetadataWriter) SparkMetadataWriterFactory.createWithStreamingWrites(storageConf, config, @@ -138,8 +138,8 @@ public class TestMetadataWriterCommit extends BaseTestHandle { mdtWriteStatus = mdtWriter.streamWriteToMetadataPartitions(HoodieJavaRDD.of(Collections.singletonList(writeStatus), context, 1), instantTime); mdtWriteStats = mdtWriteStatus.collectAsList().stream().map(WriteStatus::getStat).collect(Collectors.toList()); mdtWriter.completeStreamingCommit(instantTime, context, mdtWriteStats, commitMetadata); - // 3 bootstrap commits for 3 enabled partitions, 2 commits due to update - assertEquals(5, mdtMetaClient.reloadActiveTimeline().filterCompletedInstants().countInstants()); + // 4 bootstrap commits for 4 enabled partitions, 2 commits due to update + assertEquals(6, mdtMetaClient.reloadActiveTimeline().filterCompletedInstants().countInstants()); // Verify commit metadata mdtCommitMetadata = mdtMetaClient.getActiveTimeline().readCommitMetadata(mdtMetaClient.getActiveTimeline().lastInstant().get()); diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala index 496c7b1790f2..689ce52dc009 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala @@ -895,7 +895,7 @@ def testBulkInsertForDropPartitionColumn(): Unit = { val partitionStatsIndex = new PartitionStatsIndexSupport( spark, inputDf.schema, - HoodieMetadataConfig.newBuilder().enable(true).withMetadataIndexPartitionStats(true).build(), + HoodieMetadataConfig.newBuilder().enable(true).build(), metaClient) val partitionStats = partitionStatsIndex.loadColumnStatsIndexRecords(List("partition", "ts"), shouldReadInMemory 
= true).collectAsList() partitionStats.forEach(stat => { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/PartitionStatsIndexTestBase.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/PartitionStatsIndexTestBase.scala index 5517ce654d45..ef6012518ebc 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/PartitionStatsIndexTestBase.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/PartitionStatsIndexTestBase.scala @@ -42,7 +42,6 @@ class PartitionStatsIndexTestBase extends HoodieStatsIndexTestBase { val targetColumnsToIndex: Seq[String] = Seq("rider", "driver") val metadataOpts: Map[String, String] = Map( HoodieMetadataConfig.ENABLE.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key -> "true", HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true", HoodieMetadataConfig.COLUMN_STATS_INDEX_FOR_COLUMNS.key -> targetColumnsToIndex.mkString(",") ) @@ -113,7 +112,7 @@ class PartitionStatsIndexTestBase extends HoodieStatsIndexTestBase { val partitionStatsIndex = new PartitionStatsIndexSupport( spark, inputDf.schema, - HoodieMetadataConfig.newBuilder().enable(true).withMetadataIndexPartitionStats(true).build(), + HoodieMetadataConfig.newBuilder().enable(true).build(), metaClient) val partitionStats = partitionStatsIndex.loadColumnStatsIndexRecords(List("partition", "trip_type"), shouldReadInMemory = true).collectAsList() assertEquals(0, partitionStats.size()) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestBasicSchemaEvolution.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestBasicSchemaEvolution.scala index 66ee2fa88008..26fa1c722344 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestBasicSchemaEvolution.scala +++ 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestBasicSchemaEvolution.scala @@ -55,8 +55,7 @@ class TestBasicSchemaEvolution extends HoodieSparkClientTestBase with ScalaAsser DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", HoodieTableConfig.ORDERING_FIELDS.key -> "timestamp", - HoodieWriteConfig.TBL_NAME.key -> "hoodie_test", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key -> "true" + HoodieWriteConfig.TBL_NAME.key -> "hoodie_test" ) val verificationCol: String = "driver" diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsIndex.scala index 108f59ba82e6..1d9362769158 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsIndex.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsIndex.scala @@ -65,24 +65,6 @@ class TestPartitionStatsIndex extends PartitionStatsIndexTestBase { val sqlTempTable = "hudi_tbl" - /** - * Test case to validate partition stats cannot be created without column stats. 
- */ - @Test - def testPartitionStatsWithoutColumnStats(): Unit = { - // remove column stats enable key from commonOpts - val hudiOpts = commonOpts + (HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "false") - // should throw an exception as column stats is required for partition stats - try { - doWriteAndValidateDataAndPartitionStats(hudiOpts, - operation = DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL, - saveMode = SaveMode.Overwrite) - fail("Should have thrown exception") - } catch { - case e: HoodieException => assertTrue(e.getCause.getMessage.startsWith("Column stats partition must be enabled to generate partition stats.")) - } - } - /** * Test case to validate partition stats for a logical type column */ @@ -411,7 +393,6 @@ class TestPartitionStatsIndex extends PartitionStatsIndexTestBase { DataSourceWriteOptions.TABLE_TYPE.key -> HoodieTableType.MERGE_ON_READ.name(), HoodieMetadataConfig.ENABLE.key -> "true", HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key -> "true", HoodieMetadataConfig.RECORD_INDEX_ENABLE_PROP.key -> "true") // First ingest. 
@@ -483,7 +464,6 @@ class TestPartitionStatsIndex extends PartitionStatsIndexTestBase { val partitionStatsIndex = new PartitionStatsIndexSupport( spark, latestDf.schema, HoodieMetadataConfig.newBuilder() .enable(true) - .withMetadataIndexPartitionStats(true) .build(), metaClient) val partitionStats = partitionStatsIndex.loadColumnStatsIndexRecords( @@ -495,7 +475,6 @@ class TestPartitionStatsIndex extends PartitionStatsIndexTestBase { val columnStatsIndex = new ColumnStatsIndexSupport( spark, latestDf.schema, HoodieMetadataConfig.newBuilder() .enable(true) - .withMetadataIndexPartitionStats(true) .build(), metaClient) val columnStats = columnStatsIndex @@ -559,7 +538,6 @@ class TestPartitionStatsIndex extends PartitionStatsIndexTestBase { DataSourceWriteOptions.TABLE_TYPE.key -> HoodieTableType.MERGE_ON_READ.name(), HoodieMetadataConfig.ENABLE.key() -> "true", HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key() -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key() -> "true", DataSourceReadOptions.ENABLE_DATA_SKIPPING.key -> "true") doWriteAndValidateDataAndPartitionStats( diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsIndexWithSql.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsIndexWithSql.scala index 6629b9523541..30274ab2b565 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsIndexWithSql.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsIndexWithSql.scala @@ -687,7 +687,7 @@ class TestPartitionStatsIndexWithSql extends HoodieSparkSqlTestBase { } private def getTableFileSystemView(metaClient: HoodieTableMetaClient): HoodieTableFileSystemView = { - val metadataConfig = HoodieMetadataConfig.newBuilder().enable(true).withMetadataIndexPartitionStats(true).build() + val metadataConfig = 
HoodieMetadataConfig.newBuilder().enable(true).build() val metadataTable = new HoodieBackedTableMetadata(new HoodieSparkEngineContext(spark.sparkContext), metaClient.getStorage, metadataConfig, metaClient.getBasePath.toString) new HoodieTableFileSystemView( diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsPruning.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsPruning.scala index 2e07b702592d..3b5d3eb0320a 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsPruning.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestPartitionStatsPruning.scala @@ -47,8 +47,7 @@ class TestPartitionStatsPruning extends ColumnStatIndexTestBase { def testMetadataPSISimple(testCase: ColumnStatsTestCase): Unit = { val metadataOpts = Map( HoodieMetadataConfig.ENABLE.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key -> "true" + HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true" ) val commonOpts = Map( @@ -77,8 +76,7 @@ class TestPartitionStatsPruning extends ColumnStatIndexTestBase { def testMetadataColumnStatsIndex(testCase: ColumnStatsTestCase): Unit = { val metadataOpts = Map( HoodieMetadataConfig.ENABLE.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key -> "true" + HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true" ) val commonOpts = Map( @@ -125,7 +123,6 @@ class TestPartitionStatsPruning extends ColumnStatIndexTestBase { val metadataOpts1 = Map( HoodieMetadataConfig.ENABLE.key -> "true", HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key -> "true", 
HoodieMetadataConfig.COLUMN_STATS_INDEX_FOR_COLUMNS.key -> "c1,c2,c3,c5,c6,c7,c8" // ignore c4 ) @@ -145,7 +142,6 @@ class TestPartitionStatsPruning extends ColumnStatIndexTestBase { val metadataOpts2 = Map( HoodieMetadataConfig.ENABLE.key -> "true", HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key -> "true", HoodieMetadataConfig.COLUMN_STATS_INDEX_FOR_COLUMNS.key -> "c1,c2,c3,c5,c7,c8" // ignore c4,c6 ) @@ -163,8 +159,7 @@ class TestPartitionStatsPruning extends ColumnStatIndexTestBase { // disable cols stats val metadataOpts3 = Map( HoodieMetadataConfig.ENABLE.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "false", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key -> "false" + HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "false" ) // disable col stats @@ -186,8 +181,7 @@ class TestPartitionStatsPruning extends ColumnStatIndexTestBase { val partitionCol : String = "c8" val metadataOpts = Map( HoodieMetadataConfig.ENABLE.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "false", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key -> "false" + HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "false" ) val commonOpts = Map( @@ -216,8 +210,7 @@ class TestPartitionStatsPruning extends ColumnStatIndexTestBase { val metadataOpts0 = Map( HoodieMetadataConfig.ENABLE.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "false", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key -> "false" + HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "false" ) // updates @@ -245,7 +238,6 @@ class TestPartitionStatsPruning extends ColumnStatIndexTestBase { val metadataOpts1 = Map( HoodieMetadataConfig.ENABLE.key -> "true", HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true", - 
HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key -> "true", HoodieMetadataConfig.COLUMN_STATS_INDEX_FOR_COLUMNS.key -> "c1,c2,c3,c4,c5,c6,c7,c8" ) @@ -342,7 +334,6 @@ class TestPartitionStatsPruning extends ColumnStatIndexTestBase { val metadataOpts1 = Map( HoodieMetadataConfig.ENABLE.key -> "true", HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true", - HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key -> "true", HoodieMetadataConfig.COLUMN_STATS_INDEX_FOR_COLUMNS.key -> "c1,c2,c3,c4,c5,c6,c7,c8" ) diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/feature/index/TestExpressionIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/feature/index/TestExpressionIndex.scala index e7a75fbc2a6a..b4a98a58130b 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/feature/index/TestExpressionIndex.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/feature/index/TestExpressionIndex.scala @@ -2065,7 +2065,7 @@ class TestExpressionIndex extends HoodieSparkSqlTestBase with SparkAdapterSuppor val lastCompletedInstant = metaClient.reloadActiveTimeline().getCommitsTimeline.filterCompletedInstants().lastInstant() val configBuilder = getWriteConfigBuilder(Map.empty, metaClient.getBasePath.toString) configBuilder.withMetadataConfig(HoodieMetadataConfig.newBuilder() - .withMetadataIndexColumnStats(false).withMetadataIndexPartitionStats(false).build()) + .withMetadataIndexColumnStats(false).build()) val writeClient = new SparkRDDWriteClient(new HoodieSparkEngineContext(new JavaSparkContext(spark.sparkContext)), configBuilder.build()) writeClient.rollback(lastCompletedInstant.get().requestedTime) writeClient.close() @@ -2387,7 +2387,7 @@ class TestExpressionIndex extends HoodieSparkSqlTestBase with SparkAdapterSuppor private def getTableFileSystemView(metaClient: HoodieTableMetaClient): 
HoodieTableFileSystemView = { val engineContext = new HoodieSparkEngineContext(new JavaSparkContext(spark.sparkContext)) - val metadataConfig = HoodieMetadataConfig.newBuilder().enable(true).withMetadataIndexPartitionStats(true).build() + val metadataConfig = HoodieMetadataConfig.newBuilder().enable(true).build() val metadataTable = new HoodieBackedTableMetadata(engineContext, metaClient.getStorage, metadataConfig, metaClient.getBasePath.toString) new HoodieTableFileSystemView( metadataTable, diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieMetadataTableValidator.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieMetadataTableValidator.java index ec781d9bb616..7cc016463ead 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieMetadataTableValidator.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/TestHoodieMetadataTableValidator.java @@ -472,7 +472,6 @@ public class TestHoodieMetadataTableValidator extends HoodieSparkClientTestBase Dataset<Row> inserts = makeInsertDf("000", 5); inserts.write().format("hudi").options(writeOptions) .option(DataSourceWriteOptions.OPERATION().key(), WriteOperationType.BULK_INSERT.value()) - .option(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "true") .mode(SaveMode.Overwrite) .save(basePath); // validate MDT partition stats @@ -481,7 +480,6 @@ public class TestHoodieMetadataTableValidator extends HoodieSparkClientTestBase Dataset<Row> updates = makeUpdateDf("001", 5); updates.write().format("hudi").options(writeOptions) .option(DataSourceWriteOptions.OPERATION().key(), WriteOperationType.UPSERT.value()) - .option(HoodieMetadataConfig.ENABLE_METADATA_INDEX_PARTITION_STATS.key(), "true") .mode(SaveMode.Append) .save(basePath);
