nsivabalan commented on code in PR #12105:
URL: https://github.com/apache/hudi/pull/12105#discussion_r1817968282
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala:
##########
@@ -90,13 +97,428 @@ class TestColumnStatsIndex extends ColumnStatIndexTestBase
{
"index/colstats/mor-updated2-column-stats-index-table.json"
}
- doWriteAndValidateColumnStats(testCase, metadataOpts, commonOpts,
+
doWriteAndValidateColumnStats(DoWriteAndValidateColumnStatsParams(testCase,
metadataOpts, commonOpts,
dataSourcePath = "index/colstats/update-input-table-json",
expectedColStatsSourcePath = expectedColStatsSourcePath,
operation = DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL,
- saveMode = SaveMode.Append)
+ saveMode = SaveMode.Append))
}
+ @ParameterizedTest
+ @MethodSource(Array("testTableTypePartitionTypeParams"))
+ def testMetadataColumnStatsIndexInitializationWithUpserts(tableType:
HoodieTableType, partitionCol : String): Unit = {
+ val testCase = ColumnStatsTestCase(tableType, shouldReadInMemory = true)
+ val metadataOpts = Map(
+ HoodieMetadataConfig.ENABLE.key -> "true"
+ )
+
+ val commonOpts = Map(
+ "hoodie.insert.shuffle.parallelism" -> "1",
+ "hoodie.upsert.shuffle.parallelism" -> "1",
+ HoodieWriteConfig.TBL_NAME.key -> "hoodie_test",
+ DataSourceWriteOptions.TABLE_TYPE.key -> testCase.tableType.toString,
+ RECORDKEY_FIELD.key -> "c1",
+ PRECOMBINE_FIELD.key -> "c1",
+ PARTITIONPATH_FIELD.key() -> partitionCol,
+ HoodieTableConfig.POPULATE_META_FIELDS.key -> "true",
+ HoodieCompactionConfig.INLINE_COMPACT_NUM_DELTA_COMMITS.key() -> "5"
+ ) ++ metadataOpts
+
+ // inserts
+
doWriteAndValidateColumnStats(DoWriteAndValidateColumnStatsParams(testCase,
metadataOpts, commonOpts,
+ dataSourcePath = "index/colstats/input-table-json",
+ expectedColStatsSourcePath = null,
+ operation = DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL,
+ saveMode = SaveMode.Overwrite,
+ false,
+ numPartitions = 1,
+ parquetMaxFileSize = 100 * 1024 * 1024,
+ smallFileLimit = 0))
+
+ // updates
+
doWriteAndValidateColumnStats(DoWriteAndValidateColumnStatsParams(testCase,
metadataOpts, commonOpts,
+ dataSourcePath = "index/colstats/update2-input-table-json/",
+ expectedColStatsSourcePath = null,
+ operation = DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL,
+ saveMode = SaveMode.Append,
+ false,
+ numPartitions = 1,
+ parquetMaxFileSize = 100 * 1024 * 1024,
+ smallFileLimit = 0))
+
+ // Delete a subset of records; this adds a delete log block for MOR tables.
+
doWriteAndValidateColumnStats(DoWriteAndValidateColumnStatsParams(testCase,
metadataOpts, commonOpts,
+ dataSourcePath = "index/colstats/delete-input-table-json/",
+ expectedColStatsSourcePath = null,
+ operation = DataSourceWriteOptions.DELETE_OPERATION_OPT_VAL,
+ saveMode = SaveMode.Append,
+ false,
+ numPartitions = 1,
+ parquetMaxFileSize = 100 * 1024 * 1024,
+ smallFileLimit = 0))
+
+ val metadataOpts1 = Map(
+ HoodieMetadataConfig.ENABLE.key -> "true",
+ HoodieMetadataConfig.ENABLE_METADATA_INDEX_COLUMN_STATS.key -> "true"
+ )
+
+ // NOTE: MOR and COW have different fixtures since MOR is bearing
delta-log files (holding
Review Comment:
ack.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]