This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 3dcd7573fa2 [HUDI-6184] Improve the test on incremental queries (#8648)
3dcd7573fa2 is described below
commit 3dcd7573fa26556af83cc81b108ae57cc363145c
Author: Y Ethan Guo <[email protected]>
AuthorDate: Fri May 5 23:16:16 2023 -0700
[HUDI-6184] Improve the test on incremental queries (#8648)
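The test
TestIncrementalReadWithFullTableScan#testFailEarlyForIncrViewQueryForNonExistingFiles
can fail when archival behavior changes, because the values it passes to
runIncrementalQueryAndCompare are hard-coded.
This commit makes the test more robust: it derives those values from the actual
number of archived and completed instants on the timeline, asserts that at least
three of each exist, and raises hoodie.keep.max.commits from 5 to 7.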
---
.../functional/TestIncrementalReadWithFullTableScan.scala | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestIncrementalReadWithFullTableScan.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestIncrementalReadWithFullTableScan.scala
index f2396fc9bfc..45b9cb4c902 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestIncrementalReadWithFullTableScan.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestIncrementalReadWithFullTableScan.scala
@@ -85,7 +85,7 @@ class TestIncrementalReadWithFullTableScan extends
HoodieSparkClientTestBase {
.option(DataSourceWriteOptions.TABLE_TYPE.key, tableType.name())
.option("hoodie.cleaner.commits.retained", "3")
.option("hoodie.keep.min.commits", "4")
- .option("hoodie.keep.max.commits", "5")
+ .option("hoodie.keep.max.commits", "7")
.option(DataSourceWriteOptions.OPERATION.key(),
DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
.mode(SaveMode.Append)
.save(basePath)
@@ -106,6 +106,10 @@ class TestIncrementalReadWithFullTableScan extends
HoodieSparkClientTestBase {
val completedCommits =
hoodieMetaClient.getCommitsTimeline.filterCompletedInstants() // C4 to C9
val archivedInstants =
hoodieMetaClient.getArchivedTimeline.filterCompletedInstants()
.getInstantsAsStream.distinct().toArray // C0 to C3
+ val nCompletedCommits = completedCommits.getInstants.size
+ val nArchivedInstants = archivedInstants.size
+ assertTrue(nCompletedCommits >= 3)
+ assertTrue(nArchivedInstants >= 3)
//Anything less than 2 is a valid commit in the sense no cleanup has been
done for those commit files
val startUnarchivedCommitTs =
completedCommits.nthInstant(0).get().getTimestamp //C4
@@ -125,8 +129,8 @@ class TestIncrementalReadWithFullTableScan extends
HoodieSparkClientTestBase {
// Test start commit is archived, end commit is not archived
shouldThrowIfFallbackIsFalse(tableType,
- () => runIncrementalQueryAndCompare(startArchivedCommitTs,
endUnarchivedCommitTs, 5, false))
- runIncrementalQueryAndCompare(startArchivedCommitTs,
endUnarchivedCommitTs, 5, true)
+ () => runIncrementalQueryAndCompare(startArchivedCommitTs,
endUnarchivedCommitTs, nArchivedInstants + 1, false))
+ runIncrementalQueryAndCompare(startArchivedCommitTs,
endUnarchivedCommitTs, nArchivedInstants + 1, true)
// Test both start commit and end commits are not archived but got cleaned
shouldThrowIfFallbackIsFalse(tableType,
@@ -134,7 +138,7 @@ class TestIncrementalReadWithFullTableScan extends
HoodieSparkClientTestBase {
runIncrementalQueryAndCompare(startUnarchivedCommitTs,
endUnarchivedCommitTs, 1, true)
// Test start commit is not archived, end commits is out of the timeline
- runIncrementalQueryAndCompare(startUnarchivedCommitTs,
endOutOfRangeCommitTs, 5, true)
+ runIncrementalQueryAndCompare(startUnarchivedCommitTs,
endOutOfRangeCommitTs, nCompletedCommits - 1, true)
// Test both start commit and end commits are out of the timeline
runIncrementalQueryAndCompare(startOutOfRangeCommitTs,
endOutOfRangeCommitTs, 0, false)