yihua commented on code in PR #12602:
URL: https://github.com/apache/hudi/pull/12602#discussion_r1908007317
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala:
##########
@@ -91,87 +90,101 @@ class TestSparkSqlCoreFlow extends HoodieSparkSqlTestBase {
val dataGen = new
HoodieTestDataGenerator(HoodieTestDataGenerator.TRIP_NESTED_EXAMPLE_SCHEMA,
0xDEED)
//Bulk insert first set of records
- val inputDf0 = generateInserts(dataGen, "000", 100).cache()
+ val inputDf0 = generateInserts(dataGen, "000", 10).cache()
insertInto(tableName, tableBasePath, inputDf0, BULK_INSERT,
isMetadataEnabled, 1)
+ val inputDf0Rows = canonicalizeDF(inputDf0).collect()
+ inputDf0.unpersist(true)
assertTrue(hasNewCommits(fs, tableBasePath, "000"))
//Verify bulk insert works correctly
- val snapshotDf1 = doSnapshotRead(tableName, isMetadataEnabled).cache()
- assertEquals(100, snapshotDf1.count())
- compareEntireInputDfWithHudiDf(inputDf0, snapshotDf1)
- snapshotDf1.unpersist(true)
+ val snapshotDf1 = doSnapshotRead(tableName, isMetadataEnabled)
+ val snapshotDf1Rows =
canonicalizeDF(dropMetaColumns(snapshotDf1)).collect()
+ assertEquals(10, snapshotDf1.count())
+ compareEntireInputRowsWithHudiRows(inputDf0Rows, snapshotDf1Rows)
//Test updated records
- val updateDf = generateUniqueUpdates(dataGen, "001", 50).cache()
+ val updateDf = generateUniqueUpdates(dataGen, "001", 5).cache()
insertInto(tableName, tableBasePath, updateDf, UPSERT, isMetadataEnabled,
2)
- val commitInstantTime2 = latestCommit(fs, tableBasePath)
- val snapshotDf2 = doSnapshotRead(tableName, isMetadataEnabled).cache()
- assertEquals(100, snapshotDf2.count())
- compareUpdateDfWithHudiDf(updateDf, snapshotDf2, snapshotDf1)
- snapshotDf2.unpersist(true)
+ val commitCompletedInstant2 = latestCompletedCommitCompletionTime(fs,
tableBasePath)
Review Comment:
```suggestion
val commitInstant2 = latestCompletedCommitCompletionTime(fs,
tableBasePath)
```
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSparkSqlCoreFlow.scala:
##########
@@ -91,87 +90,101 @@ class TestSparkSqlCoreFlow extends HoodieSparkSqlTestBase {
val dataGen = new
HoodieTestDataGenerator(HoodieTestDataGenerator.TRIP_NESTED_EXAMPLE_SCHEMA,
0xDEED)
//Bulk insert first set of records
- val inputDf0 = generateInserts(dataGen, "000", 100).cache()
+ val inputDf0 = generateInserts(dataGen, "000", 10).cache()
Review Comment:
The same applies to the other places.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]