suryaprasanna commented on code in PR #17494:
URL: https://github.com/apache/hudi/pull/17494#discussion_r2688491632
##########
hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHoodieIndex.java:
##########
@@ -325,6 +361,66 @@ public void testLookupIndexWithAndWithoutColumnStats()
throws Exception {
recordLocations.foreach(entry ->
assertEquals(recordKeyToPartitionPathMap.get(entry._1.getRecordKey()),
entry._1.getPartitionPath(), "PartitionPath mismatch"));
}
+ @ParameterizedTest
+ @MethodSource("indexTypeParams")
+ public void testTagLocationDuringUpdatesAndFailures(IndexType indexType,
boolean populateMetaFields, boolean enableMetadataIndex) throws Exception {
+ setUp(indexType, populateMetaFields, enableMetadataIndex);
+ String newCommitTime =
HoodieInstantTimeGenerator.getCurrentInstantTimeStr();
+ int initialRecords = 10;// + new Random().nextInt(20);
+ List<HoodieRecord> originalBatch = getRandomInserts(initialRecords);
+ JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(originalBatch, 1);
+
+ metaClient = HoodieTableMetaClient.reload(metaClient);
+ HoodieTable hoodieTable = HoodieSparkTable.create(config, context,
metaClient);
+
+ // Test tagLocation without any entries in index, no records should be
tagged.
+ JavaRDD<HoodieRecord> javaRDD = tagLocation(hoodieTable.getIndex(),
writeRecords, hoodieTable);
+ assertEquals(0,
javaRDD.filter(HoodieRecord::isCurrentLocationKnown).collect().size());
+
+ // Insert initialRecords
+ WriteClientTestUtils.startCommitWithTime(writeClient, newCommitTime);
+ JavaRDD<WriteStatus> writeStatues = writeClient.upsert(writeRecords,
newCommitTime);
+ assertNoWriteErrors(writeStatues.collect());
+
+ // Now tagLocation for these records, index should not tag them since the
commit is still in progress.
+ javaRDD = tagLocation(hoodieTable.getIndex(), writeRecords, hoodieTable);
+ assert (javaRDD.filter(record ->
record.isCurrentLocationKnown()).collect().size() == 0);
Review Comment:
Made the change.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]