nada-attia commented on code in PR #18064:
URL: https://github.com/apache/hudi/pull/18064#discussion_r2799716413
##########
hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/TestHiveSyncTool.java:
##########
@@ -1464,6 +1468,105 @@ public void testDropPartition(String syncMode, String
enablePushDown) throws Exc
"The last commit that was synced should be updated in the
TBLPROPERTIES");
}
+ @ParameterizedTest
+ @MethodSource("syncMode")
+ public void testTouchPartition(String syncMode) throws Exception {
+ hiveSyncProps.setProperty(HIVE_SYNC_MODE.key(), syncMode);
+ hiveSyncProps.setProperty(META_SYNC_CONDITIONAL_SYNC.key(), "true");
+ String instantTime = "100";
+ HiveTestUtil.createCOWTable(instantTime, 5, true);
+
+ HiveTestUtil.getCreatedTablesSet().add(HiveTestUtil.DB_NAME + "." +
HiveTestUtil.TABLE_NAME);
+ reInitHiveSyncClient();
+ assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
+ "Table " + HiveTestUtil.TABLE_NAME + " should not exist
initially");
+ hiveSyncTool.syncHoodieTable();
+
+ // Reinitialize client after sync as the previous client was closed
+ reInitHiveSyncClient();
+ Option<String> lastCommitTimeSynced =
hiveClient.getLastCommitTimeSynced(HiveTestUtil.TABLE_NAME);
+ Option<String> lastCommitCompletionTimeSynced =
hiveClient.getLastCommitCompletionTimeSynced(HiveTestUtil.TABLE_NAME);
+ assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
+ "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync
completes");
+ assertEquals(hiveClient.getMetastoreSchema(HiveTestUtil.TABLE_NAME).size(),
+ hiveClient.getStorageSchema().getFields().size() +
getPartitionFieldSize(),
+ "Hive Schema should match the table schema + partition field");
+
+ List<Partition> partitions =
hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME);
+ String partitionToTouch = getRelativePartitionPath(new
Path(HiveTestUtil.basePath),
+ new Path(partitions.get(0).getStorageLocation()));
+
+ assertEquals(5, partitions.size(),
+ "Table partitions should match the number of partitions we wrote");
+ assertEquals(instantTime,
hiveClient.getLastCommitTimeSynced(HiveTestUtil.TABLE_NAME).get(),
+ "The last commit that was synced should be updated in the
TBLPROPERTIES");
+
+ // insert into existing partition (creates a touch event)
+ HiveTestUtil.addCOWPartition(partitionToTouch, true, true, "101");
+
+ // sync touch partition event
+ reInitHiveSyncClient();
+ hiveSyncTool.syncHoodieTable();
+
+ // Reinitialize client after sync as the previous client was closed
+ reInitHiveSyncClient();
+ List<Partition> hivePartitions =
hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME);
+ List<String> writtenPartitionsSince =
hiveClient.getWrittenPartitionsSince(lastCommitTimeSynced,
lastCommitCompletionTimeSynced);
+ List<PartitionEvent> partitionEvents =
hiveClient.getPartitionEvents(hivePartitions, writtenPartitionsSince,
Collections.emptySet());
+ List<String> touchPartitionEvents = partitionEvents.stream().filter(s ->
s.eventType == PartitionEventType.TOUCH).map(s -> s.storagePartition)
+ .collect(Collectors.toList());
+ // check touch partition event was detected
+ assertEquals(1, touchPartitionEvents.size(), "There should be only one
touch partition event: " + touchPartitionEvents + "written: " +
writtenPartitionsSince + "hive: " + hivePartitions);
+ }
+
+ @ParameterizedTest
+ @MethodSource("syncMode")
+ public void testTouchPartitionNotSyncedWhenConditionalSyncDisabled(String
syncMode) throws Exception {
+ hiveSyncProps.setProperty(HIVE_SYNC_MODE.key(), syncMode);
+ hiveSyncProps.setProperty(META_SYNC_CONDITIONAL_SYNC.key(), "false");
+ String instantTime = "100";
+ HiveTestUtil.createCOWTable(instantTime, 5, true);
+
+ HiveTestUtil.getCreatedTablesSet().add(HiveTestUtil.DB_NAME + "." +
HiveTestUtil.TABLE_NAME);
+ reInitHiveSyncClient();
+ assertFalse(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
+ "Table " + HiveTestUtil.TABLE_NAME + " should not exist
initially");
+ hiveSyncTool.syncHoodieTable();
+
+ // Reinitialize client after sync as the previous client was closed
+ reInitHiveSyncClient();
+ assertTrue(hiveClient.tableExists(HiveTestUtil.TABLE_NAME),
+ "Table " + HiveTestUtil.TABLE_NAME + " should exist after sync
completes");
+
+ List<Partition> partitions =
hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME);
+ String partitionToTouch = getRelativePartitionPath(new
Path(HiveTestUtil.basePath),
+ new Path(partitions.get(0).getStorageLocation()));
+
+ assertEquals(5, partitions.size(),
+ "Table partitions should match the number of partitions we wrote");
+
+ // insert into existing partition (creates a touch event)
+ HiveTestUtil.addCOWPartition(partitionToTouch, true, true, "101");
+
+ // sync - with META_SYNC_CONDITIONAL_SYNC=false, touch partitions should
not be synced to HMS
+ // even though the partition was written to
+ reInitHiveSyncClient();
+ hiveSyncTool.syncHoodieTable();
+
+ // Reinitialize client after sync as the previous client was closed
+ reInitHiveSyncClient();
+ List<Partition> hivePartitionsAfter =
hiveClient.getAllPartitions(HiveTestUtil.TABLE_NAME);
+
+ // Verify the partition count remains the same (no new partitions added)
+ // This demonstrates that touch partitions are NOT synced to HMS when
META_SYNC_CONDITIONAL_SYNC=false
+ assertEquals(5, hivePartitionsAfter.size(),
Review Comment:
done on lines 1532-1545
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]