slfan1989 commented on code in PR #8388:
URL: https://github.com/apache/hudi/pull/8388#discussion_r1163441854
##########
hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java:
##########
@@ -258,13 +258,28 @@ protected void syncHoodieTable(String tableName, boolean
useRealtimeInputFormat,
lastCommitTimeSynced = syncClient.getLastCommitTimeSynced(tableName);
}
LOG.info("Last commit time synced was found to be " +
lastCommitTimeSynced.orElse("null"));
- List<String> writtenPartitionsSince =
syncClient.getWrittenPartitionsSince(lastCommitTimeSynced);
- LOG.info("Storage partitions scan complete. Found " +
writtenPartitionsSince.size());
- // Sync the partitions if needed
- // find dropped partitions, if any, in the latest commit
- Set<String> droppedPartitions =
syncClient.getDroppedPartitionsSince(lastCommitTimeSynced);
- boolean partitionsChanged = syncPartitions(tableName,
writtenPartitionsSince, droppedPartitions);
+ boolean partitionsChanged;
+ if (!lastCommitTimeSynced.isPresent()
+ ||
syncClient.getActiveTimeline().isBeforeTimelineStarts(lastCommitTimeSynced.get()))
{
+ // If the last commit time synced is before the start of the active
timeline,
+ // the Hive sync falls back to list all partitions on storage, instead of
+ // reading active and archived timelines for written partitions.
+ LOG.info("Sync all partitions given the last commit time synced is empty
or "
+ + "before the start of the active timeline. Listing all partitions
in "
+ + config.getString(META_SYNC_BASE_PATH)
+ + ", file system: " + config.getHadoopFileSystem());
+ partitionsChanged = syncAllPartitions(tableName);
+ } else {
+ List<String> writtenPartitionsSince =
syncClient.getWrittenPartitionsSince(lastCommitTimeSynced);
+ LOG.info("Storage partitions scan complete. Found " +
writtenPartitionsSince.size());
Review Comment:
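Our logging has moved to SLF4J. Can we use `{}` placeholders here instead of string concatenation? For example: `LOG.info("Storage partitions scan complete. Found {}", writtenPartitionsSince.size());`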
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]