sv2000 commented on code in PR #3517:
URL: https://github.com/apache/gobblin/pull/3517#discussion_r891531637
##########
gobblin-iceberg/src/main/java/org/apache/gobblin/iceberg/writer/IcebergMetadataWriter.java:
##########
@@ -790,28 +789,33 @@ public void flush(String dbName, String tableName) throws
IOException {
Transaction transaction = tableMetadata.transaction.get();
Map<String, String> props = tableMetadata.newProperties.or(
Maps.newHashMap(tableMetadata.lastProperties.or(getIcebergTable(tid).properties())));
+ String topic = props.get(TOPIC_NAME_KEY);
if (tableMetadata.appendFiles.isPresent()) {
tableMetadata.appendFiles.get().commit();
if (tableMetadata.completenessEnabled) {
- String topicName = props.get(TOPIC_NAME_KEY);
- if(topicName == null) {
- log.error(String.format("Not performing audit check. %s is null.
Please set as table property of %s.%s",
- TOPIC_NAME_KEY, dbName, tableName));
- } else {
- long newCompletenessWatermark =
- computeCompletenessWatermark(topicName,
tableMetadata.datePartitions, tableMetadata.prevCompletenessWatermark);
- if(newCompletenessWatermark >
tableMetadata.prevCompletenessWatermark) {
- log.info(String.format("Updating %s for %s.%s to %s",
COMPLETION_WATERMARK_KEY, dbName, tableName, newCompletenessWatermark));
- props.put(COMPLETION_WATERMARK_KEY,
String.valueOf(newCompletenessWatermark));
- props.put(COMPLETION_WATERMARK_TIMEZONE_KEY, this.timeZone);
- tableMetadata.newCompletenessWatermark =
newCompletenessWatermark;
- }
- }
+ checkAndUpdateCompletenessWatermark(tableMetadata, topic,
tableMetadata.datePartitions, props);
}
}
if (tableMetadata.deleteFiles.isPresent()) {
tableMetadata.deleteFiles.get().commit();
}
+ // Check and update completion watermark when there are no files to be
registered, typically for quiet topics
+ // The logic is to check the next window from previous completion
watermark and update the watermark if there are no audit counts
Review Comment:
So what happens if the current time is 08:00 and current watermark is 02:00?
Will we only check for the completeness for [02:00, 03:00]? Shouldn't we check
for completeness of the entire range [02:00, 08:00]?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]