kamalcph commented on code in PR #20913:
URL: https://github.com/apache/kafka/pull/20913#discussion_r3294901251
##########
storage/src/main/java/org/apache/kafka/server/log/remote/storage/RemoteLogManager.java:
##########
@@ -925,18 +927,73 @@ List<EnrichedLogSegment> candidateLogSegments(UnifiedLog
log, Long fromOffset, L
List<EnrichedLogSegment> candidateLogSegments = new ArrayList<>();
List<LogSegment> segments = log.logSegments(fromOffset,
Long.MAX_VALUE);
if (!segments.isEmpty()) {
+ long currentTimeMs = time.milliseconds();
+ long totalLogSize = UnifiedLog.sizeInBytes(segments);
+ long cumulativeSize = 0;
for (int idx = 1; idx < segments.size(); idx++) {
LogSegment previousSeg = segments.get(idx - 1);
LogSegment currentSeg = segments.get(idx);
if (currentSeg.baseOffset() <= lastStableOffset) {
- candidateLogSegments.add(new
EnrichedLogSegment(previousSeg, currentSeg.baseOffset()));
+ cumulativeSize += previousSeg.size();
+ if (isEligibleForUpload(log.config(), previousSeg,
currentTimeMs, totalLogSize, cumulativeSize)) {
+ candidateLogSegments.add(new
EnrichedLogSegment(previousSeg, currentSeg.baseOffset()));
+ } else {
+ break;
+ }
}
}
// Discard the last active segment
}
return candidateLogSegments;
}
+ private boolean isEligibleForUpload(LogConfig logConfig, LogSegment
previousSeg, long currentTimeMs, long totalLogSize, long cumulativeSize) {
+ long copyLagMs = logConfig.remoteCopyLagMs();
+ long copyLagBytes = logConfig.remoteCopyLagBytes();
+ if (logger.isTraceEnabled()) {
+ logger.trace("delayCopy check for segment {}: copyLagMs={},
copyLagBytes={}, currentTimeMs={}, totalLogSize={}, cumulativeSize={},
sizeLagBytes={}",
+ previousSeg, copyLagMs, copyLagBytes, currentTimeMs,
totalLogSize, cumulativeSize, totalLogSize - cumulativeSize);
+ }
+
+ if (copyLagMs == 0 || copyLagBytes == 0) {
+ return true;
+ }
+
+ boolean limitedCopyLagMsCheck = copyLagMs > 0;
+ boolean limitedCopyLagSizeCheck = copyLagBytes > 0;
+
+ if (limitedCopyLagMsCheck && eligibleUploadByTime(previousSeg,
currentTimeMs, copyLagMs)) {
+ return true;
+ }
+
+ return limitedCopyLagSizeCheck &&
eligibleUploadBySize(previousSeg, totalLogSize, cumulativeSize, copyLagBytes);
+ }
+
+ private boolean eligibleUploadByTime(LogSegment segment, long
currentTimeMs, long copyLagMs) {
+ try {
+ long segmentAgeMs = currentTimeMs - segment.largestTimestamp();
+ boolean eligibleUpload = segmentAgeMs < 0 || segmentAgeMs >=
copyLagMs;
Review Comment:
> shouldn't be blocked from deletion once they are already uploaded. Perhaps
we could fall back to checking the lastModifiedTime when a segment contains
future records.
Yeah, we can allow this behavior by introducing a config. The segment exists
in the remote storage, but the user might face slowness when reading the data
from remote if they don't have a prefetching feature implemented in the Remote
Storage Manager. So, it is better to gate the change in behavior via a config,
and the change applies only when remote storage is enabled on the topic.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]