This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 4af0c8f153 Fixing the logic to only process deep store untracked
segments in the offline table part for a hybrid table (#16107)
4af0c8f153 is described below
commit 4af0c8f15367f2e0371d1cf2daa2fc4b9fd13a0a
Author: Xiang Fu <[email protected]>
AuthorDate: Sun Jun 15 03:21:55 2025 +0800
Fixing the logic to only process deep store untracked segments in the
offline table part for a hybrid table (#16107)
---
.../helix/core/retention/RetentionManager.java | 29 +++++++++++++++++++---
1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/retention/RetentionManager.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/retention/RetentionManager.java
index e139a93cc6..984bfbd527 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/retention/RetentionManager.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/retention/RetentionManager.java
@@ -301,6 +301,18 @@ public class RetentionManager extends ControllerPeriodicTask<Void> {
private List<String> getSegmentsToDeleteFromDeepstore(String tableNameWithType, RetentionStrategy retentionStrategy,
List<SegmentZKMetadata> segmentZKMetadataList, int untrackedSegmentsDeletionBatchSize) {
List<String> segmentsToDelete = new ArrayList<>();
+ String rawTableName = TableNameBuilder.extractRawTableName(tableNameWithType);
+ boolean isHybridTable = _pinotHelixResourceManager.hasOfflineTable(rawTableName)
+ && _pinotHelixResourceManager.hasRealtimeTable(rawTableName);
+ if (isHybridTable && TableNameBuilder.isRealtimeTableResource(tableNameWithType)) {
+ // If it is a hybrid table, we don't need to scan deep store for untracked segments when processing the
+ // realtime table.
+ // This is because realtime tables are expected to have short retention periods, so scanning deep store for
+ // untracked segments is not necessary.
+ LOGGER.info("Skipping deep store scan for untracked segments for realtime table: {} as it's a hybrid table",
+ tableNameWithType);
+ return segmentsToDelete;
+ }
if (!_untrackedSegmentDeletionEnabled) {
LOGGER.info(
@@ -317,8 +329,20 @@ public class RetentionManager extends ControllerPeriodicTask<Void> {
return segmentsToDelete;
}
- List<String> segmentsPresentInZK =
- segmentZKMetadataList.stream().map(SegmentZKMetadata::getSegmentName).collect(Collectors.toList());
+ List<String> segmentsPresentInZK;
+ if (isHybridTable) {
+ segmentsPresentInZK = new ArrayList<>();
+ // This must be the OFFLINE table
+ segmentsPresentInZK.addAll(
+ segmentZKMetadataList.stream().map(SegmentZKMetadata::getSegmentName).collect(Collectors.toList()));
+ // Add segments from the REALTIME table as well
+ segmentsPresentInZK.addAll(
+ _pinotHelixResourceManager.getSegmentsFor(TableNameBuilder.REALTIME.tableNameWithType(rawTableName), false));
+ } else {
+ segmentsPresentInZK =
+ segmentZKMetadataList.stream().map(SegmentZKMetadata::getSegmentName).collect(Collectors.toList());
+ }
+
try {
LOGGER.info("Fetch segments present in deep store that are beyond retention period for table: {}",
tableNameWithType);
@@ -340,7 +364,6 @@ public class RetentionManager extends ControllerPeriodicTask<Void> {
return segmentsToDelete;
}
-
/**
* Identifies segments in deepstore that are ready for deletion based on the retention strategy.
*
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]