walterddr commented on a change in pull request #8078:
URL: https://github.com/apache/pinot/pull/8078#discussion_r798940110



##########
File path: 
pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/SegmentDeletionManager.java
##########
@@ -222,60 +240,108 @@ protected void removeSegmentFromStore(String 
tableNameWithType, String segmentId
   }
 
   /**
-   * Removes aged deleted segments from the deleted directory
-   * @param retentionInDays: retention for deleted segments in days
+   * Scanning deleted segment directory for aged segments and returns their 
table name.
+   * @return list of table name with aged deleted segments.
    */
-  public void removeAgedDeletedSegments(int retentionInDays) {
+  public List<String> getTableWithAgedDeletedSegments() {
+    if (_dataDir == null) {
+      return Collections.emptyList();
+    }
+    URI deletedDirURI = URIUtils.getUri(_dataDir, DELETED_SEGMENTS);
+    PinotFS pinotFS = PinotFSFactory.create(deletedDirURI.getScheme());
+    try {
+      // Directly return when the deleted directory does not exist (no segment 
deleted yet)
+      if (!pinotFS.exists(deletedDirURI)) {
+        return Collections.emptyList();
+      }
+
+      if (!pinotFS.isDirectory(deletedDirURI)) {
+        LOGGER.warn("Deleted segments URI: {} is not a directory", 
deletedDirURI);
+        return Collections.emptyList();
+      }
+
+      String[] tableNameDirs = pinotFS.listFiles(deletedDirURI, false);
+      if (tableNameDirs == null) {
+        LOGGER.warn("Failed to list files from the deleted segments directory: 
{}", deletedDirURI);
+        return Collections.emptyList();
+      }
+      return Arrays.stream(tableNameDirs).map(tableNameDir -> {
+        String[] split = StringUtils.split(tableNameDir, '/');
+        return split[split.length - 1];
+      }).collect(Collectors.toList());
+    } catch (IOException e) {
+      LOGGER.error("Error scanning deleted segment directory!", e);
+      return Collections.emptyList();
+    }
+  }
+
+  /**
+   * Removes aged deleted segments from the deleted directory for a specific 
table.
+   * @param tableConfig: config for the table that needs to be deleted.
+   */
+  public void removeAgedDeletedSegments(TableConfig tableConfig) {
     if (_dataDir != null) {
-      URI deletedDirURI = URIUtils.getUri(_dataDir, DELETED_SEGMENTS);
-      PinotFS pinotFS = PinotFSFactory.create(deletedDirURI.getScheme());
+      String tableNameWithType = tableConfig.getTableName();
+      URI tableDeletedDirURI = URIUtils.getUri(_dataDir, DELETED_SEGMENTS, 
tableNameWithType);
+      PinotFS pinotFS = PinotFSFactory.create(tableDeletedDirURI.getScheme());
 
       try {
         // Directly return when the deleted directory does not exist (no 
segment deleted yet)
-        if (!pinotFS.exists(deletedDirURI)) {
+        if (!pinotFS.exists(tableDeletedDirURI)) {
           return;
         }
 
-        if (!pinotFS.isDirectory(deletedDirURI)) {
-          LOGGER.warn("Deleted segments URI: {} is not a directory", 
deletedDirURI);
+        if (!pinotFS.isDirectory(tableDeletedDirURI)) {
+          LOGGER.warn("Deleted segments URI: {} is not a directory", 
tableDeletedDirURI);
           return;
         }
 
-        String[] tableNameDirs = pinotFS.listFiles(deletedDirURI, false);
-        if (tableNameDirs == null) {
-          LOGGER.warn("Failed to list files from the deleted segments 
directory: {}", deletedDirURI);
-          return;
-        }
+        long retentionMs = getRetentionFromTableConfig(tableConfig);
 
-        for (String tableNameDir : tableNameDirs) {
-          URI tableNameURI = URIUtils.getUri(tableNameDir);
-          // Get files that are aged
-          final String[] targetFiles = pinotFS.listFiles(tableNameURI, false);
-          int numFilesDeleted = 0;
-          for (String targetFile : targetFiles) {
-            URI targetURI = URIUtils.getUri(targetFile);
-            Date dateToDelete = 
DateTime.now().minusDays(retentionInDays).toDate();
-            if (pinotFS.lastModified(targetURI) < dateToDelete.getTime()) {
-              if (!pinotFS.delete(targetURI, true)) {
-                LOGGER.warn("Cannot remove file {} from deleted directory.", 
targetURI.toString());
-              } else {
-                numFilesDeleted++;
-              }
+        // Get files that are aged
+        final String[] targetFiles = pinotFS.listFiles(tableDeletedDirURI, 
false);
+        int numFilesDeleted = 0;
+        for (String targetFile : targetFiles) {
+          URI targetURI = URIUtils.getUri(targetFile);
+          Date dateToDelete = DateTime.now().minus(retentionMs).toDate();
+          if (pinotFS.lastModified(targetURI) < dateToDelete.getTime()) {
+            if (!pinotFS.delete(targetURI, true)) {
+              LOGGER.warn("Cannot remove file {} from deleted directory.", 
targetURI.toString());
+            } else {
+              numFilesDeleted++;
             }
           }
+        }
 
-          if (numFilesDeleted == targetFiles.length) {
-            // Delete directory if it's empty
-            if (!pinotFS.delete(tableNameURI, false)) {
-              LOGGER.warn("The directory {} cannot be removed.", tableNameDir);
-            }
+        if (numFilesDeleted == targetFiles.length) {
+          // Delete directory if it's empty
+          if (!pinotFS.delete(tableDeletedDirURI, false)) {
+            LOGGER.warn("The directory {} cannot be removed.", 
tableDeletedDirURI);
           }
         }
       } catch (IOException e) {
-        LOGGER.error("Had trouble deleting directories: {}", 
deletedDirURI.toString(), e);
+        LOGGER.error("Had trouble deleting directories: {}", 
tableDeletedDirURI.toString(), e);
       }
     } else {
       LOGGER.info("dataDir is not configured, won't delete any expired 
segments from deleted directory.");
     }
   }
+
+  public long getRetentionFromTableConfig(TableConfig tableConfig) {

Review comment:
       suggest moving this to RetentionManager




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to