imply-cheddar commented on code in PR #14239:
URL: https://github.com/apache/druid/pull/14239#discussion_r1190490466


##########
indexing-service/src/main/java/org/apache/druid/indexing/common/TaskStorageDirTracker.java:
##########
@@ -24,91 +24,202 @@
 import org.apache.druid.indexing.worker.config.WorkerConfig;
 import org.apache.druid.java.util.common.FileUtils;
 import org.apache.druid.java.util.common.ISE;
+import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.lifecycle.LifecycleStart;
 
 import javax.annotation.Nullable;
 import java.io.File;
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.stream.Collectors;
 
+/**
+ * Used to pick storage slots for tasks when run from the middle manager.
+ */
 public class TaskStorageDirTracker
 {
   public static TaskStorageDirTracker fromConfigs(WorkerConfig workerConfig, 
TaskConfig taskConfig)
   {
+    final List<File> baseTaskDirs;
     if (workerConfig == null) {
-      return new 
TaskStorageDirTracker(ImmutableList.of(taskConfig.getBaseTaskDir()));
+      baseTaskDirs = ImmutableList.of(taskConfig.getBaseTaskDir());
     } else {
       final List<String> basePaths = workerConfig.getBaseTaskDirs();
       if (basePaths == null) {
-        return new 
TaskStorageDirTracker(ImmutableList.of(taskConfig.getBaseTaskDir()));
+        baseTaskDirs = ImmutableList.of(taskConfig.getBaseTaskDir());
+      } else {
+        baseTaskDirs = 
basePaths.stream().map(File::new).collect(Collectors.toList());
       }
-      return new TaskStorageDirTracker(
-          basePaths.stream().map(File::new).collect(Collectors.toList())
-      );
     }
+
+    return fromBaseDirs(baseTaskDirs, workerConfig.getCapacity(), 
workerConfig.getBaseTaskDirSize());
+  }
+
+  public static TaskStorageDirTracker fromBaseDirs(List<File> baseTaskDirs, 
int numSlots, long dirSize)
+  {
+    int slotsPerBaseTaskDir = Math.max(1, numSlots / baseTaskDirs.size());
+    if (numSlots % baseTaskDirs.size() > 0) {
+      // We have to add an extra slot per location if they do not evenly divide
+      ++slotsPerBaseTaskDir;
+    }
+    long sizePerSlot = dirSize / slotsPerBaseTaskDir;

Review Comment:
   Yes, this config is explicitly forcing symmetry across the locations.  It's 
a divergence from the historical "storage location" which allows for 
non-symmetrical stuff.  Forcing symmetry makes it much easier to divide up the 
sizes given, where if they are not symmetrical and someone has location A with 
size 200GB, location B with size 350GB and location C with 1TB and has 8 worker 
slots, what's the correct size to give to each of those tasks?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to