This is an automated email from the ASF dual-hosted git repository.

jiangtian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git


The following commit(s) were added to refs/heads/master by this push:
     new 10ae8196eaf Do not remove top-level load dir after load & add retry 
for getting disk space (#12920)
10ae8196eaf is described below

commit 10ae8196eaf7ece94427790dd7e105098545845e
Author: Jiang Tian <[email protected]>
AuthorDate: Tue Jul 16 09:38:49 2024 +0800

    Do not remove top-level load dir after load & add retry for getting disk 
space (#12920)
    
    * add log for full directory
    
    * fix to computeIfAbsent
    
    * fix compute and add more logs
    
    * add retry when getting directory space
    
    * do not remove top-level load dir after load
    
    * spotless
    
    * Fix unupdated dir
---
 .../execution/load/LoadTsFileManager.java          | 17 ------------
 .../rescon/disk/strategy/SequenceStrategy.java     | 24 +++++++++++++++-
 .../org/apache/iotdb/commons/utils/IOUtils.java    | 32 ++++++++++++++++++++++
 .../apache/iotdb/commons/utils/JVMCommonUtils.java | 30 +++++++++++++++++---
 4 files changed, 81 insertions(+), 22 deletions(-)

diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/load/LoadTsFileManager.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/load/LoadTsFileManager.java
index 0e0be8deae6..fca8fd62cdb 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/load/LoadTsFileManager.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/load/LoadTsFileManager.java
@@ -65,7 +65,6 @@ import java.util.Optional;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.PriorityBlockingQueue;
 import java.util.concurrent.atomic.AtomicReference;
-import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 /**
@@ -296,22 +295,6 @@ public class LoadTsFileManager {
     if (Objects.nonNull(writerManager)) {
       writerManager.close();
     }
-
-    for (final Path loadDirPath :
-        Arrays.stream(LOAD_BASE_DIRS.get())
-            .map(File::new)
-            .filter(File::exists)
-            .map(File::toPath)
-            .collect(Collectors.toList())) {
-      try {
-        Files.deleteIfExists(loadDirPath);
-        LOGGER.info("Load dir {} was deleted.", loadDirPath);
-      } catch (DirectoryNotEmptyException e) {
-        LOGGER.info("Load dir {} is not empty, skip deleting.", loadDirPath);
-      } catch (Exception e) {
-        LOGGER.info(MESSAGE_DELETE_FAIL, loadDirPath);
-      }
-    }
   }
 
   public static void updateWritePointCountMetrics(
diff --git 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/rescon/disk/strategy/SequenceStrategy.java
 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/rescon/disk/strategy/SequenceStrategy.java
index 82d7ff7f6e5..ba7265babd5 100644
--- 
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/rescon/disk/strategy/SequenceStrategy.java
+++ 
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/rescon/disk/strategy/SequenceStrategy.java
@@ -21,11 +21,18 @@ package 
org.apache.iotdb.db.storageengine.rescon.disk.strategy;
 import org.apache.iotdb.commons.utils.JVMCommonUtils;
 import org.apache.iotdb.db.exception.DiskSpaceInsufficientException;
 
+import org.apache.tsfile.fileSystem.FSFactoryProducer;
+
+import java.io.File;
 import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
 
 public class SequenceStrategy extends DirectoryStrategy {
 
+  private static final long PRINT_INTERVAL_MS = 3600 * 1000L;
   private int currentIndex;
+  private Map<Integer, Long> dirLastPrintTimeMap = new ConcurrentHashMap<>();
 
   @Override
   public void setFolders(List<String> folders) throws 
DiskSpaceInsufficientException {
@@ -52,11 +59,26 @@ public class SequenceStrategy extends DirectoryStrategy {
 
   private int tryGetNextIndex(int start) throws DiskSpaceInsufficientException 
{
     int index = (start + 1) % folders.size();
-    while (!JVMCommonUtils.hasSpace(folders.get(index))) {
+    String dir = folders.get(index);
+    while (!JVMCommonUtils.hasSpace(dir)) {
+      File dirFile = FSFactoryProducer.getFSFactory().getFile(dir);
+
+      Long lastPrintTime = dirLastPrintTimeMap.computeIfAbsent(index, i -> 
-1L);
+      if (System.currentTimeMillis() - lastPrintTime > PRINT_INTERVAL_MS) {
+        long freeSpace = dirFile.getFreeSpace();
+        long totalSpace = dirFile.getTotalSpace();
+        LOGGER.warn(
+            "{} is above the warning threshold, free space {}, total space{}",
+            dir,
+            freeSpace,
+            totalSpace);
+        dirLastPrintTimeMap.put(index, System.currentTimeMillis());
+      }
       if (index == start) {
         throw new DiskSpaceInsufficientException(folders);
       }
       index = (index + 1) % folders.size();
+      dir = folders.get(index);
     }
     return index;
   }
diff --git 
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/IOUtils.java
 
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/IOUtils.java
index f2d9e9d2dac..f96ae8443b9 100644
--- 
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/IOUtils.java
+++ 
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/IOUtils.java
@@ -24,6 +24,7 @@ import org.apache.iotdb.commons.auth.entity.Role;
 import org.apache.iotdb.commons.exception.IllegalPathException;
 import org.apache.iotdb.commons.path.PartialPath;
 
+import com.google.common.base.Supplier;
 import org.apache.tsfile.utils.Pair;
 
 import java.io.DataInputStream;
@@ -33,6 +34,8 @@ import java.io.OutputStream;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Optional;
+import java.util.function.Function;
 
 public class IOUtils {
 
@@ -273,4 +276,33 @@ public class IOUtils {
     clone.flip();
     return clone;
   }
+
+  /**
+   * Retry a method at most 'maxRetry' times, each time calling 'retryFunc' 
and passing the result
+   * to 'validator'. If 'validator' returns true, returns the result.
+   *
+   * @param maxRetry maximum number of retries
+   * @param retryIntervalMS retry interval in milliseconds
+   * @param retryFunc function to be retried
+   * @param validator validating the result of 'retryFunc'
+   * @return an Optional holding the result from 'retryFunc' if it passes 
validation, or an empty
+   *     Optional if all retries fail or the thread is interrupted
+   * @param <T>
+   */
+  public static <T> Optional<T> retryNoException(
+      int maxRetry, long retryIntervalMS, Supplier<T> retryFunc, Function<T, 
Boolean> validator) {
+    for (int i = 0; i < maxRetry; i++) {
+      T result = retryFunc.get();
+      if (Boolean.TRUE.equals(validator.apply(result))) {
+        return Optional.of(result);
+      }
+      try {
+        Thread.sleep(retryIntervalMS);
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+        return Optional.empty();
+      }
+    }
+    return Optional.empty();
+  }
 }
diff --git 
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/JVMCommonUtils.java
 
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/JVMCommonUtils.java
index 6c7bb1909f5..9435d54d2c7 100644
--- 
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/JVMCommonUtils.java
+++ 
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/JVMCommonUtils.java
@@ -22,6 +22,8 @@ package org.apache.iotdb.commons.utils;
 import org.apache.iotdb.commons.conf.CommonDescriptor;
 
 import org.apache.tsfile.fileSystem.FSFactoryProducer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
@@ -31,6 +33,9 @@ import java.nio.file.Paths;
 import java.util.stream.Stream;
 
 public class JVMCommonUtils {
+
+  private static final Logger LOGGER = 
LoggerFactory.getLogger(JVMCommonUtils.class);
+
   /** Default executor pool maximum size. */
   public static final int MAX_EXECUTOR_POOL_SIZE = Math.max(100, getCpuCores() 
* 5);
 
@@ -62,13 +67,30 @@ public class JVMCommonUtils {
   public static long getUsableSpace(String dir) {
     File dirFile = FSFactoryProducer.getFSFactory().getFile(dir);
     dirFile.mkdirs();
-    return dirFile.getFreeSpace();
+    return IOUtils.retryNoException(5, 2000L, dirFile::getFreeSpace, space -> 
space > 0).orElse(0L);
   }
 
   public static double getDiskFreeRatio(String dir) {
-    File dirFile = FSFactoryProducer.getFSFactory().getFile(dir);
-    dirFile.mkdirs();
-    return 1.0 * dirFile.getFreeSpace() / dirFile.getTotalSpace();
+    File dirFile = new File(dir);
+    if (!dirFile.mkdirs()) {
+      // This may solve getFreeSpace() == 0?
+      dirFile = new File(dir);
+    }
+    long freeSpace =
+        IOUtils.retryNoException(5, 2000L, dirFile::getFreeSpace, space -> 
space > 0).orElse(0L);
+    if (freeSpace == 0) {
+      LOGGER.warn("Cannot get free space for {} after retries, please check 
the disk status", dir);
+    }
+    long totalSpace = dirFile.getTotalSpace();
+    double ratio = 1.0 * freeSpace / totalSpace;
+    if (ratio <= diskSpaceWarningThreshold) {
+      LOGGER.warn(
+          "{} is above the warning threshold, free space {}, total space {}",
+          dir,
+          freeSpace,
+          totalSpace);
+    }
+    return ratio;
   }
 
   public static boolean hasSpace(String dir) {

Reply via email to