This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push:
new 10ae8196eaf Do not remove top-level load dir after load & add retry
for getting disk space (#12920)
10ae8196eaf is described below
commit 10ae8196eaf7ece94427790dd7e105098545845e
Author: Jiang Tian <[email protected]>
AuthorDate: Tue Jul 16 09:38:49 2024 +0800
Do not remove top-level load dir after load & add retry for getting disk
space (#12920)
* add log for full directory
* fix to computeIfAbsent
* fix compute and add more logs
* add retry when getting directory space
* do not remove top-level load dir after load
* spotless
* Fix unupdated dir
---
.../execution/load/LoadTsFileManager.java | 17 ------------
.../rescon/disk/strategy/SequenceStrategy.java | 24 +++++++++++++++-
.../org/apache/iotdb/commons/utils/IOUtils.java | 32 ++++++++++++++++++++++
.../apache/iotdb/commons/utils/JVMCommonUtils.java | 30 +++++++++++++++++---
4 files changed, 81 insertions(+), 22 deletions(-)
diff --git
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/load/LoadTsFileManager.java
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/load/LoadTsFileManager.java
index 0e0be8deae6..fca8fd62cdb 100644
---
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/load/LoadTsFileManager.java
+++
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/queryengine/execution/load/LoadTsFileManager.java
@@ -65,7 +65,6 @@ import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.atomic.AtomicReference;
-import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
@@ -296,22 +295,6 @@ public class LoadTsFileManager {
if (Objects.nonNull(writerManager)) {
writerManager.close();
}
-
- for (final Path loadDirPath :
- Arrays.stream(LOAD_BASE_DIRS.get())
- .map(File::new)
- .filter(File::exists)
- .map(File::toPath)
- .collect(Collectors.toList())) {
- try {
- Files.deleteIfExists(loadDirPath);
- LOGGER.info("Load dir {} was deleted.", loadDirPath);
- } catch (DirectoryNotEmptyException e) {
- LOGGER.info("Load dir {} is not empty, skip deleting.", loadDirPath);
- } catch (Exception e) {
- LOGGER.info(MESSAGE_DELETE_FAIL, loadDirPath);
- }
- }
}
public static void updateWritePointCountMetrics(
diff --git
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/rescon/disk/strategy/SequenceStrategy.java
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/rescon/disk/strategy/SequenceStrategy.java
index 82d7ff7f6e5..ba7265babd5 100644
---
a/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/rescon/disk/strategy/SequenceStrategy.java
+++
b/iotdb-core/datanode/src/main/java/org/apache/iotdb/db/storageengine/rescon/disk/strategy/SequenceStrategy.java
@@ -21,11 +21,18 @@ package
org.apache.iotdb.db.storageengine.rescon.disk.strategy;
import org.apache.iotdb.commons.utils.JVMCommonUtils;
import org.apache.iotdb.db.exception.DiskSpaceInsufficientException;
+import org.apache.tsfile.fileSystem.FSFactoryProducer;
+
+import java.io.File;
import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
public class SequenceStrategy extends DirectoryStrategy {
+ private static final long PRINT_INTERVAL_MS = 3600 * 1000L;
private int currentIndex;
+ private Map<Integer, Long> dirLastPrintTimeMap = new ConcurrentHashMap<>();
@Override
public void setFolders(List<String> folders) throws
DiskSpaceInsufficientException {
@@ -52,11 +59,26 @@ public class SequenceStrategy extends DirectoryStrategy {
private int tryGetNextIndex(int start) throws DiskSpaceInsufficientException
{
int index = (start + 1) % folders.size();
- while (!JVMCommonUtils.hasSpace(folders.get(index))) {
+ String dir = folders.get(index);
+ while (!JVMCommonUtils.hasSpace(dir)) {
+ File dirFile = FSFactoryProducer.getFSFactory().getFile(dir);
+
+ Long lastPrintTime = dirLastPrintTimeMap.computeIfAbsent(index, i ->
-1L);
+ if (System.currentTimeMillis() - lastPrintTime > PRINT_INTERVAL_MS) {
+ long freeSpace = dirFile.getFreeSpace();
+ long totalSpace = dirFile.getTotalSpace();
+ LOGGER.warn(
+ "{} is above the warning threshold, free space {}, total space{}",
+ dir,
+ freeSpace,
+ totalSpace);
+ dirLastPrintTimeMap.put(index, System.currentTimeMillis());
+ }
if (index == start) {
throw new DiskSpaceInsufficientException(folders);
}
index = (index + 1) % folders.size();
+ dir = folders.get(index);
}
return index;
}
diff --git
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/IOUtils.java
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/IOUtils.java
index f2d9e9d2dac..f96ae8443b9 100644
---
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/IOUtils.java
+++
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/IOUtils.java
@@ -24,6 +24,7 @@ import org.apache.iotdb.commons.auth.entity.Role;
import org.apache.iotdb.commons.exception.IllegalPathException;
import org.apache.iotdb.commons.path.PartialPath;
+import com.google.common.base.Supplier;
import org.apache.tsfile.utils.Pair;
import java.io.DataInputStream;
@@ -33,6 +34,8 @@ import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
+import java.util.Optional;
+import java.util.function.Function;
public class IOUtils {
@@ -273,4 +276,33 @@ public class IOUtils {
clone.flip();
return clone;
}
+
+ /**
+ * Retry a method at most 'maxRetry' times, each time calling 'retryFunc'
and passing the result
+ * to 'validator'. If 'validator' returns true, returns the result.
+ *
+ * @param maxRetry maximum number of retries
+ * @param retryIntervalMS retry interval in milliseconds
+ * @param retryFunc function to be retried
+ * @param validator validating the result of 'retryFunc'
+ * @return an Optional holding the result from 'retryFunc' if it passes
validation, or an empty Optional if all retries fail or the thread is
+ * interrupted
+ * @param <T> type of the result produced by 'retryFunc'
+ */
+ public static <T> Optional<T> retryNoException(
+ int maxRetry, long retryIntervalMS, Supplier<T> retryFunc, Function<T,
Boolean> validator) {
+ for (int i = 0; i < maxRetry; i++) {
+ T result = retryFunc.get();
+ if (Boolean.TRUE.equals(validator.apply(result))) {
+ return Optional.of(result);
+ }
+ try {
+ Thread.sleep(retryIntervalMS);
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ return Optional.empty();
+ }
+ }
+ return Optional.empty();
+ }
}
diff --git
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/JVMCommonUtils.java
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/JVMCommonUtils.java
index 6c7bb1909f5..9435d54d2c7 100644
---
a/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/JVMCommonUtils.java
+++
b/iotdb-core/node-commons/src/main/java/org/apache/iotdb/commons/utils/JVMCommonUtils.java
@@ -22,6 +22,8 @@ package org.apache.iotdb.commons.utils;
import org.apache.iotdb.commons.conf.CommonDescriptor;
import org.apache.tsfile.fileSystem.FSFactoryProducer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
@@ -31,6 +33,9 @@ import java.nio.file.Paths;
import java.util.stream.Stream;
public class JVMCommonUtils {
+
+ private static final Logger LOGGER =
LoggerFactory.getLogger(JVMCommonUtils.class);
+
/** Default executor pool maximum size. */
public static final int MAX_EXECUTOR_POOL_SIZE = Math.max(100, getCpuCores()
* 5);
@@ -62,13 +67,30 @@ public class JVMCommonUtils {
public static long getUsableSpace(String dir) {
File dirFile = FSFactoryProducer.getFSFactory().getFile(dir);
dirFile.mkdirs();
- return dirFile.getFreeSpace();
+ return IOUtils.retryNoException(5, 2000L, dirFile::getFreeSpace, space ->
space > 0).orElse(0L);
}
public static double getDiskFreeRatio(String dir) {
- File dirFile = FSFactoryProducer.getFSFactory().getFile(dir);
- dirFile.mkdirs();
- return 1.0 * dirFile.getFreeSpace() / dirFile.getTotalSpace();
+ File dirFile = new File(dir);
+ if (!dirFile.mkdirs()) {
+ // This may solve getFreeSpace() == 0?
+ dirFile = new File(dir);
+ }
+ long freeSpace =
+ IOUtils.retryNoException(5, 2000L, dirFile::getFreeSpace, space ->
space > 0).orElse(0L);
+ if (freeSpace == 0) {
+ LOGGER.warn("Cannot get free space for {} after retries, please check
the disk status", dir);
+ }
+ long totalSpace = dirFile.getTotalSpace();
+ double ratio = 1.0 * freeSpace / totalSpace;
+ if (ratio <= diskSpaceWarningThreshold) {
+ LOGGER.warn(
+ "{} is above the warning threshold, free space {}, total space {}",
+ dir,
+ freeSpace,
+ totalSpace);
+ }
+ return ratio;
}
public static boolean hasSpace(String dir) {