This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new e6b135c76a9 [improvement](fe) Add reason log when `Env` is not ready (#28286)
e6b135c76a9 is described below
commit e6b135c76a9697484a0b6046b927864426d11bad
Author: Lei Zhang <[email protected]>
AuthorDate: Fri Dec 15 12:22:06 2023 +0800
[improvement](fe) Add reason log when `Env` is not ready (#28286)
---
.../main/java/org/apache/doris/common/Config.java | 4 +-
.../main/java/org/apache/doris/catalog/Env.java | 28 +++++++++-----
.../apache/doris/journal/bdbje/BDBEnvironment.java | 44 ++++++++++------------
.../apache/doris/journal/bdbje/BDBJEJournal.java | 15 ++++----
.../java/org/apache/doris/persist/EditLog.java | 17 +++++----
5 files changed, 58 insertions(+), 50 deletions(-)
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 516f89ec37c..c7b6c611a5a 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -244,12 +244,12 @@ public class Config extends ConfigBase {
"The desired upper limit on the number of bytes of reserved space
to retain "
+ "in a replicated JE Environment. "
+ "This parameter is ignored in a non-replicated JE
Environment."})
- public static int bdbje_reserved_disk_bytes = 1 * 1024 * 1024 * 1024; // 1G
+ public static long bdbje_reserved_disk_bytes = 1 * 1024 * 1024 * 1024; //
1G
@ConfField(description = {"BDBJE 所需的空闲磁盘空间大小。如果空闲磁盘空间小于这个值,则BDBJE将无法写入。",
"Amount of free disk space required by BDBJE. "
+ "If the free disk space is less than this value, BDBJE
will not be able to write."})
- public static int bdbje_free_disk_bytes = 1 * 1024 * 1024 * 1024; // 1G
+ public static long bdbje_free_disk_bytes = 1 * 1024 * 1024 * 1024; // 1G
@ConfField(masterOnly = true, description = {"心跳线程池的线程数",
"Num of thread to handle heartbeat events"})
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
index 3065e92b71f..a698b8cdcfc 100755
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
@@ -997,9 +997,10 @@ public class Env {
}
Thread.sleep(100);
- if (counter++ % 20 == 0) {
- LOG.info("wait catalog to be ready. FE type: {}. is ready: {},
counter: {}", feType, isReady.get(),
- counter);
+ if (counter++ % 100 == 0) {
+ String reason = editLog == null ? "editlog is null" :
editLog.getNotReadyReason();
+ LOG.info("wait catalog to be ready. feType:{} isReady:{},
counter:{} reason: {}",
+ feType, isReady.get(), counter, reason);
}
}
}
@@ -2460,8 +2461,8 @@ public class Env {
if (currentTimeMs - synchronizedTimeMs >
Config.meta_delay_toleration_second * 1000) {
// we still need this log to observe this situation
// but service may be continued when there is no log being
replayed.
- LOG.warn("meta out of date. current time: {}, synchronized time:
{}, has log: {}, fe type: {}",
- currentTimeMs, synchronizedTimeMs, hasLog, feType);
+ LOG.warn("meta out of date. current time: {}, sync time: {},
delta: {} ms, hasLog: {}, feType: {}",
+ currentTimeMs, synchronizedTimeMs, (currentTimeMs -
synchronizedTimeMs), hasLog, feType);
if (hasLog || feType == FrontendNodeType.UNKNOWN) {
// 1. if we read log from BDB, which means master is still
alive.
// So we need to set meta out of date.
@@ -2471,6 +2472,13 @@ public class Env {
metaReplayState.setOutOfDate(currentTimeMs,
synchronizedTimeMs);
canRead.set(false);
isReady.set(false);
+
+ if (editLog != null) {
+ String reason = editLog.getNotReadyReason();
+ if (!Strings.isNullOrEmpty(reason)) {
+ LOG.warn("Not ready reason:{}", reason);
+ }
+ }
}
// sleep 5s to avoid numerous 'meta out of date' log
@@ -5891,10 +5899,10 @@ public class Env {
sb.append(frontend.toString()).append("\n");
}
- long diskUsagePercent = editLog.getEnvDiskUsagePercent();
- sb.append("Disk usage: ")
- .append(diskUsagePercent != -1 ?
String.valueOf(diskUsagePercent) : "<unknown>")
- .append("%\n");
+ String reason = editLog.getNotReadyReason();
+ if (!Strings.isNullOrEmpty(reason)) {
+ sb.append("Reason: ").append(reason).append("%\n");
+ }
if (haProtocol instanceof BDBHA) {
try {
@@ -5915,7 +5923,7 @@ public class Env {
}
} catch (Exception e) {
- // pass
+ LOG.warn("checkReadyOrThrow:", e);
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java
index 27206443c08..1f604f96a4e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java
@@ -40,6 +40,7 @@ import com.sleepycat.je.rep.NetworkRestore;
import com.sleepycat.je.rep.NetworkRestoreConfig;
import com.sleepycat.je.rep.NoConsistencyRequiredPolicy;
import com.sleepycat.je.rep.NodeType;
+import com.sleepycat.je.rep.RepInternal;
import com.sleepycat.je.rep.ReplicatedEnvironment;
import com.sleepycat.je.rep.ReplicationConfig;
import com.sleepycat.je.rep.RollbackException;
@@ -51,10 +52,7 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.File;
-import java.io.IOException;
import java.net.InetSocketAddress;
-import java.nio.file.FileStore;
-import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@@ -76,7 +74,6 @@ public class BDBEnvironment {
"INFO", "CONFIG", "FINE", "FINER", "FINEST", "ALL");
public static final String PALO_JOURNAL_GROUP = "PALO_JOURNAL_GROUP";
- private File envHome;
private ReplicatedEnvironment replicatedEnvironment;
private EnvironmentConfig environmentConfig;
private ReplicationConfig replicationConfig;
@@ -98,7 +95,6 @@ public class BDBEnvironment {
// The setup() method opens the environment and database
public void setup(File envHome, String selfNodeName, String
selfNodeHostPort,
String helperHostPort) {
- this.envHome = envHome;
// Almost never used, just in case the master can not restart
if (metadataFailureRecovery) {
if (!isElectable) {
@@ -443,25 +439,6 @@ public class BDBEnvironment {
}
}
- // Get the disk usage of BDB Environment in percent. -1 is returned if any
error occuried.
- public long getEnvDiskUsagePercent() {
- if (envHome == null) {
- return -1;
- }
-
- try {
- FileStore fileStore = Files.getFileStore(envHome.toPath());
- long totalSpace = fileStore.getTotalSpace();
- long usableSpace = fileStore.getUsableSpace();
- if (totalSpace <= 0) {
- return -1;
- }
- return 100 - (usableSpace * 100) / totalSpace;
- } catch (IOException e) {
- return -1;
- }
- }
-
private static SyncPolicy getSyncPolicy(String policy) {
if (policy.equalsIgnoreCase("SYNC")) {
return Durability.SyncPolicy.SYNC;
@@ -484,4 +461,23 @@ public class BDBEnvironment {
return Durability.ReplicaAckPolicy.SIMPLE_MAJORITY;
}
+ public String getNotReadyReason() {
+ if (replicatedEnvironment == null) {
+ LOG.warn("replicatedEnvironment is null");
+ return "replicatedEnvironment is null";
+ }
+ try {
+ if (replicatedEnvironment.getInvalidatingException() != null) {
+ return
replicatedEnvironment.getInvalidatingException().getMessage();
+ }
+
+ if
(RepInternal.getNonNullRepImpl(replicatedEnvironment).getDiskLimitViolation()
!= null) {
+ return
RepInternal.getNonNullRepImpl(replicatedEnvironment).getDiskLimitViolation();
+ }
+ } catch (Exception e) {
+ LOG.warn("getNotReadyReason exception:", e);
+ }
+ return "";
+ }
+
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
index ebdbadae192..134b609549e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
@@ -566,13 +566,6 @@ public class BDBJEJournal implements Journal { //
CHECKSTYLE IGNORE THIS LINE: B
return this.bdbEnvironment;
}
- public long getEnvDiskUsagePercent() {
- if (bdbEnvironment == null) {
- return -1;
- }
- return bdbEnvironment.getEnvDiskUsagePercent();
- }
-
public String getBDBStats() {
if (bdbEnvironment == null) {
return "";
@@ -585,4 +578,12 @@ public class BDBJEJournal implements Journal { //
CHECKSTYLE IGNORE THIS LINE: B
return repEnv.getRepStats(StatsConfig.DEFAULT).toString();
}
+
+ public String getNotReadyReason() {
+ if (bdbEnvironment == null) {
+ LOG.warn("replicatedEnvironment is null");
+ return "replicatedEnvironment is null";
+ }
+ return bdbEnvironment.getNotReadyReason();
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
index e2c5630ce77..be7dc75f384 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
@@ -151,13 +151,6 @@ public class EditLog {
return journal == null ? 0 : 1;
}
- public long getEnvDiskUsagePercent() {
- if (journal instanceof BDBJEJournal) {
- return ((BDBJEJournal) journal).getEnvDiskUsagePercent();
- }
- return -1;
- }
-
/**
* Load journal.
**/
@@ -1955,4 +1948,14 @@ public class EditLog {
public void logAlterMTMV(AlterMTMV log) {
logEdit(OperationType.OP_ALTER_MTMV, log);
}
+
+ public String getNotReadyReason() {
+ if (journal == null) {
+ return "journal is null";
+ }
+ if (journal instanceof BDBJEJournal) {
+ return ((BDBJEJournal) journal).getNotReadyReason();
+ }
+ return "";
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]