This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new e6b135c76a9 [improvement](fe) Add reason log when `Env` is not ready (#28286)
e6b135c76a9 is described below

commit e6b135c76a9697484a0b6046b927864426d11bad
Author: Lei Zhang <[email protected]>
AuthorDate: Fri Dec 15 12:22:06 2023 +0800

    [improvement](fe) Add reason log when `Env` is not ready (#28286)
---
 .../main/java/org/apache/doris/common/Config.java  |  4 +-
 .../main/java/org/apache/doris/catalog/Env.java    | 28 +++++++++-----
 .../apache/doris/journal/bdbje/BDBEnvironment.java | 44 ++++++++++------------
 .../apache/doris/journal/bdbje/BDBJEJournal.java   | 15 ++++----
 .../java/org/apache/doris/persist/EditLog.java     | 17 +++++----
 5 files changed, 58 insertions(+), 50 deletions(-)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 516f89ec37c..c7b6c611a5a 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -244,12 +244,12 @@ public class Config extends ConfigBase {
             "The desired upper limit on the number of bytes of reserved space to retain "
                     + "in a replicated JE Environment. "
                     + "This parameter is ignored in a non-replicated JE Environment."})
-    public static int bdbje_reserved_disk_bytes = 1 * 1024 * 1024 * 1024; // 1G
+    public static long bdbje_reserved_disk_bytes = 1 * 1024 * 1024 * 1024; // 1G
 
     @ConfField(description = {"BDBJE 所需的空闲磁盘空间大小。如果空闲磁盘空间小于这个值,则BDBJE将无法写入。",
             "Amount of free disk space required by BDBJE. "
                     + "If the free disk space is less than this value, BDBJE will not be able to write."})
-    public static int bdbje_free_disk_bytes = 1 * 1024 * 1024 * 1024; // 1G
+    public static long bdbje_free_disk_bytes = 1 * 1024 * 1024 * 1024; // 1G
 
     @ConfField(masterOnly = true, description = {"心跳线程池的线程数",
             "Num of thread to handle heartbeat events"})
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
index 3065e92b71f..a698b8cdcfc 100755
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java
@@ -997,9 +997,10 @@ public class Env {
             }
 
             Thread.sleep(100);
-            if (counter++ % 20 == 0) {
-                LOG.info("wait catalog to be ready. FE type: {}. is ready: {}, counter: {}", feType, isReady.get(),
-                        counter);
+            if (counter++ % 100 == 0) {
+                String reason = editLog == null ? "editlog is null" : editLog.getNotReadyReason();
+                LOG.info("wait catalog to be ready. feType:{} isReady:{}, counter:{} reason: {}",
+                        feType, isReady.get(), counter, reason);
             }
         }
     }
@@ -2460,8 +2461,8 @@ public class Env {
         if (currentTimeMs - synchronizedTimeMs > Config.meta_delay_toleration_second * 1000) {
             // we still need this log to observe this situation
             // but service may be continued when there is no log being replayed.
-            LOG.warn("meta out of date. current time: {}, synchronized time: {}, has log: {}, fe type: {}",
-                    currentTimeMs, synchronizedTimeMs, hasLog, feType);
+            LOG.warn("meta out of date. current time: {}, sync time: {}, delta: {} ms, hasLog: {}, feType: {}",
+                    currentTimeMs, synchronizedTimeMs, (currentTimeMs - synchronizedTimeMs), hasLog, feType);
             if (hasLog || feType == FrontendNodeType.UNKNOWN) {
                 // 1. if we read log from BDB, which means master is still alive.
                 // So we need to set meta out of date.
@@ -2471,6 +2472,13 @@ public class Env {
                 metaReplayState.setOutOfDate(currentTimeMs, synchronizedTimeMs);
                 canRead.set(false);
                 isReady.set(false);
+
+                if (editLog != null) {
+                    String reason = editLog.getNotReadyReason();
+                    if (!Strings.isNullOrEmpty(reason)) {
+                        LOG.warn("Not ready reason:{}", reason);
+                    }
+                }
             }
 
             // sleep 5s to avoid numerous 'meta out of date' log
@@ -5891,10 +5899,10 @@ public class Env {
             sb.append(frontend.toString()).append("\n");
         }
 
-        long diskUsagePercent = editLog.getEnvDiskUsagePercent();
-        sb.append("Disk usage: ")
-                .append(diskUsagePercent != -1 ? String.valueOf(diskUsagePercent) : "<unknown>")
-                .append("%\n");
+        String reason = editLog.getNotReadyReason();
+        if (!Strings.isNullOrEmpty(reason)) {
+            sb.append("Reason: ").append(reason).append("%\n");
+        }
 
         if (haProtocol instanceof BDBHA) {
             try {
@@ -5915,7 +5923,7 @@ public class Env {
                 }
 
             } catch (Exception e) {
-                // pass
+                LOG.warn("checkReadyOrThrow:", e);
             }
         }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java
index 27206443c08..1f604f96a4e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java
@@ -40,6 +40,7 @@ import com.sleepycat.je.rep.NetworkRestore;
 import com.sleepycat.je.rep.NetworkRestoreConfig;
 import com.sleepycat.je.rep.NoConsistencyRequiredPolicy;
 import com.sleepycat.je.rep.NodeType;
+import com.sleepycat.je.rep.RepInternal;
 import com.sleepycat.je.rep.ReplicatedEnvironment;
 import com.sleepycat.je.rep.ReplicationConfig;
 import com.sleepycat.je.rep.RollbackException;
@@ -51,10 +52,7 @@ import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
 import java.io.File;
-import java.io.IOException;
 import java.net.InetSocketAddress;
-import java.nio.file.FileStore;
-import java.nio.file.Files;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -76,7 +74,6 @@ public class BDBEnvironment {
             "INFO", "CONFIG", "FINE", "FINER", "FINEST", "ALL");
     public static final String PALO_JOURNAL_GROUP = "PALO_JOURNAL_GROUP";
 
-    private File envHome;
     private ReplicatedEnvironment replicatedEnvironment;
     private EnvironmentConfig environmentConfig;
     private ReplicationConfig replicationConfig;
@@ -98,7 +95,6 @@ public class BDBEnvironment {
     // The setup() method opens the environment and database
     public void setup(File envHome, String selfNodeName, String selfNodeHostPort,
                       String helperHostPort) {
-        this.envHome = envHome;
         // Almost never used, just in case the master can not restart
         if (metadataFailureRecovery) {
             if (!isElectable) {
@@ -443,25 +439,6 @@ public class BDBEnvironment {
         }
     }
 
-    // Get the disk usage of BDB Environment in percent. -1 is returned if any error occuried.
-    public long getEnvDiskUsagePercent() {
-        if (envHome == null) {
-            return -1;
-        }
-
-        try {
-            FileStore fileStore = Files.getFileStore(envHome.toPath());
-            long totalSpace = fileStore.getTotalSpace();
-            long usableSpace = fileStore.getUsableSpace();
-            if (totalSpace <= 0) {
-                return -1;
-            }
-            return 100 - (usableSpace * 100) / totalSpace;
-        } catch (IOException e) {
-            return -1;
-        }
-    }
-
     private static SyncPolicy getSyncPolicy(String policy) {
         if (policy.equalsIgnoreCase("SYNC")) {
             return Durability.SyncPolicy.SYNC;
@@ -484,4 +461,23 @@ public class BDBEnvironment {
         return Durability.ReplicaAckPolicy.SIMPLE_MAJORITY;
     }
 
+    public String getNotReadyReason() {
+        if (replicatedEnvironment == null) {
+            LOG.warn("replicatedEnvironment is null");
+            return "replicatedEnvironment is null";
+        }
+        try {
+            if (replicatedEnvironment.getInvalidatingException() != null) {
+                return replicatedEnvironment.getInvalidatingException().getMessage();
+            }
+
+            if (RepInternal.getNonNullRepImpl(replicatedEnvironment).getDiskLimitViolation() != null) {
+                return RepInternal.getNonNullRepImpl(replicatedEnvironment).getDiskLimitViolation();
+            }
+        } catch (Exception e) {
+            LOG.warn("getNotReadyReason exception:", e);
+        }
+        return "";
+    }
+
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
index ebdbadae192..134b609549e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
@@ -566,13 +566,6 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B
         return this.bdbEnvironment;
     }
 
-    public long getEnvDiskUsagePercent() {
-        if (bdbEnvironment == null) {
-            return -1;
-        }
-        return bdbEnvironment.getEnvDiskUsagePercent();
-    }
-
     public String getBDBStats() {
         if (bdbEnvironment == null) {
             return "";
@@ -585,4 +578,12 @@ public class BDBJEJournal implements Journal { // CHECKSTYLE IGNORE THIS LINE: B
 
         return repEnv.getRepStats(StatsConfig.DEFAULT).toString();
     }
+
+    public String getNotReadyReason() {
+        if (bdbEnvironment == null) {
+            LOG.warn("replicatedEnvironment is null");
+            return "replicatedEnvironment is null";
+        }
+        return bdbEnvironment.getNotReadyReason();
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
index e2c5630ce77..be7dc75f384 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
@@ -151,13 +151,6 @@ public class EditLog {
         return journal == null ? 0 : 1;
     }
 
-    public long getEnvDiskUsagePercent() {
-        if (journal instanceof BDBJEJournal) {
-            return ((BDBJEJournal) journal).getEnvDiskUsagePercent();
-        }
-        return -1;
-    }
-
     /**
      * Load journal.
      **/
@@ -1955,4 +1948,14 @@ public class EditLog {
     public void logAlterMTMV(AlterMTMV log) {
         logEdit(OperationType.OP_ALTER_MTMV, log);
     }
+
+    public String getNotReadyReason() {
+        if (journal == null) {
+            return "journal is null";
+        }
+        if (journal instanceof BDBJEJournal) {
+            return ((BDBJEJournal) journal).getNotReadyReason();
+        }
+        return "";
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to