This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 7abf3c5d2a8 branch-3.0: [fix](heartbeat) fix heartbeat editlog no 
persist hbTime (#42986)
7abf3c5d2a8 is described below

commit 7abf3c5d2a834cd6bf2710dc20470dbb885eb61d
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Nov 7 14:10:29 2024 +0800

    branch-3.0: [fix](heartbeat) fix heartbeat editlog no persist hbTime 
(#42986)
    
    PR Body: Backend persists lastUpdateMs, which is modified by the heartbeat
    editlog. But the heartbeat editlog does not persist hbTime, so hbTime is
    always 0, which makes the backend's lastUpdateMs = 0 in the bdb image.
    
    fix details:
    1. the heartbeat response now persists hbTime;
    2. only a BE state change writes an editlog. But we make a change:
    even if a backend is healthy, still write a healthy response editlog every
    5 min, in order to keep the backend's lastUpdateMs periodically updated in
    the bdb image. Note that this change does not increase the real editlog
    count, because the heartbeat mgr packs all FE/BE heartbeats into one
    editlog. Even with no FE/BE state change, it still writes an editlog that
    does not contain any node's response.
    3. for a dead heartbeat response, set hbTime to the last successful hbTime,
    so that the replayer can set the correct lastUpdateMs;
     Cherry-picked from #42653
    
    Co-authored-by: yujun <[email protected]>
---
 .../src/main/java/org/apache/doris/common/Config.java      |  6 ++++++
 .../src/main/java/org/apache/doris/system/Backend.java     | 14 ++++++++++++++
 .../java/org/apache/doris/system/BackendHbResponse.java    | 10 ++--------
 .../main/java/org/apache/doris/system/HeartbeatMgr.java    |  4 ++--
 .../java/org/apache/doris/system/HeartbeatResponse.java    |  4 +++-
 5 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java 
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index af8ce5d7c2d..59f4b33aff4 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -2021,6 +2021,12 @@ public class Config extends ConfigBase {
     @ConfField(mutable = true, masterOnly = true)
     public static long max_backend_heartbeat_failure_tolerance_count = 1;
 
+    /**
+     * Even if a backend is healthy, still write a heartbeat editlog to update 
backend's lastUpdateMs of bdb image.
+     */
+    @ConfField(mutable = true, masterOnly = true)
+    public static int editlog_healthy_heartbeat_seconds = 300;
+
     /**
      * Abort transaction time after lost heartbeat.
      * The default value is 300s, which means transactions of be will be 
aborted after lost heartbeat 300s.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java 
b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java
index da55ecee0de..974c0e0cae1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java
@@ -47,6 +47,7 @@ import org.apache.logging.log4j.Logger;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.security.SecureRandom;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashSet;
@@ -153,6 +154,8 @@ public class Backend implements Writable {
     // send some queries to this BE, it is not an important problem.
     private AtomicBoolean isShutDown = new AtomicBoolean(false);
 
+    private long nextForceEditlogHeartbeatTime = System.currentTimeMillis() + 
(new SecureRandom()).nextInt(60 * 1000);
+
     public Backend() {
         this.host = "";
         this.version = "";
@@ -876,7 +879,18 @@ public class Backend implements Writable {
 
             heartbeatErrMsg = "";
             this.heartbeatFailureCounter = 0;
+
+            // even if no change, write an editlog to make lastUpdateMs in 
image update
+            if (System.currentTimeMillis() >= 
this.nextForceEditlogHeartbeatTime) {
+                isChanged = true;
+                int delaySecond = Config.editlog_healthy_heartbeat_seconds + 
(new SecureRandom()).nextInt(60);
+                this.nextForceEditlogHeartbeatTime = 
System.currentTimeMillis() + delaySecond * 1000L;
+            }
         } else {
+            // for a bad BackendHbResponse, its hbTime is last succ hbTime, 
not this hbTime
+            if (hbResponse.getHbTime() > 0) {
+                this.lastUpdateMs = hbResponse.getHbTime();
+            }
             // Only set backend to dead if the heartbeat failure counter 
exceed threshold.
             // And if it is a replay process, must set backend to dead.
             if (isReplay || ++this.heartbeatFailureCounter >= 
Config.max_backend_heartbeat_failure_tolerance_count) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/system/BackendHbResponse.java 
b/fe/fe-core/src/main/java/org/apache/doris/system/BackendHbResponse.java
index a0311a9b737..479966d2ff3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/system/BackendHbResponse.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/system/BackendHbResponse.java
@@ -98,18 +98,12 @@ public class BackendHbResponse extends HeartbeatResponse 
implements Writable {
         this.beMemory = beMemory;
     }
 
-    public BackendHbResponse(long beId, String errMsg) {
-        super(HeartbeatResponse.Type.BACKEND);
-        this.status = HbStatus.BAD;
-        this.beId = beId;
-        this.msg = errMsg;
-    }
-
-    public BackendHbResponse(long beId, String host, String errMsg) {
+    public BackendHbResponse(long beId, String host, long lastHbTime, String 
errMsg) {
         super(HeartbeatResponse.Type.BACKEND);
         this.status = HbStatus.BAD;
         this.beId = beId;
         this.host = host;
+        this.hbTime = lastHbTime;
         this.msg = errMsg;
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatMgr.java 
b/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatMgr.java
index 7fe5bf0d442..fb6853e83c3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatMgr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatMgr.java
@@ -316,13 +316,13 @@ public class HeartbeatMgr extends MasterDaemon {
                             System.currentTimeMillis(), beStartTime, version, 
nodeRole,
                             fragmentNum, lastFragmentUpdateTime, isShutDown, 
arrowFlightSqlPort, beMemory);
                 } else {
-                    return new BackendHbResponse(backendId, backend.getHost(),
+                    return new BackendHbResponse(backendId, backend.getHost(), 
backend.getLastUpdateMs(),
                             result.getStatus().getErrorMsgs().isEmpty()
                                     ? "Unknown error" : 
result.getStatus().getErrorMsgs().get(0));
                 }
             } catch (Exception e) {
                 LOG.warn("backend heartbeat got exception", e);
-                return new BackendHbResponse(backendId, backend.getHost(),
+                return new BackendHbResponse(backendId, backend.getHost(), 
backend.getLastUpdateMs(),
                         Strings.isNullOrEmpty(e.getMessage()) ? "got 
exception" : e.getMessage());
             } finally {
                 if (client != null) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatResponse.java 
b/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatResponse.java
index 447ffad8189..3fffd121450 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatResponse.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatResponse.java
@@ -51,10 +51,12 @@ public class HeartbeatResponse implements Writable {
     protected boolean isTypeRead = false;
 
     /**
-     * msg and hbTime are no need to be synchronized to other Frontends,
+     * msg no need to be synchronized to other Frontends,
      * and only Master Frontend has these info
      */
     protected String msg;
+
+    @SerializedName(value = "hbTime")
     protected long hbTime;
 
     public HeartbeatResponse(Type type) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to