This is an automated email from the ASF dual-hosted git repository.
kerwin pushed a commit to branch 3.1.3-prepare
in repository https://gitbox.apache.org/repos/asf/dolphinscheduler.git
The following commit(s) were added to refs/heads/3.1.3-prepare by this push:
new d27d27f7d7 [fix][worker][bug] master/worker crash when registry
recover from SUSPENDED to RECONNECTED (#13328)
d27d27f7d7 is described below
commit d27d27f7d76b9617a5b70f6a6b705b147e9ea92b
Author: hokie-chan <[email protected]>
AuthorDate: Tue Jan 3 19:24:11 2023 +0800
[fix][worker][bug] master/worker crash when registry recover from SUSPENDED
to RECONNECTED (#13328)
---
.../master/registry/MasterWaitingStrategy.java | 28 ++++++++++++----------
.../worker/registry/WorkerWaitingStrategy.java | 28 ++++++++++++----------
2 files changed, 32 insertions(+), 24 deletions(-)
diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterWaitingStrategy.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterWaitingStrategy.java
index 1079b655f5..193c81e072 100644
--- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterWaitingStrategy.java
+++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterWaitingStrategy.java
@@ -92,18 +92,22 @@ public class MasterWaitingStrategy implements MasterConnectStrategy {
@Override
public void reconnect() {
- try {
- ServerLifeCycleManager.recoverFromWaiting();
- reStartMasterResource();
- // reopen the resource
- logger.info("Recover from waiting success, the current server status is {}",
- ServerLifeCycleManager.getServerStatus());
- } catch (Exception e) {
- String errorMessage =
- String.format("Recover from waiting failed, the current server status is %s, will stop the server",
- ServerLifeCycleManager.getServerStatus());
- logger.error(errorMessage, e);
- registryClient.getStoppable().stop(errorMessage);
+ if (ServerLifeCycleManager.isRunning()) {
+ logger.info("no need to reconnect, as the current server status is running");
+ } else {
+ try {
+ ServerLifeCycleManager.recoverFromWaiting();
+ reStartMasterResource();
+ logger.info("Recover from waiting success, the current server status is {}",
+ ServerLifeCycleManager.getServerStatus());
+ } catch (Exception e) {
+ String errorMessage =
+ String.format(
+ "Recover from waiting failed, the current server status is %s, will stop the server",
+ ServerLifeCycleManager.getServerStatus());
+ logger.error(errorMessage, e);
+ registryClient.getStoppable().stop(errorMessage);
+ }
}
}
diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/registry/WorkerWaitingStrategy.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/registry/WorkerWaitingStrategy.java
index 203e5711f1..7188bdeed8 100644
--- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/registry/WorkerWaitingStrategy.java
+++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/registry/WorkerWaitingStrategy.java
@@ -92,19 +92,23 @@ public class WorkerWaitingStrategy implements WorkerConnectStrategy {
@Override
public void reconnect() {
- try {
- ServerLifeCycleManager.recoverFromWaiting();
- reStartWorkerResource();
- logger.info("Recover from waiting success, the current server status is {}",
- ServerLifeCycleManager.getServerStatus());
- } catch (Exception e) {
- String errorMessage =
- String.format("Recover from waiting failed, the current server status is %s, will stop the server",
- ServerLifeCycleManager.getServerStatus());
- logger.error(errorMessage, e);
- registryClient.getStoppable().stop(errorMessage);
+ if (ServerLifeCycleManager.isRunning()) {
+ logger.info("no need to reconnect, as the current server status is running");
+ } else {
+ try {
+ ServerLifeCycleManager.recoverFromWaiting();
+ reStartWorkerResource();
+ logger.info("Recover from waiting success, the current server status is {}",
+ ServerLifeCycleManager.getServerStatus());
+ } catch (Exception e) {
+ String errorMessage =
+ String.format(
+ "Recover from waiting failed, the current server status is %s, will stop the server",
+ ServerLifeCycleManager.getServerStatus());
+ logger.error(errorMessage, e);
+ registryClient.getStoppable().stop(errorMessage);
+ }
}
-
}
@Override