This is an automated email from the ASF dual-hosted git repository.

alexkun pushed a commit to branch dev-1.2.0
in repository https://gitbox.apache.org/repos/asf/incubator-linkis.git

commit 4b1cb6eb11e0aa1bc311d7931c1d32595752201c
Author: peacewong <[email protected]>
AuthorDate: Fri Jul 22 19:36:31 2022 +0800

    Optimize engine startup to support more retry scenarios (closes #2505)
---
 .../server/service/impl/ProcessEngineConnLaunchService.scala   |  4 ++++
 .../manager/am/service/engine/DefaultEngineCreateService.scala | 10 ++++++++++
 .../manager/rm/service/impl/DefaultResourceManager.scala       |  5 +++--
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git 
a/linkis-computation-governance/linkis-engineconn-manager/linkis-engineconn-manager-server/src/main/scala/org/apache/linkis/ecm/server/service/impl/ProcessEngineConnLaunchService.scala
 
b/linkis-computation-governance/linkis-engineconn-manager/linkis-engineconn-manager-server/src/main/scala/org/apache/linkis/ecm/server/service/impl/ProcessEngineConnLaunchService.scala
index ca0db8b8f..1f004f777 100644
--- 
a/linkis-computation-governance/linkis-engineconn-manager/linkis-engineconn-manager-server/src/main/scala/org/apache/linkis/ecm/server/service/impl/ProcessEngineConnLaunchService.scala
+++ 
b/linkis-computation-governance/linkis-engineconn-manager/linkis-engineconn-manager-server/src/main/scala/org/apache/linkis/ecm/server/service/impl/ProcessEngineConnLaunchService.scala
@@ -36,6 +36,7 @@ import 
org.apache.linkis.manager.engineplugin.common.launch.entity.EngineConnLau
 import org.apache.linkis.rpc.Sender
 import org.apache.commons.io.IOUtils
 import org.apache.commons.lang3.exception.ExceptionUtils
+import org.apache.commons.lang3.StringUtils
 import org.apache.linkis.ecm.server.service.LocalDirsHandleService
 import org.apache.linkis.manager.label.utils.LabelUtil
 
@@ -65,6 +66,9 @@ abstract class ProcessEngineConnLaunchService extends 
AbstractEngineConnLaunchSe
             case ecmError: ECMErrorException =>
               if (ECMErrorCode.EC_START_TIME_OUT == ecmError.getErrCode) {
                 true
+              } else if (StringUtils.isBlank(ecmError.getDesc)) {
+                logger.info("exception desc is null, can be retry")
+                true
               } else {
                 false
               }
diff --git 
a/linkis-computation-governance/linkis-manager/linkis-application-manager/src/main/scala/org/apache/linkis/manager/am/service/engine/DefaultEngineCreateService.scala
 
b/linkis-computation-governance/linkis-manager/linkis-application-manager/src/main/scala/org/apache/linkis/manager/am/service/engine/DefaultEngineCreateService.scala
index 4dcc236be..733cfb1e5 100644
--- 
a/linkis-computation-governance/linkis-manager/linkis-application-manager/src/main/scala/org/apache/linkis/manager/am/service/engine/DefaultEngineCreateService.scala
+++ 
b/linkis-computation-governance/linkis-manager/linkis-application-manager/src/main/scala/org/apache/linkis/manager/am/service/engine/DefaultEngineCreateService.scala
@@ -215,6 +215,16 @@ class DefaultEngineCreateService extends 
AbstractEngineService with EngineCreate
 
     Utils.tryCatch(getEngineNodeManager.updateEngineNode(oldServiceInstance, 
engineNode)) { t =>
       logger.warn(s"Failed to update engineNode $engineNode", t)
+      val stopEngineRequest = new 
EngineStopRequest(engineNode.getServiceInstance, ManagerUtils.getAdminUser)
+      engineStopService.asyncStopEngine(stopEngineRequest)
+      val failedEcNode = getEngineNodeManager.getEngineNode(oldServiceInstance)
+      if (null == failedEcNode) {
+        logger.info(s" engineConn is not exists in db: $oldServiceInstance ")
+      } else {
+        
failedEcNode.setLabels(nodeLabelService.getNodeLabels(oldServiceInstance))
+        
failedEcNode.getLabels.addAll(LabelUtils.distinctLabel(labelFilter.choseEngineLabel(labelList),
 emNode.getLabels))
+        engineStopService.engineConnInfoClear(failedEcNode)
+      }
       throw new LinkisRetryException(AMConstant.EM_ERROR_CODE, s"Failed to 
update engineNode: ${t.getMessage}")
     }
 
diff --git 
a/linkis-computation-governance/linkis-manager/linkis-application-manager/src/main/scala/org/apache/linkis/manager/rm/service/impl/DefaultResourceManager.scala
 
b/linkis-computation-governance/linkis-manager/linkis-application-manager/src/main/scala/org/apache/linkis/manager/rm/service/impl/DefaultResourceManager.scala
index 5b675f508..b98a70446 100644
--- 
a/linkis-computation-governance/linkis-manager/linkis-application-manager/src/main/scala/org/apache/linkis/manager/rm/service/impl/DefaultResourceManager.scala
+++ 
b/linkis-computation-governance/linkis-manager/linkis-application-manager/src/main/scala/org/apache/linkis/manager/rm/service/impl/DefaultResourceManager.scala
@@ -19,6 +19,7 @@ package org.apache.linkis.manager.rm.service.impl
 
 import com.google.common.collect.Lists
 import org.apache.linkis.common.ServiceInstance
+import org.apache.linkis.common.exception.LinkisRetryException
 import org.apache.linkis.common.utils.{Logging, Utils}
 import org.apache.linkis.governance.common.conf.GovernanceCommonConf
 import org.apache.linkis.manager.common.conf.RMConfiguration
@@ -587,10 +588,10 @@ class DefaultResourceManager extends ResourceManager with 
Logging with Initializ
   private def tryLock(labelContainer: RMLabelContainer, timeOut: Long = -1): 
Unit = {
     labelContainer.getResourceLabels.foreach {
       case label: Label[_] =>
-        labelContainer.setCurrentLabel(label.asInstanceOf[Label[_]])
+        labelContainer.setCurrentLabel(label)
         val locked = resourceLockService.tryLock(labelContainer, timeOut)
         if (!locked) {
-          throw new RMWarnException(110022, s"try to lock resource label 
${labelContainer.getCurrentLabel} over $timeOut ms, please wait a moment and 
try again!")
+          throw new LinkisRetryException(110022, s"try to lock resource label 
${labelContainer.getCurrentLabel} over $timeOut ms, please wait a moment and 
try again!")
         }
       case _ =>
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to