pan3793 commented on code in PR #6997:
URL: https://github.com/apache/kyuubi/pull/6997#discussion_r2010661031


##########
kyuubi-server/src/main/scala/org/apache/kyuubi/operation/BatchJobSubmission.scala:
##########
@@ -250,50 +252,58 @@ class BatchJobSubmission(
   private def submitAndMonitorBatchJob(): Unit = {
     var appStatusFirstUpdated = false
     var lastStarvationCheckTime = createTime
+
+    def doUpdateApplicationInfoMetadataIfNeeded(): Unit = {
+      updateApplicationInfoMetadataIfNeeded()
+      if (!appStatusFirstUpdated) {
+        // only the ApplicationInfo with non-empty id indicates that batch is 
RUNNING
+        if (applicationId(_applicationInfo).isDefined) {
+          setStateIfNotCanceled(OperationState.RUNNING)
+          updateBatchMetadata()
+          appStatusFirstUpdated = true
+        } else {
+          val currentTime = System.currentTimeMillis()
+          if (currentTime - lastStarvationCheckTime > 
applicationStarvationTimeout) {
+            lastStarvationCheckTime = currentTime
+            warn(s"Batch[$batchId] has not started, check the Kyuubi server to 
ensure" +
+              s" that batch jobs can be submitted.")
+          }
+        }
+      }
+    }
+
     try {
       info(s"Submitting $batchType batch[$batchId] job:\n$builder")
       val process = builder.start
-      while (!applicationFailed(_applicationInfo) && process.isAlive) {
-        updateApplicationInfoMetadataIfNeeded()
-        if (!appStatusFirstUpdated) {
-          // only the ApplicationInfo with non-empty id indicates that batch 
is RUNNING
-          if (applicationId(_applicationInfo).isDefined) {
-            setStateIfNotCanceled(OperationState.RUNNING)
-            updateBatchMetadata()
-            appStatusFirstUpdated = true
-          } else {
-            val currentTime = System.currentTimeMillis()
-            if (currentTime - lastStarvationCheckTime > 
applicationStarvationTimeout) {
-              lastStarvationCheckTime = currentTime
-              warn(s"Batch[$batchId] has not started, check the Kyuubi server 
to ensure" +
-                s" that batch jobs can be submitted.")
-            }
-          }
-        }
+      while (process.isAlive && !applicationFailed(_applicationInfo)) {
+        doUpdateApplicationInfoMetadataIfNeeded()
         process.waitFor(applicationCheckInterval, TimeUnit.MILLISECONDS)
       }
 
+      if (!process.isAlive) {
+        doUpdateApplicationInfoMetadataIfNeeded()

Review Comment:
   Just a note: this is the key change.
   
   In the current round, the app state is `NOT_FOUND` because the submit stage 
exceeded `kyuubi.engine.yarn.submit.timeout`. If the submission then succeeds 
during the `process.waitFor` period, `process.isAlive` returns false and the 
loop exits, so without this final update there is no chance to retrieve the 
app state from the cluster manager.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscr...@kyuubi.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscr...@kyuubi.apache.org
For additional commands, e-mail: notifications-h...@kyuubi.apache.org

Reply via email to