gyfora commented on code in PR #407:
URL: 
https://github.com/apache/flink-kubernetes-operator/pull/407#discussion_r1003023775


##########
flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/reconciler/deployment/AbstractJobReconciler.java:
##########
@@ -238,8 +240,34 @@ protected void rollback(CR resource, Context<?> ctx, 
Configuration observeConfig
     @Override
     public boolean reconcileOtherChanges(
             CR resource, Context<?> context, Configuration observeConfig) 
throws Exception {
-        return SavepointUtils.triggerSavepointIfNeeded(
-                getFlinkService(resource, context), resource, observeConfig);
+        var jobStatus =
+                org.apache.flink.api.common.JobStatus.valueOf(
+                        resource.getStatus().getJobStatus().getState());
+        if (jobStatus == org.apache.flink.api.common.JobStatus.FAILED
+                && observeConfig.getBoolean(OPERATOR_JOB_RESTART_FAILED)) {
+            LOG.info("Stopping failed Flink Cluster deployment...");
+            cancelJob(resource, context, UpgradeMode.LAST_STATE, 
observeConfig);
+            resource.getStatus().setError("");
+            resubmitJmDeployment(resource, context, observeConfig, false);
+            return true;
+        } else {
+            return SavepointUtils.triggerSavepointIfNeeded(
+                    getFlinkService(resource, context), resource, 
observeConfig);
+        }
+    }
+
+    protected void resubmitJmDeployment(
+            CR deployment, Context<?> ctx, Configuration observeConfig, 
boolean requireHaMetadata)
+            throws Exception {
+        LOG.info("Resubmitting Flink Cluster deployment...");
+        SPEC specToRecover = ReconciliationUtils.getDeployedSpec(deployment);
+        restoreJob(
+                deployment,
+                specToRecover,
+                deployment.getStatus(),
+                ctx,
+                observeConfig,
+                requireHaMetadata);

Review Comment:
   Or just rename it to `resubmitJob` and modify the logging accordingly.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to