gyfora commented on code in PR #165:
URL:
https://github.com/apache/flink-kubernetes-operator/pull/165#discussion_r850308026
##########
flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/reconciler/deployment/ApplicationReconciler.java:
##########
@@ -102,36 +111,84 @@ public void reconcile(FlinkDeployment flinkApp, Context
context, Configuration e
}
if (currentJobState == JobState.SUSPENDED && desiredJobState ==
JobState.RUNNING) {
if (upgradeMode == UpgradeMode.STATELESS) {
- deployFlinkJob(flinkApp, effectiveConfig,
Optional.empty());
- } else if (upgradeMode == UpgradeMode.LAST_STATE
- || upgradeMode == UpgradeMode.SAVEPOINT) {
- restoreFromLastSavepoint(flinkApp, effectiveConfig);
+ deployFlinkJob(currentJobSpec, status, effectiveConfig,
Optional.empty());
+ } else {
+ restoreFromLastSavepoint(currentJobSpec, status,
effectiveConfig);
}
stateAfterReconcile = JobState.RUNNING;
}
- IngressUtils.updateIngressRules(flinkApp, effectiveConfig,
kubernetesClient);
+ IngressUtils.updateIngressRules(
+ deployMeta, currentDeploySpec, effectiveConfig,
kubernetesClient);
ReconciliationUtils.updateForSpecReconciliationSuccess(flinkApp,
stateAfterReconcile);
- } else if (SavepointUtils.shouldTriggerSavepoint(flinkApp) &&
isJobRunning(flinkApp)) {
+ } else if (ReconciliationUtils.shouldRollBack(reconciliationStatus,
effectiveConfig)) {
+ rollbackApplication(flinkApp);
+ } else if (SavepointUtils.shouldTriggerSavepoint(currentJobSpec,
status)
+ && isJobRunning(status)) {
triggerSavepoint(flinkApp, effectiveConfig);
ReconciliationUtils.updateSavepointReconciliationSuccess(flinkApp);
+ } else {
+ LOG.info("Deployment is fully reconciled, nothing to do.");
}
}
+ private void rollbackApplication(FlinkDeployment flinkApp) throws
Exception {
+ ReconciliationStatus reconciliationStatus =
flinkApp.getStatus().getReconciliationStatus();
+
+ if (reconciliationStatus.getState() !=
ReconciliationStatus.State.ROLLING_BACK) {
+ LOG.warn("Preparing to roll back to last stable spec.");
+ if (flinkApp.getStatus().getError() == null) {
+ flinkApp.getStatus()
+ .setError(
+ "Deployment is not ready within the configured
timeout, rolling-back.");
+ }
+
reconciliationStatus.setState(ReconciliationStatus.State.ROLLING_BACK);
+ return;
+ }
+
+ LOG.warn("Executing roll-back operation");
+
+ FlinkDeploymentSpec rollbackSpec =
reconciliationStatus.deserializeLastStableSpec();
+ Configuration rollbackConfig =
+ FlinkUtils.getEffectiveConfig(flinkApp.getMetadata(),
rollbackSpec, defaultConfig);
+
+ UpgradeMode upgradeMode = flinkApp.getSpec().getJob().getUpgradeMode();
+
+ suspendJob(
Review Comment:
Of course, I am happy to iterate on the design and change anything; I just
wanted to explain the current design in detail.
With the current flow, it is always clear what spec the user sent in, what
was deployed and what has been rolled back.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]