ferenc-csaky commented on code in PR #821:
URL:
https://github.com/apache/flink-kubernetes-operator/pull/821#discussion_r1689526438
##########
flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/service/FlinkService.java:
##########
@@ -83,20 +82,20 @@ void deleteClusterDeployment(
Configuration conf,
boolean deleteHaData);
- void cancelSessionJob(FlinkSessionJob sessionJob, UpgradeMode upgradeMode,
Configuration conf)
+ Optional<String> cancelSessionJob(
+ FlinkSessionJob sessionJob, UpgradeMode upgradeMode, Configuration
conf)
throws Exception;
- void triggerSavepoint(
+ String triggerSavepoint(
String jobId,
- SnapshotTriggerType triggerType,
- SavepointInfo savepointInfo,
+ org.apache.flink.core.execution.SavepointFormatType
savepointFormatType,
+ String savepointDirectory,
Configuration conf)
throws Exception;
- void triggerCheckpoint(
+ String triggerCheckpoint(
String jobId,
- SnapshotTriggerType triggerType,
- CheckpointInfo checkpointInfo,
+ org.apache.flink.core.execution.CheckpointType
checkpointFormatType,
Review Comment:
nit: checkpointFormatType -> checkpointType
##########
flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/controller/FlinkStateSnapshotContext.java:
##########
@@ -44,61 +45,56 @@ public class FlinkStateSnapshotContext {
private final Context<FlinkStateSnapshot> josdkContext;
private final FlinkConfigManager configManager;
- private FlinkOperatorConfiguration operatorConfig;
- private Configuration referencedJobObserveConfig;
- private FlinkDeployment referencedJobFlinkDeployment;
+ @Getter(lazy = true)
+ private final FlinkOperatorConfiguration operatorConfig = operatorConfig();
+
+ @Getter(lazy = true)
+ private final Configuration referencedJobObserveConfig =
referencedJobObserveConfig();
+
+ @Getter(lazy = true)
+ private final FlinkDeployment referencedJobFlinkDeployment =
referencedJobFlinkDeployment();
/**
* @return Operator configuration for this resource.
*/
- public FlinkOperatorConfiguration getOperatorConfig() {
- if (operatorConfig != null) {
- return operatorConfig;
- }
- return operatorConfig =
- configManager.getOperatorConfiguration(
- getResource().getMetadata().getNamespace(), null);
+ public FlinkOperatorConfiguration operatorConfig() {
+ return configManager.getOperatorConfiguration(
+ getResource().getMetadata().getNamespace(), null);
+ }
+
+ public Configuration referencedJobObserveConfig() {
+ return
configManager.getObserveConfig(getReferencedJobFlinkDeployment());
+ }
+
+ public FlinkDeployment referencedJobFlinkDeployment() {
Review Comment:
These lazy init methods can be private.
##########
flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/config/FlinkConfigManager.java:
##########
@@ -72,21 +74,26 @@ public class FlinkConfigManager {
private volatile Configuration defaultConfig;
private volatile FlinkOperatorConfiguration defaultOperatorConfiguration;
+ private final boolean snapshotCrdInstalled;
Review Comment:
As we discussed offline, the whole snapshot CRD install check is planned to
exist for only one release, to give users more meaningful errors, so I think it
would make sense to mark this with a TODO to remove it in the 1.11 release.
##########
flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/utils/FlinkStateSnapshotUtils.java:
##########
@@ -0,0 +1,395 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.kubernetes.operator.utils;
+
+import org.apache.flink.autoscaler.utils.DateTimeUtils;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.kubernetes.operator.api.AbstractFlinkResource;
+import org.apache.flink.kubernetes.operator.api.CrdConstants;
+import org.apache.flink.kubernetes.operator.api.FlinkStateSnapshot;
+import org.apache.flink.kubernetes.operator.api.spec.CheckpointSpec;
+import
org.apache.flink.kubernetes.operator.api.spec.FlinkStateSnapshotReference;
+import org.apache.flink.kubernetes.operator.api.spec.FlinkStateSnapshotSpec;
+import org.apache.flink.kubernetes.operator.api.spec.JobReference;
+import org.apache.flink.kubernetes.operator.api.spec.SavepointSpec;
+import org.apache.flink.kubernetes.operator.api.status.CheckpointType;
+import org.apache.flink.kubernetes.operator.api.status.SavepointFormatType;
+import org.apache.flink.kubernetes.operator.api.status.SnapshotTriggerType;
+import org.apache.flink.kubernetes.operator.config.FlinkOperatorConfiguration;
+import
org.apache.flink.kubernetes.operator.config.KubernetesOperatorConfigOptions;
+import org.apache.flink.kubernetes.operator.reconciler.ReconciliationUtils;
+import org.apache.flink.kubernetes.operator.reconciler.SnapshotType;
+
+import io.fabric8.kubernetes.api.model.ObjectMeta;
+import io.fabric8.kubernetes.client.KubernetesClient;
+import org.apache.commons.lang3.StringUtils;
+
+import javax.annotation.Nullable;
+
+import java.time.Instant;
+import java.util.UUID;
+
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.ABANDONED;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.COMPLETED;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.IN_PROGRESS;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.TRIGGER_PENDING;
+import static
org.apache.flink.kubernetes.operator.config.KubernetesOperatorConfigOptions.SNAPSHOT_RESOURCE_ENABLED;
+import static
org.apache.flink.kubernetes.operator.reconciler.SnapshotType.CHECKPOINT;
+import static
org.apache.flink.kubernetes.operator.reconciler.SnapshotType.SAVEPOINT;
+
+/** Utilities class for FlinkStateSnapshot resources. */
+public class FlinkStateSnapshotUtils {
+
+ /**
+ * From a snapshot reference, return its snapshot path. If a {@link
FlinkStateSnapshot} is
+ * referenced, it will be retrieved from Kubernetes.
+ *
+ * @param kubernetesClient kubernetes client
+ * @param snapshotRef snapshot reference
+ * @return found savepoint path
+ */
+ public static String getAndValidateFlinkStateSnapshotPath(
Review Comment:
nit: `getValidatedFlinkStateSnapshotPath` is closer to what actually happens
in this function.
##########
flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/utils/FlinkStateSnapshotUtils.java:
##########
@@ -0,0 +1,395 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.kubernetes.operator.utils;
+
+import org.apache.flink.autoscaler.utils.DateTimeUtils;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.kubernetes.operator.api.AbstractFlinkResource;
+import org.apache.flink.kubernetes.operator.api.CrdConstants;
+import org.apache.flink.kubernetes.operator.api.FlinkStateSnapshot;
+import org.apache.flink.kubernetes.operator.api.spec.CheckpointSpec;
+import
org.apache.flink.kubernetes.operator.api.spec.FlinkStateSnapshotReference;
+import org.apache.flink.kubernetes.operator.api.spec.FlinkStateSnapshotSpec;
+import org.apache.flink.kubernetes.operator.api.spec.JobReference;
+import org.apache.flink.kubernetes.operator.api.spec.SavepointSpec;
+import org.apache.flink.kubernetes.operator.api.status.CheckpointType;
+import org.apache.flink.kubernetes.operator.api.status.SavepointFormatType;
+import org.apache.flink.kubernetes.operator.api.status.SnapshotTriggerType;
+import org.apache.flink.kubernetes.operator.config.FlinkOperatorConfiguration;
+import
org.apache.flink.kubernetes.operator.config.KubernetesOperatorConfigOptions;
+import org.apache.flink.kubernetes.operator.reconciler.ReconciliationUtils;
+import org.apache.flink.kubernetes.operator.reconciler.SnapshotType;
+
+import io.fabric8.kubernetes.api.model.ObjectMeta;
+import io.fabric8.kubernetes.client.KubernetesClient;
+import org.apache.commons.lang3.StringUtils;
+
+import javax.annotation.Nullable;
+
+import java.time.Instant;
+import java.util.UUID;
+
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.ABANDONED;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.COMPLETED;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.IN_PROGRESS;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.TRIGGER_PENDING;
+import static
org.apache.flink.kubernetes.operator.config.KubernetesOperatorConfigOptions.SNAPSHOT_RESOURCE_ENABLED;
+import static
org.apache.flink.kubernetes.operator.reconciler.SnapshotType.CHECKPOINT;
+import static
org.apache.flink.kubernetes.operator.reconciler.SnapshotType.SAVEPOINT;
+
+/** Utilities class for FlinkStateSnapshot resources. */
+public class FlinkStateSnapshotUtils {
+
+ /**
+ * From a snapshot reference, return its snapshot path. If a {@link
FlinkStateSnapshot} is
+ * referenced, it will be retrieved from Kubernetes.
+ *
+ * @param kubernetesClient kubernetes client
+ * @param snapshotRef snapshot reference
+ * @return found savepoint path
+ */
+ public static String getAndValidateFlinkStateSnapshotPath(
+ KubernetesClient kubernetesClient, FlinkStateSnapshotReference
snapshotRef) {
+ if (!StringUtils.isBlank(snapshotRef.getPath())) {
+ return snapshotRef.getPath();
+ }
+
+ if (StringUtils.isBlank(snapshotRef.getName())) {
+ throw new IllegalArgumentException(
+ String.format("Invalid snapshot name: %s",
snapshotRef.getName()));
+ }
+
+ FlinkStateSnapshot result;
+ if (snapshotRef.getName() != null) {
+ var namespace = snapshotRef.getNamespace();
+ if (namespace == null) {
+ result =
+ kubernetesClient
+ .resources(FlinkStateSnapshot.class)
+ .withName(snapshotRef.getName())
+ .get();
+ } else {
+ result =
+ kubernetesClient
+ .resources(FlinkStateSnapshot.class)
+ .inNamespace(namespace)
+ .withName(snapshotRef.getName())
+ .get();
+ }
+ } else {
+ result =
+ kubernetesClient
+ .resources(FlinkStateSnapshot.class)
+ .withName(snapshotRef.getName())
+ .get();
+ }
Review Comment:
According to this code, the `kubernetesClient` call will be the same if
`snapshotRef.getName()` is `null` OR `snapshotRef.getNamespace()` is `null`, as
L81-85 and L95-99 are the same. Also, we already validate that
`snapshotRef.getName()` is not `null` at L72, so I do not see why another
null-check is meaningful.
So unless I am missing something, the whole `if` block can be simplified to:
```java
var result =
snapshotRef.getNamespace() == null
? kubernetesClient
.resources(FlinkStateSnapshot.class)
.withName(snapshotRef.getName())
.get()
: kubernetesClient
.resources(FlinkStateSnapshot.class)
.inNamespace(snapshotRef.getNamespace())
.withName(snapshotRef.getName())
.get();
```
##########
flink-kubernetes-webhook/src/main/java/org/apache/flink/kubernetes/operator/admission/mutator/FlinkMutator.java:
##########
@@ -92,4 +96,16 @@ private FlinkDeployment mutateDeployment(HasMetadata
resource) {
throw new RuntimeException(e);
}
}
+
+ private FlinkStateSnapshot mutateStateSnapshot(HasMetadata resource) {
+ try {
+ var savepoint = mapper.convertValue(resource,
FlinkStateSnapshot.class);
Review Comment:
nit: savepoint -> snapshot.
##########
flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/service/AbstractFlinkService.java:
##########
@@ -525,18 +513,14 @@ public void triggerSavepoint(
.get(timeout, TimeUnit.SECONDS);
LOG.info("Savepoint successfully triggered: " +
response.getTriggerId().toHexString());
- savepointInfo.setTrigger(
- response.getTriggerId().toHexString(),
- triggerType,
- SavepointFormatType.valueOf(savepointFormatType.name()));
+ return response.getTriggerId().toHexString();
}
}
@Override
- public void triggerCheckpoint(
+ public String triggerCheckpoint(
String jobId,
- SnapshotTriggerType triggerType,
- org.apache.flink.kubernetes.operator.api.status.CheckpointInfo
checkpointInfo,
+ org.apache.flink.core.execution.CheckpointType
checkpointFormatType,
Review Comment:
nit: checkpointFormatType -> checkpointType
##########
flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/utils/FlinkStateSnapshotUtils.java:
##########
@@ -0,0 +1,395 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.kubernetes.operator.utils;
+
+import org.apache.flink.autoscaler.utils.DateTimeUtils;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.kubernetes.operator.api.AbstractFlinkResource;
+import org.apache.flink.kubernetes.operator.api.CrdConstants;
+import org.apache.flink.kubernetes.operator.api.FlinkStateSnapshot;
+import org.apache.flink.kubernetes.operator.api.spec.CheckpointSpec;
+import
org.apache.flink.kubernetes.operator.api.spec.FlinkStateSnapshotReference;
+import org.apache.flink.kubernetes.operator.api.spec.FlinkStateSnapshotSpec;
+import org.apache.flink.kubernetes.operator.api.spec.JobReference;
+import org.apache.flink.kubernetes.operator.api.spec.SavepointSpec;
+import org.apache.flink.kubernetes.operator.api.status.CheckpointType;
+import org.apache.flink.kubernetes.operator.api.status.SavepointFormatType;
+import org.apache.flink.kubernetes.operator.api.status.SnapshotTriggerType;
+import org.apache.flink.kubernetes.operator.config.FlinkOperatorConfiguration;
+import
org.apache.flink.kubernetes.operator.config.KubernetesOperatorConfigOptions;
+import org.apache.flink.kubernetes.operator.reconciler.ReconciliationUtils;
+import org.apache.flink.kubernetes.operator.reconciler.SnapshotType;
+
+import io.fabric8.kubernetes.api.model.ObjectMeta;
+import io.fabric8.kubernetes.client.KubernetesClient;
+import org.apache.commons.lang3.StringUtils;
+
+import javax.annotation.Nullable;
+
+import java.time.Instant;
+import java.util.UUID;
+
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.ABANDONED;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.COMPLETED;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.IN_PROGRESS;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.TRIGGER_PENDING;
+import static
org.apache.flink.kubernetes.operator.config.KubernetesOperatorConfigOptions.SNAPSHOT_RESOURCE_ENABLED;
+import static
org.apache.flink.kubernetes.operator.reconciler.SnapshotType.CHECKPOINT;
+import static
org.apache.flink.kubernetes.operator.reconciler.SnapshotType.SAVEPOINT;
+
+/** Utilities class for FlinkStateSnapshot resources. */
+public class FlinkStateSnapshotUtils {
+
+ /**
+ * From a snapshot reference, return its snapshot path. If a {@link
FlinkStateSnapshot} is
+ * referenced, it will be retrieved from Kubernetes.
+ *
+ * @param kubernetesClient kubernetes client
+ * @param snapshotRef snapshot reference
+ * @return found savepoint path
+ */
+ public static String getAndValidateFlinkStateSnapshotPath(
+ KubernetesClient kubernetesClient, FlinkStateSnapshotReference
snapshotRef) {
+ if (!StringUtils.isBlank(snapshotRef.getPath())) {
Review Comment:
nit: use `StringUtils.isNotBlank(...)` to avoid the negation.
##########
flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/service/AbstractFlinkService.java:
##########
@@ -487,34 +484,25 @@ public void cancelSessionJob(
}
jobStatus.setState(JobStatus.FINISHED.name());
- savepointOpt.ifPresent(
- location -> {
- Savepoint sp = Savepoint.of(location,
SnapshotTriggerType.UPGRADE);
- jobStatus.getSavepointInfo().updateLastSavepoint(sp);
- });
+ return savepointOpt;
}
@Override
- public void triggerSavepoint(
+ public String triggerSavepoint(
String jobId,
- SnapshotTriggerType triggerType,
- org.apache.flink.kubernetes.operator.api.status.SavepointInfo
savepointInfo,
+ org.apache.flink.core.execution.SavepointFormatType
savepointFormatType,
+ String savepointDirectory,
Configuration conf)
throws Exception {
- LOG.info("Triggering new savepoint");
+ LOG.info("Triggering new savepoint using new method");
Review Comment:
This seems to me like a dev debug log.
##########
flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/utils/FlinkStateSnapshotUtils.java:
##########
@@ -0,0 +1,395 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.kubernetes.operator.utils;
+
+import org.apache.flink.autoscaler.utils.DateTimeUtils;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.kubernetes.operator.api.AbstractFlinkResource;
+import org.apache.flink.kubernetes.operator.api.CrdConstants;
+import org.apache.flink.kubernetes.operator.api.FlinkStateSnapshot;
+import org.apache.flink.kubernetes.operator.api.spec.CheckpointSpec;
+import
org.apache.flink.kubernetes.operator.api.spec.FlinkStateSnapshotReference;
+import org.apache.flink.kubernetes.operator.api.spec.FlinkStateSnapshotSpec;
+import org.apache.flink.kubernetes.operator.api.spec.JobReference;
+import org.apache.flink.kubernetes.operator.api.spec.SavepointSpec;
+import org.apache.flink.kubernetes.operator.api.status.CheckpointType;
+import org.apache.flink.kubernetes.operator.api.status.SavepointFormatType;
+import org.apache.flink.kubernetes.operator.api.status.SnapshotTriggerType;
+import org.apache.flink.kubernetes.operator.config.FlinkOperatorConfiguration;
+import
org.apache.flink.kubernetes.operator.config.KubernetesOperatorConfigOptions;
+import org.apache.flink.kubernetes.operator.reconciler.ReconciliationUtils;
+import org.apache.flink.kubernetes.operator.reconciler.SnapshotType;
+
+import io.fabric8.kubernetes.api.model.ObjectMeta;
+import io.fabric8.kubernetes.client.KubernetesClient;
+import org.apache.commons.lang3.StringUtils;
+
+import javax.annotation.Nullable;
+
+import java.time.Instant;
+import java.util.UUID;
+
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.ABANDONED;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.COMPLETED;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.IN_PROGRESS;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.TRIGGER_PENDING;
+import static
org.apache.flink.kubernetes.operator.config.KubernetesOperatorConfigOptions.SNAPSHOT_RESOURCE_ENABLED;
+import static
org.apache.flink.kubernetes.operator.reconciler.SnapshotType.CHECKPOINT;
+import static
org.apache.flink.kubernetes.operator.reconciler.SnapshotType.SAVEPOINT;
+
+/** Utilities class for FlinkStateSnapshot resources. */
+public class FlinkStateSnapshotUtils {
+
+ /**
+ * From a snapshot reference, return its snapshot path. If a {@link
FlinkStateSnapshot} is
+ * referenced, it will be retrieved from Kubernetes.
+ *
+ * @param kubernetesClient kubernetes client
+ * @param snapshotRef snapshot reference
+ * @return found savepoint path
+ */
+ public static String getAndValidateFlinkStateSnapshotPath(
+ KubernetesClient kubernetesClient, FlinkStateSnapshotReference
snapshotRef) {
+ if (!StringUtils.isBlank(snapshotRef.getPath())) {
+ return snapshotRef.getPath();
+ }
+
+ if (StringUtils.isBlank(snapshotRef.getName())) {
+ throw new IllegalArgumentException(
+ String.format("Invalid snapshot name: %s",
snapshotRef.getName()));
+ }
+
+ FlinkStateSnapshot result;
+ if (snapshotRef.getName() != null) {
+ var namespace = snapshotRef.getNamespace();
+ if (namespace == null) {
+ result =
+ kubernetesClient
+ .resources(FlinkStateSnapshot.class)
+ .withName(snapshotRef.getName())
+ .get();
+ } else {
+ result =
+ kubernetesClient
+ .resources(FlinkStateSnapshot.class)
+ .inNamespace(namespace)
+ .withName(snapshotRef.getName())
+ .get();
+ }
+ } else {
+ result =
+ kubernetesClient
+ .resources(FlinkStateSnapshot.class)
+ .withName(snapshotRef.getName())
+ .get();
+ }
+
+ if (result == null) {
+ throw new IllegalArgumentException(
Review Comment:
IMO `IllegalStateException` would be better here, as we validate the name
already so the passed argument itself seems to be valid. Same for the following
2 throws in the current function.
##########
flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/utils/FlinkStateSnapshotUtils.java:
##########
@@ -0,0 +1,395 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.kubernetes.operator.utils;
+
+import org.apache.flink.autoscaler.utils.DateTimeUtils;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.kubernetes.operator.api.AbstractFlinkResource;
+import org.apache.flink.kubernetes.operator.api.CrdConstants;
+import org.apache.flink.kubernetes.operator.api.FlinkStateSnapshot;
+import org.apache.flink.kubernetes.operator.api.spec.CheckpointSpec;
+import
org.apache.flink.kubernetes.operator.api.spec.FlinkStateSnapshotReference;
+import org.apache.flink.kubernetes.operator.api.spec.FlinkStateSnapshotSpec;
+import org.apache.flink.kubernetes.operator.api.spec.JobReference;
+import org.apache.flink.kubernetes.operator.api.spec.SavepointSpec;
+import org.apache.flink.kubernetes.operator.api.status.CheckpointType;
+import org.apache.flink.kubernetes.operator.api.status.SavepointFormatType;
+import org.apache.flink.kubernetes.operator.api.status.SnapshotTriggerType;
+import org.apache.flink.kubernetes.operator.config.FlinkOperatorConfiguration;
+import
org.apache.flink.kubernetes.operator.config.KubernetesOperatorConfigOptions;
+import org.apache.flink.kubernetes.operator.reconciler.ReconciliationUtils;
+import org.apache.flink.kubernetes.operator.reconciler.SnapshotType;
+
+import io.fabric8.kubernetes.api.model.ObjectMeta;
+import io.fabric8.kubernetes.client.KubernetesClient;
+import org.apache.commons.lang3.StringUtils;
+
+import javax.annotation.Nullable;
+
+import java.time.Instant;
+import java.util.UUID;
+
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.ABANDONED;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.COMPLETED;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.IN_PROGRESS;
+import static
org.apache.flink.kubernetes.operator.api.status.FlinkStateSnapshotStatus.State.TRIGGER_PENDING;
+import static
org.apache.flink.kubernetes.operator.config.KubernetesOperatorConfigOptions.SNAPSHOT_RESOURCE_ENABLED;
+import static
org.apache.flink.kubernetes.operator.reconciler.SnapshotType.CHECKPOINT;
+import static
org.apache.flink.kubernetes.operator.reconciler.SnapshotType.SAVEPOINT;
+
+/** Utilities class for FlinkStateSnapshot resources. */
+public class FlinkStateSnapshotUtils {
+
+ /**
+ * From a snapshot reference, return its snapshot path. If a {@link
FlinkStateSnapshot} is
+ * referenced, it will be retrieved from Kubernetes.
+ *
+ * @param kubernetesClient kubernetes client
+ * @param snapshotRef snapshot reference
+ * @return found savepoint path
+ */
+ public static String getAndValidateFlinkStateSnapshotPath(
+ KubernetesClient kubernetesClient, FlinkStateSnapshotReference
snapshotRef) {
+ if (!StringUtils.isBlank(snapshotRef.getPath())) {
+ return snapshotRef.getPath();
+ }
+
+ if (StringUtils.isBlank(snapshotRef.getName())) {
+ throw new IllegalArgumentException(
+ String.format("Invalid snapshot name: %s",
snapshotRef.getName()));
+ }
+
+ FlinkStateSnapshot result;
+ if (snapshotRef.getName() != null) {
+ var namespace = snapshotRef.getNamespace();
+ if (namespace == null) {
+ result =
+ kubernetesClient
+ .resources(FlinkStateSnapshot.class)
+ .withName(snapshotRef.getName())
+ .get();
+ } else {
+ result =
+ kubernetesClient
+ .resources(FlinkStateSnapshot.class)
+ .inNamespace(namespace)
+ .withName(snapshotRef.getName())
+ .get();
+ }
+ } else {
+ result =
+ kubernetesClient
+ .resources(FlinkStateSnapshot.class)
+ .withName(snapshotRef.getName())
+ .get();
+ }
+
+ if (result == null) {
+ throw new IllegalArgumentException(
+ String.format(
+ "Cannot find snapshot %s in namespace %s.",
+ snapshotRef.getNamespace(),
snapshotRef.getName()));
+ }
+
+ // We can return the savepoint path if it's marked as completed
without waiting for the
+ // reconciler to update its status.
+ if (result.getSpec().isSavepoint() &&
result.getSpec().getSavepoint().getAlreadyExists()) {
+ var path = result.getSpec().getSavepoint().getPath();
+ if (!StringUtils.isBlank(path)) {
+ return path;
+ }
+ }
+
+ if (COMPLETED != result.getStatus().getState()) {
+ throw new IllegalArgumentException(
+ String.format(
+ "Snapshot %s/%s is not complete yet.",
+ snapshotRef.getNamespace(),
snapshotRef.getName()));
+ }
+
+ var path = result.getStatus().getPath();
+ if (StringUtils.isBlank(path)) {
+ throw new IllegalArgumentException(
+ String.format(
+ "Snapshot %s/%s path is incorrect: %s.",
+ snapshotRef.getNamespace(), snapshotRef.getName(),
path));
+ }
+
+ return path;
+ }
+
+ protected static FlinkStateSnapshot createFlinkStateSnapshot(
+ KubernetesClient kubernetesClient,
+ String name,
+ FlinkStateSnapshotSpec spec,
+ SnapshotTriggerType triggerType) {
+ var metadata = new ObjectMeta();
+ metadata.setName(name);
+ metadata.getLabels().put(CrdConstants.LABEL_SNAPSHOT_TYPE,
triggerType.name());
+
+ var snapshot = new FlinkStateSnapshot();
+ snapshot.setSpec(spec);
+ snapshot.setMetadata(metadata);
+
+ return kubernetesClient.resource(snapshot).create();
+ }
+
+ /**
+ * Creates a savepoint {@link FlinkStateSnapshot} resource on the
Kubernetes cluster.
+ *
+ * @param kubernetesClient kubernetes client
+ * @param resource Flink resource associated
+ * @param savepointPath savepoint path if any
+ * @param triggerType trigger type
+ * @param savepointFormatType format type
+ * @param disposeOnDelete should dispose of data on deletion
+ * @return created snapshot
+ */
+ public static FlinkStateSnapshot createSavepointResource(
+ KubernetesClient kubernetesClient,
+ AbstractFlinkResource<?, ?> resource,
+ @Nullable String savepointPath,
+ SnapshotTriggerType triggerType,
+ SavepointFormatType savepointFormatType,
+ boolean disposeOnDelete) {
+ var savepointSpec =
+ SavepointSpec.builder()
+ .path(savepointPath)
+ .formatType(savepointFormatType)
+ .disposeOnDelete(disposeOnDelete)
+ .alreadyExists(triggerType ==
SnapshotTriggerType.UPGRADE)
+ .build();
+
+ var snapshotSpec =
+ FlinkStateSnapshotSpec.builder()
+ .jobReference(JobReference.fromFlinkResource(resource))
+ .savepoint(savepointSpec)
+ .build();
+
+ var resourceName = getFlinkStateSnapshotName(SAVEPOINT, triggerType,
resource);
+ return createFlinkStateSnapshot(kubernetesClient, resourceName,
snapshotSpec, triggerType);
+ }
+
+ /**
+ * Creates a checkpoint {@link FlinkStateSnapshot} resource on the
Kubernetes cluster.
+ *
+ * @param kubernetesClient kubernetes client
+ * @param resource Flink resource associated
+ * @param checkpointType type of checkpoint
+ * @param triggerType trigger type
+ * @return created snapshot
+ */
+ public static FlinkStateSnapshot createCheckpointResource(
+ KubernetesClient kubernetesClient,
+ AbstractFlinkResource<?, ?> resource,
+ CheckpointType checkpointType,
+ SnapshotTriggerType triggerType) {
+ var checkpointSpec =
CheckpointSpec.builder().checkpointType(checkpointType).build();
+
+ var snapshotSpec =
+ FlinkStateSnapshotSpec.builder()
+ .jobReference(JobReference.fromFlinkResource(resource))
+ .checkpoint(checkpointSpec)
+ .build();
+
+ var resourceName = getFlinkStateSnapshotName(CHECKPOINT, triggerType,
resource);
+ return createFlinkStateSnapshot(kubernetesClient, resourceName,
snapshotSpec, triggerType);
+ }
+
+ /**
+ * Based on job configuration and operator configuration, decide if {@link
FlinkStateSnapshot}
+ * resources should be used or not. Operator configuration will disable
the usage if the
+ * corresponding CRD was not installed on this Kubernetes cluster.
+ *
+ * @param operatorConfiguration operator config
+ * @param configuration job config
+ * @return true if snapshot resources should be created
+ */
+ public static boolean isSnapshotResourceEnabled(
+ FlinkOperatorConfiguration operatorConfiguration, Configuration
configuration) {
+ return configuration.get(SNAPSHOT_RESOURCE_ENABLED)
+ && operatorConfiguration.isSnapshotResourcesEnabled();
+ }
+
+ /**
+ * Return a generated name for a {@link FlinkStateSnapshot} to be created.
+ *
+ * @param snapshotType type of snapshot
+ * @param triggerType trigger type of snapshot
+ * @param referencedResource referenced resource
+ * @return result name
+ */
+ public static String getFlinkStateSnapshotName(
+ SnapshotType snapshotType,
+ SnapshotTriggerType triggerType,
+ AbstractFlinkResource<?, ?> referencedResource) {
+ return String.format(
+ "%s-%s-%s-%s",
+ referencedResource.getMetadata().getName(),
+ snapshotType.name().toLowerCase(),
+ triggerType.name().toLowerCase(),
+ UUID.randomUUID());
+ }
+
+ /**
+ * For an upgrade savepoint, create a {@link FlinkStateSnapshot} on the
Kubernetes cluster and
+ * return its reference if snapshot resources are enabled. Otherwise,
return a reference
+ * containing only the path.
+ *
+ * @param conf job configuration
+ * @param operatorConf operator configuration
+ * @param kubernetesClient kubernetes client
+ * @param flinkResource referenced Flink resource
+ * @param savepointFormatType savepoint format type
+ * @param savepointPath path of savepoint
+ * @return reference for snapshot
+ */
+ public static FlinkStateSnapshotReference
createReferenceForUpgradeSavepoint(
+ Configuration conf,
+ FlinkOperatorConfiguration operatorConf,
+ KubernetesClient kubernetesClient,
+ AbstractFlinkResource<?, ?> flinkResource,
+ SavepointFormatType savepointFormatType,
+ String savepointPath) {
+ if (isSnapshotResourceEnabled(operatorConf, conf)) {
+ var snapshot =
+ createSavepointResource(
+ kubernetesClient,
+ flinkResource,
+ savepointPath,
+ SnapshotTriggerType.UPGRADE,
+ savepointFormatType,
+ conf.get(
+ KubernetesOperatorConfigOptions
+
.OPERATOR_JOB_SAVEPOINT_DISPOSE_ON_DELETE));
+ return FlinkStateSnapshotReference.fromResource(snapshot);
+ } else {
Review Comment:
nit: else can be omitted
##########
flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/utils/EventSourceUtils.java:
##########
@@ -145,6 +150,125 @@ public static InformerEventSource<Deployment,
FlinkDeployment> getDeploymentInfo
return new InformerEventSource<>(configuration, context);
}
+ public static EventSource[] getFlinkStateSnapshotInformerEventSources(
+ EventSourceContext<FlinkStateSnapshot> context) {
+ context.getPrimaryCache()
+ .addIndexer(
+ FLINK_STATE_SNAPSHOT_IDX,
+ savepoint -> {
+ if (savepoint.getSpec().getJobReference() == null
+ ||
savepoint.getSpec().getJobReference().getName() == null) {
+ return Collections.emptyList();
+ }
+ return List.of(
+ indexKey(
+
savepoint.getSpec().getJobReference().toString(),
+
savepoint.getMetadata().getNamespace()));
+ });
+
+ InformerConfiguration<FlinkSessionJob> configurationFlinkSessionJob =
+ InformerConfiguration.from(FlinkSessionJob.class, context)
+ .withSecondaryToPrimaryMapper(
Review Comment:
IMO this logic can be extracted to a separate method and then we can cast it
at call time:
```java
private static SecondaryToPrimaryMapper<? extends AbstractFlinkResource<?,
?>>
getSnapshotPrimaryMapper(EventSourceContext<FlinkStateSnapshot> ctx)
{
return flinkResource ->
ctx
.getPrimaryCache()
.byIndex(
FLINK_STATE_SNAPSHOT_IDX,
indexKey(
flinkResource.getMetadata().getName(),
flinkResource.getMetadata().getNamespace()))
.stream()
.map(ResourceID::fromResource)
.collect(Collectors.toSet());
}
```
Since the body is the same for both FlinkDeployment and FlinkSessionJob.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]