azagrebin commented on a change in pull request #11427: [FLINK-15790][k8s] Make FlinkKubeClient and its implementations asynchronous URL: https://github.com/apache/flink/pull/11427#discussion_r408000408
########## File path: flink-kubernetes/src/main/java/org/apache/flink/kubernetes/kubeclient/Fabric8FlinkKubeClient.java ########## @@ -61,158 +61,180 @@ private final String clusterId; private final String nameSpace; - public Fabric8FlinkKubeClient(Configuration flinkConfig, KubernetesClient client) { + private final ExecutorWrapper executorWrapper; + + public Fabric8FlinkKubeClient(Configuration flinkConfig, KubernetesClient client, ExecutorWrapper executorWrapper) { this.flinkConfig = checkNotNull(flinkConfig); this.internalClient = checkNotNull(client); this.clusterId = checkNotNull(flinkConfig.getString(KubernetesConfigOptions.CLUSTER_ID)); this.nameSpace = flinkConfig.getString(KubernetesConfigOptions.NAMESPACE); + + this.executorWrapper = executorWrapper; } @Override - public void createJobManagerComponent(KubernetesJobManagerSpecification kubernetesJMSpec) { + public CompletableFuture<Void> createJobManagerComponent(KubernetesJobManagerSpecification kubernetesJMSpec) { final Deployment deployment = kubernetesJMSpec.getDeployment(); final List<HasMetadata> accompanyingResources = kubernetesJMSpec.getAccompanyingResources(); // create Deployment LOG.debug("Start to create deployment with spec {}", deployment.getSpec().toString()); - final Deployment createdDeployment = this.internalClient - .apps() - .deployments() - .inNamespace(this.nameSpace) - .create(deployment); - - // Note that we should use the uid of the created Deployment for the OwnerReference. - setOwnerReference(createdDeployment, accompanyingResources); - this.internalClient - .resourceList(accompanyingResources) - .inNamespace(this.nameSpace) - .createOrReplace(); + return CompletableFuture.runAsync(() -> { + final Deployment createdDeployment = this.internalClient + .apps() + .deployments() + .inNamespace(this.nameSpace) + .create(deployment); + + // Note that we should use the uid of the created Deployment for the OwnerReference. 
+ setOwnerReference(createdDeployment, accompanyingResources); + + this.internalClient + .resourceList(accompanyingResources) + .inNamespace(this.nameSpace) + .createOrReplace(); + }, executorWrapper.getExecutor()); } @Override public void createTaskManagerPod(KubernetesPod kubernetesPod) { - final Deployment masterDeployment = this.internalClient - .apps() - .deployments() - .inNamespace(this.nameSpace) - .withName(KubernetesUtils.getDeploymentName(clusterId)) - .get(); - - if (masterDeployment == null) { - throw new RuntimeException( - "Failed to find Deployment named " + clusterId + " in namespace " + this.nameSpace); - } + CompletableFuture.runAsync(() -> { Review comment: Thanks for the feedback, @zhengcanbin. We might want to investigate your experience. Previously, we did not have the retry discussed in this PR. Therefore, there may be other sources of these infinite creation requests, e.g. from the #onError callback, depending on how the k8s client works internally. If there is already an issue for your problem (or you could create one), could you link it to https://issues.apache.org/jira/browse/FLINK-17127 or share your problem there? We might also want a back-off for the #onError callback, depending on the actual problem. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services