VictorPlusC commented on a change in pull request #16741:
URL: https://github.com/apache/beam/pull/16741#discussion_r807416546
##########
File path:
sdks/python/apache_beam/runners/interactive/interactive_environment.py
##########
@@ -549,6 +589,50 @@ def set_cached_source_signature(self, pipeline, signature):
def get_cached_source_signature(self, pipeline):
return self._cached_source_signature.get(str(id(pipeline)), set())
+ def set_dataproc_cluster_manager(self, pipeline):
+ """Sets the instance of DataprocClusterManager to be used by the
+ pipeline.
+ """
+ if self._is_in_ipython:
+ warnings.filterwarnings(
+ 'ignore',
+ 'options is deprecated since First stable release. References to '
+ '<pipeline>.options will not be supported',
+ category=DeprecationWarning)
+ project_id = (pipeline.options.view_as(GoogleCloudOptions).project)
+ region = (pipeline.options.view_as(GoogleCloudOptions).region)
+ cluster_name = self.clusters.default_cluster_name
+ cluster_manager = DataprocClusterManager(
+ project_id=project_id, region=region, cluster_name=cluster_name)
+ self.clusters._dataproc_cluster_managers[str(id(pipeline))] = cluster_manager
+
+ def get_dataproc_cluster_manager(self, pipeline):
+ """Gets the instance of DataprocClusterManager currently used by the
+ pipeline.
+ """
+ return self.clusters._dataproc_cluster_managers.get(str(id(pipeline)), None)
+
+ def evict_dataproc_cluster_manager(self, pipeline):
+ """Evicts and pops the instance of DataprocClusterManager that is currently
+ used by the pipeline. Noop if the given pipeline is absent from the
+ environment or if the DataprocClusterManager instance is being used by
+ another pipeline. If no pipeline is specified, evicts for all pipelines.
+ """
+ if pipeline:
+ cluster_manager = self.clusters._dataproc_cluster_managers.pop(
+ str(id(pipeline)), None)
+ if cluster_manager:
+ master_url = cluster_manager.master_url
+ if len(self.clusters.get_pipelines_using_master_url( \
+ master_url)) == 1:
+ del self.clusters._master_urls[master_url]
+ del self.clusters._master_urls_to_pipelines[master_url]
+ return
+ self.clusters._dataproc_cluster_managers.clear()
+ self.clusters._master_urls.clear()
+ self.clusters._master_urls.inverse.clear()
Review comment:
That makes sense. I've now implemented this change, thanks!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]