VictorPlusC commented on a change in pull request #16741:
URL: https://github.com/apache/beam/pull/16741#discussion_r807380508
##########
File path: sdks/python/apache_beam/runners/interactive/interactive_runner.py
##########
@@ -209,6 +214,50 @@ def visit_transform(self, transform_node):
return main_job_result
+ # TODO(victorhc): Move this method somewhere else if performance is impacted
+ # by generating a cluster during runtime.
+ def _create_dataproc_cluster_if_applicable(self, user_pipeline):
+ """ Creates a Dataproc cluster if the provided user_pipeline is running
+ FlinkRunner and no flink_master_url was provided as an option. A cluster
+ is not created when a flink_master_url is detected.
+
+ Example pipeline options to enable automatic Dataproc cluster creation:
+ options = PipelineOptions([
+ '--runner=FlinkRunner',
+ '--project=my-project',
+ '--region=my-region',
+ '--environment_type=DOCKER'
+ ])
+
+ Example pipeline options to skip automatic Dataproc cluster creation:
+ options = PipelineOptions([
+ '--runner=FlinkRunner',
+ '--flink_master=example.internal:41979',
+ '--environment_type=DOCKER'
+ ])
+ """
+ from apache_beam.options.pipeline_options import FlinkRunnerOptions
+ flink_master = user_pipeline.options.view_as(
+ FlinkRunnerOptions).flink_master
+ if flink_master != '[auto]':
+ _LOGGER.info(
+ 'Skipping Dataproc cluster creation as a flink_master_url '
+ 'was detected.')
+ else:
+ from apache_beam.runners.portability.flink_runner import FlinkRunner
+ if isinstance(self._underlying_runner, FlinkRunner):
+ if not ie.current_env().get_dataproc_cluster_manager(user_pipeline):
+ ie.current_env().set_dataproc_cluster_manager(user_pipeline)
Review comment:
I've moved the logic over and changed the behavior slightly. What do you
think about my current approach?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscribe@beam.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org