VictorPlusC commented on a change in pull request #16741:
URL: https://github.com/apache/beam/pull/16741#discussion_r808407791
##########
File path: sdks/python/apache_beam/runners/interactive/interactive_runner.py
##########
@@ -209,6 +218,62 @@ def visit_transform(self, transform_node):
return main_job_result
+ # TODO(victorhc): Move this method somewhere else if performance is impacted
+ # by generating a cluster during runtime.
+ def _create_dataproc_cluster_if_applicable(self, user_pipeline):
+ """ Creates a Dataproc cluster if the provided user_pipeline is running
+ FlinkRunner and no flink_master_url was provided as an option. A cluster
+ is not created when a flink_master_url is detected.
+
+ Example pipeline options to enable automatic Dataproc cluster creation:
+ options = PipelineOptions([
+ '--runner=FlinkRunner',
+ '--project=my-project',
+ '--region=my-region',
+ '--environment_type=DOCKER'
+ ])
+
+ Example pipeline options to skip automatic Dataproc cluster creation:
+ options = PipelineOptions([
+ '--runner=FlinkRunner',
+ '--flink_master=example.internal:41979',
+ '--environment_type=DOCKER'
+ ])
+ """
+ from apache_beam.options.pipeline_options import FlinkRunnerOptions
+ flink_master = user_pipeline.options.view_as(
+ FlinkRunnerOptions).flink_master
+ if flink_master == '[auto]':
+ from apache_beam.runners.portability.flink_runner import FlinkRunner
+ if isinstance(self._underlying_runner, FlinkRunner):
Review comment:
This flow is much nicer, thanks! I've changed the format and added
additional comments for clarity.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]