yifanmai commented on a change in pull request #13763:
URL: https://github.com/apache/beam/pull/13763#discussion_r565534696
##########
File path: sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
##########
@@ -462,6 +462,62 @@ def run_pipeline(self, pipeline, options):
self._maybe_add_unified_worker_missing_options(options)
+ from apache_beam.transforms import environments
+ if options.view_as(SetupOptions).prebuild_sdk_container_engine:
+ # If prebuild_sdk_container_engine is specified, we build a new SDK
+ # container image with dependencies pre-installed and use that image
+ # instead of the inferred default container image.
+ self._default_environment = (
+ environments.DockerEnvironment.from_options(options))
+ options.view_as(WorkerOptions).worker_harness_container_image = (
+ self._default_environment.container_image)
+ else:
+ self._default_environment = (
+ environments.DockerEnvironment.from_container_image(
+ apiclient.get_container_image_from_options(options),
+ artifacts=environments.python_sdk_dependencies(options)))
+
+ # Optimize the pipeline if it is not streaming and optimizations are
+ # enabled in options.
+ pre_optimize = options.view_as(DebugOptions).lookup_experiment(
+ 'pre_optimize', 'default').lower()
+ if (not options.view_as(StandardOptions).streaming and
+ pre_optimize != 'none' and pre_optimize != 'default'):
+ from apache_beam.runners.portability.fn_api_runner import translations
+ if pre_optimize == 'all':
+ phases = [
+ translations.eliminate_common_key_with_none,
+ translations.pack_combiners,
+ translations.sort_stages
+ ]
+ else:
Review comment:
This follows the [behavior in
PortableRunner](https://github.com/apache/beam/blob/fc14f0b5bf9f98759d318f5708854b49bdcdcf68/sdks/python/apache_beam/runners/portability/portable_runner.py#L360-L373)
where we allow pre_optimize to be a comma-delimited list of phases.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]