This is an automated email from the ASF dual-hosted git repository.
tvalentyn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new bd1c95cb0de Better flag documentation. (#28753)
bd1c95cb0de is described below
commit bd1c95cb0de4dd5b5d5fe34ad9059dc463460841
Author: tvalentyn <[email protected]>
AuthorDate: Mon Oct 2 16:49:36 2023 -0700
Better flag documentation. (#28753)
---
.../apache_beam/runners/dataflow/dataflow_runner.py | 20 +++++++-------------
1 file changed, 7 insertions(+), 13 deletions(-)
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
index 950bff768ef..7ad6ab04be6 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
@@ -97,10 +97,6 @@ class DataflowRunner(PipelineRunner):
def is_fnapi_compatible(self):
return False
- def apply(self, transform, input, options):
- _check_and_add_missing_options(options)
- return super().apply(transform, input, options)
-
@staticmethod
def poll_for_job_completion(
runner, result, duration, state_update_callback=None):
@@ -496,10 +492,6 @@ class DataflowRunner(PipelineRunner):
coders.registry.get_coder(typehint), window_coder=window_coder)
return coders.registry.get_coder(typehint)
- # TODO(srohde): Remove this after internal usages have been removed.
- def apply_GroupByKey(self, transform, pcoll, options):
- return transform.expand(pcoll)
-
def _verify_gbk_coders(self, transform, pcoll):
# Infer coder of parent.
#
@@ -589,12 +581,14 @@ def _check_and_add_missing_options(options):
sdk_location = options.view_as(SetupOptions).sdk_location
if 'dev' in beam.version.__version__ and sdk_location == 'default':
raise ValueError(
- "When launching Dataflow Jobs with an unreleased SDK, "
+ "You are submitting a pipeline with Apache Beam Python SDK "
+ f"{beam.version.__version__}. "
+ "When launching Dataflow jobs with an unreleased (dev) SDK, "
"please provide an SDK distribution in the --sdk_location option "
- "to use consistent SDK version at "
- "pipeline submission and runtime. To ignore this error and use the "
- "SDK installed in Dataflow dev containers, use "
- "--sdk_location=container.")
+ "to use a consistent SDK version at "
+ "pipeline submission and runtime. To ignore this error and use "
+ "an SDK preinstalled in the default Dataflow dev runtime environment "
+ "or in a custom container image, use --sdk_location=container.")
# Streaming only supports using runner v2 (aka unified worker).
# Runner v2 only supports using streaming engine (aka windmill service)