This is an automated email from the ASF dual-hosted git repository.
ahmedabu98 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new efe4e941939 extend to yaml (#38371)
efe4e941939 is described below
commit efe4e941939a77275146ecceb01cd74b28555286
Author: Ahmed Abualsaud <[email protected]>
AuthorDate: Tue May 5 14:56:14 2026 -0400
extend to yaml (#38371)
---
sdks/python/apache_beam/yaml/yaml_io.py | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/sdks/python/apache_beam/yaml/yaml_io.py b/sdks/python/apache_beam/yaml/yaml_io.py
index 336e32adc25..92f8ec47cca 100644
--- a/sdks/python/apache_beam/yaml/yaml_io.py
+++ b/sdks/python/apache_beam/yaml/yaml_io.py
@@ -563,6 +563,7 @@ def write_to_iceberg(
drop: Optional[Iterable[str]] = None,
only: Optional[str] = None,
distribution_mode: Optional[str] = None,
+ autosharding: Optional[bool] = None,
):
# TODO(robertwb): It'd be nice to derive this list of parameters, along with
# their types and docs, programmatically from the iceberg (or managed)
@@ -616,6 +617,11 @@ def write_to_iceberg(
distributions:
- none: don't shuffle rows (default)
- hash: shuffle rows by partition key before writing data
+ autosharding: Enables dynamic sharding to automatically adjust the number
+ of parallel writers based on data volume. It handles data skew by
+ further sub-dividing partitions into multiple shards to prevent
+ bottlenecks during high-throughput writes. Only available with 'hash'
+ distribution mode.
"""
return beam.managed.Write(
"iceberg",
@@ -630,7 +636,8 @@ def write_to_iceberg(
keep=keep,
drop=drop,
only=only,
- distribution_mode=distribution_mode))
+ distribution_mode=distribution_mode,
+ autosharding=autosharding))
def io_providers():