robertwb commented on code in PR #33579:
URL: https://github.com/apache/beam/pull/33579#discussion_r1919399207
##########
sdks/python/apache_beam/yaml/yaml_io.py:
##########
@@ -484,6 +484,96 @@ def attributes_extractor(row):
timestamp_attribute=timestamp_attribute))
+def read_from_iceberg(
+ table: str,
+ catalog_name: Optional[str] = None,
+ catalog_properties: Optional[Mapping[str, str]] = None,
+ config_properties: Optional[Mapping[str, str]] = None,
+):
+ # TODO(robertwb): It'd be nice to derive this list of parameters, along with
+ # their types and docs, programmatically from the iceberg (or managed)
+ # schemas.
+
+ """Reads an Apache Iceberg table.
+
+ See also the [Apache Iceberg Beam documentation](
+ https://cloud.google.com/dataflow/docs/guides/managed-io#iceberg).
+
+ Args:
+ table: The identifier of the Apache Iceberg table. Example: "db.table1".
+ catalog_name: The name of the catalog. Example: "local".
+ catalog_properties: A map of configuration properties for the Apache Iceberg
+ catalog.
+ The required properties depend on the catalog. For more information, see
+ CatalogUtil in the Apache Iceberg documentation.
+ config_properties: An optional set of Hadoop configuration properties.
+ For more information, see CatalogUtil in the Apache Iceberg documentation.
+ """
+ return beam.managed.Read(
+ "iceberg",
+ config=dict(
+ table=table,
+ catalog_name=catalog_name,
+ catalog_properties=catalog_properties,
+ config_properties=config_properties))
+
+
+def write_to_iceberg(
+ table: str,
+ catalog_name: Optional[str] = None,
+ catalog_properties: Optional[Mapping[str, str]] = None,
+ config_properties: Optional[Mapping[str, str]] = None,
+ triggering_frequency_seconds: Optional[int] = None,
+ keep: Optional[Iterable[str]] = None,
+ drop: Optional[Iterable[str]] = None,
+ only: Optional[str] = None,
+):
+ # TODO(robertwb): It'd be nice to derive this list of parameters, along with
+ # their types and docs, programmatically from the iceberg (or managed)
+ # schemas.
+
+ """Writes to an Apache Iceberg table.
+
+ See also the [Apache Iceberg Beam documentation](
+ https://cloud.google.com/dataflow/docs/guides/managed-io#iceberg)
+ and [](
Review Comment:
Nice catch.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]