derrickaw commented on code in PR #34411:
URL: https://github.com/apache/beam/pull/34411#discussion_r2036140028


##########
sdks/python/apache_beam/yaml/yaml_io.py:
##########
@@ -574,3 +578,87 @@ def write_to_iceberg(
 def io_providers():
   return yaml_provider.load_providers(
       yaml_utils.locate_data_file('standard_io.yaml'))
+
+
+def read_from_tfrecord(
+    file_pattern: str,
+    coder: Optional[coders.BytesCoder] = coders.BytesCoder(),
+    compression_type: str = "AUTO",
+    validate: Optional[bool] = True):
+  """Reads data from TFRecord.
+
+  Args:
+    file_pattern (str): A file glob pattern to read TFRecords from.
+    coder (coders.BytesCoder): Coder used to decode each record.
+    compression_type (CompressionTypes): Used to handle compressed input files.
+      Default value is CompressionTypes.AUTO, in which case the file_path's
+      extension will be used to detect the compression.
+    validate (bool): Boolean flag to verify that the files exist during the 
+      pipeline creation time.
+  """
+  return ReadFromTFRecord(
+      file_pattern=file_pattern,
+      compression_type=getattr(CompressionTypes, compression_type),
+      validate=validate) | beam.Map(lambda s: beam.Row(record=s))
+
+
+@beam.ptransform_fn
+def write_to_tfrecord(
+    pcoll,
+    file_path_prefix: str,
+    coder: Optional[coders.BytesCoder] = coders.BytesCoder(),
+    file_name_suffix: Optional[str] = "",
+    num_shards: Optional[int] = 0,
+    shard_name_template: Optional[str] = None,
+    compression_type: str = "AUTO",
+    no_spilling: Optional[bool] = None):

Review Comment:
   removed, thanks



##########
sdks/python/apache_beam/yaml/yaml_io.py:
##########
@@ -574,3 +578,87 @@ def write_to_iceberg(
 def io_providers():
   return yaml_provider.load_providers(
       yaml_utils.locate_data_file('standard_io.yaml'))
+
+
+def read_from_tfrecord(
+    file_pattern: str,
+    coder: Optional[coders.BytesCoder] = coders.BytesCoder(),
+    compression_type: str = "AUTO",
+    validate: Optional[bool] = True):
+  """Reads data from TFRecord.
+
+  Args:
+    file_pattern (str): A file glob pattern to read TFRecords from.
+    coder (coders.BytesCoder): Coder used to decode each record.
+    compression_type (CompressionTypes): Used to handle compressed input files.
+      Default value is CompressionTypes.AUTO, in which case the file_path's
+      extension will be used to detect the compression.
+    validate (bool): Boolean flag to verify that the files exist during the 
+      pipeline creation time.
+  """
+  return ReadFromTFRecord(
+      file_pattern=file_pattern,
+      compression_type=getattr(CompressionTypes, compression_type),
+      validate=validate) | beam.Map(lambda s: beam.Row(record=s))
+
+
+@beam.ptransform_fn
+def write_to_tfrecord(
+    pcoll,
+    file_path_prefix: str,
+    coder: Optional[coders.BytesCoder] = coders.BytesCoder(),
+    file_name_suffix: Optional[str] = "",
+    num_shards: Optional[int] = 0,
+    shard_name_template: Optional[str] = None,
+    compression_type: str = "AUTO",
+    no_spilling: Optional[bool] = None):
+  """Writes data to TFRecord.
+
+  public abstract Builder setNoSpilling(boolean value);

Review Comment:
   removed, thanks



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@beam.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

Reply via email to