Polber commented on code in PR #33185:
URL: https://github.com/apache/beam/pull/33185#discussion_r1894372776
##########
sdks/python/apache_beam/yaml/yaml_io.py:
##########
@@ -92,6 +126,137 @@ def write_to_text(pcoll, path: str):
lambda x: str(getattr(x, sole_field_name))) | beam.io.WriteToText(path)
+def read_from_csv(
+ path: str,
+ comment: Optional[str] = None,
+ delimiter: Optional[str] = None,
+ **kwargs):
+ """Reads comma-separated values (csv) files into Beam rows.
+
+ For more information about possible arguments, see
+ <https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html>
+
+ Args:
+ delimiter (str): Character or regex pattern to treat as the delimiter,
+ default ',' (comma).
+ comment (str): Character indicating that the remainder of the line should
+ not be parsed. If found at the beginning of a line, the line will be
+ ignored altogether. This parameter must be a single character.
+ path (str): The file path to read from as a local file path or a
+ GCS ``gs://`` path. The path can contain glob
+ characters such as ``*`` and ``?``.
+ """
+ return ReadFromCsv(path=path, comment=comment, delimiter=delimiter, **kwargs)
+
+
+def write_to_csv(path: str, delimiter: Optional[str] = ",", **kwargs):
+ """Writes Beam rows to a (set of) comma-separated values (csv) files.
+
+ For more information about possible arguments, see
+ <https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html>
+
+ Args:
+ delimiter (str): Single character to use as the field delimiter in the
+ output, default ',' (comma).
+ path (str): The file path to write to as a local file path or a
+ GCS ``gs://`` path. The files written will
+ begin with this prefix, followed by a shard identifier.
+ """
+ return WriteToCsv(path=path, sep=delimiter, **kwargs)
+
+
+def read_from_json(path: str, **kwargs):
+ """Reads json values from files into Beam rows.
+
+ For more information about possible arguments, see
+ <https://pandas.pydata.org/docs/reference/api/pandas.read_json.html>
+
+ Args:
+ path (str): The file path to read from as a local file path or a
Review Comment:
Right, I also found that limiting, but I copied that description from
an existing docstring; see, for example:
https://github.com/apache/beam/blob/142e39250db74d3d7c1491c8683c291948a86751/sdks/python/apache_beam/io/textio.py#L757-L759
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]