timsaucer commented on code in PR #1361:
URL:
https://github.com/apache/datafusion-python/pull/1361#discussion_r2770347366
##########
python/tests/test_context.py:
##########
@@ -710,3 +710,68 @@ def test_create_dataframe_with_global_ctx(batch):
result = df.collect()[0].column(0)
assert result == pa.array([4, 5, 6])
+
+
+def test_csv_read_options_builder_pattern():
+ """Test CsvReadOptions builder pattern."""
+ from datafusion import CsvReadOptions
+
+ options = (
+ CsvReadOptions()
+ .with_has_header(False) # noqa: FBT003
+ .with_delimiter("|")
+ .with_quote("'")
+ .with_schema_infer_max_records(2000)
+ .with_truncated_rows(True) # noqa: FBT003
+ .with_newlines_in_values(True) # noqa: FBT003
+ .with_file_extension(".tsv")
+ )
+ assert options.has_header is False
+ assert options.delimiter == "|"
+ assert options.quote == "'"
+ assert options.schema_infer_max_records == 2000
+ assert options.truncated_rows is True
+ assert options.newlines_in_values is True
+ assert options.file_extension == ".tsv"
+
+
+@pytest.mark.parametrize(
+ ("as_read", "global_ctx"),
+ [
+ (True, True),
+ (True, False),
+ (False, False),
+ ],
+)
+def test_read_csv_with_options(tmp_path, as_read, global_ctx):
+ """Test reading CSV with CsvReadOptions."""
+ from datafusion import CsvReadOptions, SessionContext
+
+ # Create a test CSV file
+ csv_path = tmp_path / "test.csv"
+ csv_content = "name;age;city\nAlice;30;New York\nBob;25\n#Charlie;35;Paris"
+ csv_path.write_text(csv_content)
+
+ ctx = SessionContext()
+
+ # Test with CsvReadOptions
+ options = CsvReadOptions(
+ has_header=True, delimiter=";", comment="#", truncated_rows=True
+ )
Review Comment:
Added unit tests to cover all parameters. Good suggestion!
##########
docs/source/user-guide/io/csv.rst:
##########
@@ -36,3 +36,22 @@ An alternative is to use
:py:func:`~datafusion.context.SessionContext.register_c
ctx.register_csv("file", "file.csv")
df = ctx.table("file")
+
+If you require additional control over how to read the CSV file, you can use
+:py:class:`~datafusion.options.CsvReadOptions` to set a variety of options.
+
+.. code-block:: python
+
Review Comment:
Added
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]