AlenkaF commented on code in PR #47397:
URL: https://github.com/apache/arrow/pull/47397#discussion_r2315291406
##########
python/pyarrow/_csv.pyx:
##########
@@ -1108,6 +1148,38 @@ cdef class ConvertOptions(_Weakrefable):
except TypeError:
return False
+ def __repr__(self):
Review Comment:
Here's one idea that might help simplify the current setup (also matching
more closely with existing `__repr__`
[examples](https://github.com/apache/arrow/blob/55be8c011cd9e82707382d97417b0c3a0aabf822/python/pyarrow/_parquet.pyx#L73-L83):
```python
def _repr_base(self):
return (f"""
check_utf8={self.check_utf8}
column_types={self.column_types}
null_values={self.null_values}
true_values={self.true_values}
false_values={self.false_values}
decimal_point={self.decimal_point!r}
strings_can_be_null={self.strings_can_be_null}
quoted_strings_can_be_null={self.quoted_strings_can_be_null}
include_columns={self.include_columns}
include_missing_columns={self.include_missing_columns}
auto_dict_encode={self.auto_dict_encode}
auto_dict_max_cardinality={self.auto_dict_max_cardinality}
timestamp_parsers={self.timestamp_parsers}""")
def __repr__(self):
return (f"{object.__repr__(self)}{self._repr_base()}")
def __str__(self):
return (f"ConvertOptions({self._repr_base()})")
```
<details>
<summary>Output example</summary>
```
<pyarrow._csv.ConvertOptions object at 0x1147ea040>
check_utf8=True
column_types={'a': DataType(null)}
null_values=['N', 'nn']
true_values=['T', 'tt']
false_values=['F', 'ff']
decimal_point='.'
strings_can_be_null=False
quoted_strings_can_be_null=True
include_columns=[]
include_missing_columns=False
auto_dict_encode=False
auto_dict_max_cardinality=999
timestamp_parsers=[<pyarrow._csv._ISO8601 object at 0x10cd360d0>, '%Y-%m-%d']
```
and
```python
ConvertOptions(
check_utf8=True
column_types={'a': DataType(null)}
null_values=['N', 'nn']
true_values=['T', 'tt']
false_values=['F', 'ff']
decimal_point='.'
strings_can_be_null=False
quoted_strings_can_be_null=True
include_columns=[]
include_missing_columns=False
auto_dict_encode=False
auto_dict_max_cardinality=999
timestamp_parsers=[<pyarrow._csv._ISO8601 object at 0x10cd360d0>,
'%Y-%m-%d'])
```
</details>
Something similar to this idea can be done for all the classes (and for the
tests).
##########
python/pyarrow/_csv.pyx:
##########
@@ -585,6 +605,26 @@ cdef class ParseOptions(_Weakrefable):
except TypeError:
return False
+ def __repr__(self):
+ return (f"<pyarrow.csv.ParseOptions("
+ f"delimiter={self.delimiter!r}, "
+ f"quote_char={self.quote_char!r}, "
+ f"double_quote={self.double_quote}, "
+ f"escape_char={self.escape_char!r}, "
+ f"newlines_in_values={self.newlines_in_values}, "
+ f"ignore_empty_lines={self.ignore_empty_lines}, "
+ f"invalid_row_handler={self.invalid_row_handler})>")
+
+ def __str__(self):
+ return (f"ParseOptions("
+ f"delimiter={self.delimiter!r}, "
+ f"quote_char={self.quote_char!r}, "
+ f"double_quote={self.double_quote}, "
+ f"escape_char={self.escape_char!r}, "
+ f"newlines_in_values={self.newlines_in_values}, "
+ f"ignore_empty_lines={self.ignore_empty_lines}, "
+ f"invalid_row_handler={self.invalid_row_handler})")
Review Comment:
Thanks for adding this! One thing that I am thinking now is if
`invalid_row_handler` and also `timestamp_parsers` could be more readable if
not `None`:
```python
timestamp_parsers=[<pyarrow._csv._ISO8601 object at 0x10cd360d0>, '%Y-%m-%d']
```
```python
invalid_row_handler=<pyarrow.tests.test_csv.InvalidRowHandler object at
0x11e828050>
```
Not a blocker, of course, but maybe there's a cleaner way to represent these
in the output - not sure off the top of my head though!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]