lidavidm commented on a change in pull request #10654:
URL: https://github.com/apache/arrow/pull/10654#discussion_r663899010
##########
File path: python/pyarrow/_dataset.pyx
##########
@@ -1057,6 +1057,23 @@ cdef class FileFragment(Fragment):
Fragment.init(self, sp)
self.file_fragment = <CFileFragment*> sp.get()
+ def __repr__(self):
+ type_name = frombytes(self.fragment.type_name())
+ if type_name != "parquet":
+ typ = " type={0}".format(type_name)
+ else:
+ # parquet has a subclass -> type embedded in class name
+ typ = ""
+ partition_dict = _get_partition_keys(self.partition_expression)
+ partition = ", ".join(
+ ["{0}={1}".format(key, val) for key, val in partition_dict.items()]
Review comment:
Nit: this might read a little more clearly with f-strings instead of `str.format`.
##########
File path: python/pyarrow/tests/test_dataset.py
##########
@@ -1365,6 +1366,37 @@ def test_fragments_parquet_subset_invalid(tempdir):
fragment.subset()
+@pytest.mark.pandas
+@pytest.mark.parquet
+def test_fragments_repr(tempdir, dataset):
+ # partitioned parquet dataset
+ fragment = list(dataset.get_fragments())[0]
+ assert (
+ repr(fragment) ==
+ "<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet "
Review comment:
Looks like the expected repr string in this assertion needs to be adjusted based on platform (e.g. the `path=` portion may differ on Windows).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]