This is an automated email from the ASF dual-hosted git repository. kszucs pushed a commit to branch release-8.0.0 in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 64d11b48258ac95af753405bb273e3fd5cde523e
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Tue May 3 13:50:55 2022 +0200

    ARROW-16442: [Python][Dataset] Fix fragments of ORC Dataset to use FileFragment class

    Closes #13052 from jorisvandenbossche/ARROW-16442

    Authored-by: Joris Van den Bossche <[email protected]>
    Signed-off-by: Krisztián Szűcs <[email protected]>
---
 python/pyarrow/_dataset.pyx          | 1 +
 python/pyarrow/tests/test_dataset.py | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/python/pyarrow/_dataset.pyx b/python/pyarrow/_dataset.pyx
index 0abb28c879..9cc93e4e7f 100644
--- a/python/pyarrow/_dataset.pyx
+++ b/python/pyarrow/_dataset.pyx
@@ -840,6 +840,7 @@ cdef class Fragment(_Weakrefable):
             # corresponding subclasses of FileFragment
             'ipc': FileFragment,
             'csv': FileFragment,
+            'orc': FileFragment,
             'parquet': _get_parquet_symbol('ParquetFileFragment'),
         }
 
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index b8e15c597f..44769b4ec0 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -2880,6 +2880,8 @@ def test_orc_format(tempdir, dataset_reader):
     orc.write_table(table, path)
 
     dataset = ds.dataset(path, format=ds.OrcFileFormat())
+    fragments = list(dataset.get_fragments())
+    assert isinstance(fragments[0], ds.FileFragment)
     result = dataset_reader.to_table(dataset)
     result.validate(full=True)
     assert result.equals(table)
