jorisvandenbossche commented on PR #13033:
URL: https://github.com/apache/arrow/pull/13033#issuecomment-1116094657
Hmm, the AppVeyor failure is actually related to this change at the moment:
```
____________________ test_parquet_dataset_factory_fsspec _____________________
tempdir = WindowsPath('C:/Users/appveyor/AppData/Local/Temp/1/pytest-of-appveyor/pytest-0/test_parquet_dataset_factory_f0')
@pytest.mark.parquet
def test_parquet_dataset_factory_fsspec(tempdir):
# https://issues.apache.org/jira/browse/ARROW-16413
fsspec = pytest.importorskip("fsspec")
# create dataset with pyarrow
root_path = tempdir / "test_parquet_dataset"
metadata_path, table = _create_parquet_dataset_simple(root_path)
# read using fsspec filesystem
fsspec_fs = fsspec.filesystem("file")
# manually creating a PyFileSystem, because passing the local fsspec
# filesystem would internally be converted to native LocalFileSystem
filesystem = fs.PyFileSystem(fs.FSSpecHandler(fsspec_fs))
dataset = ds.parquet_dataset(metadata_path, filesystem=filesystem)
assert dataset.schema.equals(table.schema)
assert len(dataset.files) == 4
> result = dataset.to_table()
pyarrow\tests\test_dataset.py:3140:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _
pyarrow\_dataset.pyx:304: in pyarrow._dataset.Dataset.to_table
return self.scanner(**kwargs).to_table()
pyarrow\_dataset.pyx:2549: in pyarrow._dataset.Scanner.to_table
return pyarrow_wrap_table(GetResultValue(result))
pyarrow\error.pxi:144: in pyarrow.lib.pyarrow_internal_check_status
return check_status(status)
pyarrow\_fs.pyx:1190: in pyarrow._fs._cb_open_input_file
stream = handler.open_input_file(frombytes(path))
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
_ _
self = <pyarrow.fs.FSSpecHandler object at 0x000002CA4A397D70>
path = 'f07c005726c84dc69f13ce79116d3304-0.parquet'
def open_input_file(self, path):
from pyarrow import PythonFile
if not self.fs.isfile(path):
> raise FileNotFoundError(path)
E FileNotFoundError: f07c005726c84dc69f13ce79116d3304-0.parquet
pyarrow\fs.py:400: FileNotFoundError
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]