kou commented on PR #14052: URL: https://github.com/apache/arrow/pull/14052#issuecomment-1364579136
@jorisvandenbossche The `test-conda-python-3.7-hdfs-*` failures seem to be related: https://github.com/ursacomputing/crossbow/actions/runs/3765628706/jobs/6401304143#step:5:9344

```text
=================================== FAILURES ===================================
_________________ TestLibHdfs.test_read_multiple_parquet_files _________________

self = <pyarrow.tests.test_hdfs.TestLibHdfs testMethod=test_read_multiple_parquet_files>

    @pytest.mark.pandas
    @pytest.mark.parquet
    def test_read_multiple_parquet_files(self):
        tmpdir = pjoin(self.tmp_path, 'multi-parquet-' + guid())
        self.hdfs.mkdir(tmpdir)
        expected = self._write_multiple_hdfs_pq_files(tmpdir)
>       result = self.hdfs.read_parquet(tmpdir)

opt/conda/envs/arrow/lib/python3.7/site-packages/pyarrow/tests/test_hdfs.py:318:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
opt/conda/envs/arrow/lib/python3.7/site-packages/pyarrow/filesystem.py:227: in read_parquet
    filesystem=self)
opt/conda/envs/arrow/lib/python3.7/site-packages/pyarrow/parquet/core.py:1759: in __new__
    thrift_container_size_limit=thrift_container_size_limit,
opt/conda/envs/arrow/lib/python3.7/site-packages/pyarrow/parquet/core.py:2402: in __init__
    filesystem, use_mmap=memory_map)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

filesystem = <pyarrow.hdfs.HadoopFileSystem object at 0x7ff3f8702d40>
use_mmap = False, allow_legacy_filesystem = False

    def _ensure_filesystem(
        filesystem, use_mmap=False, allow_legacy_filesystem=False
    ):
        if isinstance(filesystem, FileSystem):
            return filesystem
        elif isinstance(filesystem, str):
            if use_mmap:
                raise ValueError(
                    "Specifying to use memory mapping not supported for "
                    "filesystem specified as an URI string"
                )
            return _filesystem_from_str(filesystem)
        # handle fsspec-compatible filesystems
        try:
            import fsspec
        except ImportError:
            pass
        else:
            if isinstance(filesystem, fsspec.AbstractFileSystem):
                if type(filesystem).__name__ == 'LocalFileSystem':
                    # In case its a simple LocalFileSystem, use native arrow one
                    return LocalFileSystem(use_mmap=use_mmap)
                return PyFileSystem(FSSpecHandler(filesystem))

        # map old filesystems to new ones
        import pyarrow.filesystem as legacyfs
        if isinstance(filesystem, legacyfs.LocalFileSystem):
            return LocalFileSystem(use_mmap=use_mmap)
        # TODO handle HDFS?
        if allow_legacy_filesystem and isinstance(filesystem, legacyfs.FileSystem):
            return filesystem

        raise TypeError(
            "Unrecognized filesystem: {}. `filesystem` argument must be a "
            "FileSystem instance or a valid file system URI'".format(
>               type(filesystem))
        )
E       TypeError: Unrecognized filesystem: <class 'pyarrow.hdfs.HadoopFileSystem'>. `filesystem` argument must be a FileSystem instance or a valid file system URI'
```
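For context: `_ensure_filesystem` only maps the legacy *local* filesystem to a new-style one (note the `# TODO handle HDFS?` branch above), so the deprecated `pyarrow.hdfs.HadoopFileSystem` used by `TestLibHdfs` falls through to the `TypeError`. Below is a minimal sketch of the equivalent read going through the new `pyarrow.fs` API instead; the host, port, and directory path are placeholders, not values from the failing job:

```python
import pyarrow.parquet as pq
from pyarrow.fs import HadoopFileSystem  # new-style filesystem, not pyarrow.hdfs

# Hypothetical connection details; replace with a reachable HDFS namenode.
fs = HadoopFileSystem(host="namenode", port=8020)

# Reading a directory of Parquet files through the new filesystem API
# avoids the legacy pyarrow.hdfs wrapper that _ensure_filesystem rejects.
table = pq.read_table("/tmp/multi-parquet-dir", filesystem=fs)
print(table.num_rows)
```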
