[
https://issues.apache.org/jira/browse/ARROW-2085?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16351537#comment-16351537
]
ASF GitHub Bot commented on ARROW-2085:
---------------------------------------
wesm closed pull request #1552: ARROW-2085: [Python]
HadoopFileSystem.isdir/.isfile return False on missing paths
URL: https://github.com/apache/arrow/pull/1552
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/python/pyarrow/io-hdfs.pxi b/python/pyarrow/io-hdfs.pxi
index 83b14b687..dc6ba23ab 100644
--- a/python/pyarrow/io-hdfs.pxi
+++ b/python/pyarrow/io-hdfs.pxi
@@ -137,12 +137,18 @@ cdef class HadoopFileSystem:
def isdir(self, path):
cdef HdfsPathInfo info
- self._path_info(path, &info)
+ try:
+ self._path_info(path, &info)
+ except ArrowIOError:
+ return False
return info.kind == ObjectType_DIRECTORY
def isfile(self, path):
cdef HdfsPathInfo info
- self._path_info(path, &info)
+ try:
+ self._path_info(path, &info)
+ except ArrowIOError:
+ return False
return info.kind == ObjectType_FILE
def get_capacity(self):
diff --git a/python/pyarrow/tests/test_hdfs.py b/python/pyarrow/tests/test_hdfs.py
index b62458cd7..885272ba8 100644
--- a/python/pyarrow/tests/test_hdfs.py
+++ b/python/pyarrow/tests/test_hdfs.py
@@ -36,10 +36,10 @@
def hdfs_test_client(driver='libhdfs'):
- host = os.environ.get('ARROW_HDFS_TEST_HOST', 'localhost')
+ host = os.environ.get('ARROW_HDFS_TEST_HOST', 'default')
user = os.environ.get('ARROW_HDFS_TEST_USER', None)
try:
- port = int(os.environ.get('ARROW_HDFS_TEST_PORT', 20500))
+ port = int(os.environ.get('ARROW_HDFS_TEST_PORT', 0))
except ValueError:
raise ValueError('Env variable ARROW_HDFS_TEST_PORT was not '
'an integer')
@@ -162,6 +162,27 @@ def test_info(self):
assert file_path_info['kind'] == 'file'
assert file_path_info['size'] == len(data)
+ def test_exists_isdir_isfile(self):
+ dir_path = pjoin(self.tmp_path, 'info-base')
+ file_path = pjoin(dir_path, 'ex')
+ missing_path = pjoin(dir_path, 'this-path-is-missing')
+
+ self.hdfs.mkdir(dir_path)
+ with self.hdfs.open(file_path, 'wb') as f:
+ f.write(b'foobarbaz')
+
+ assert self.hdfs.exists(dir_path)
+ assert self.hdfs.exists(file_path)
+ assert not self.hdfs.exists(missing_path)
+
+ assert self.hdfs.isdir(dir_path)
+ assert not self.hdfs.isdir(file_path)
+ assert not self.hdfs.isdir(missing_path)
+
+ assert not self.hdfs.isfile(dir_path)
+ assert self.hdfs.isfile(file_path)
+ assert not self.hdfs.isfile(missing_path)
+
def test_disk_usage(self):
path = pjoin(self.tmp_path, 'disk-usage-base')
p1 = pjoin(path, 'p1')
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> HadoopFileSystem.isdir and .isfile should return False if the path doesn't
> exist
> ---------------------------------------------------------------------------------
>
> Key: ARROW-2085
> URL: https://issues.apache.org/jira/browse/ARROW-2085
> Project: Apache Arrow
> Issue Type: Bug
> Components: Python
> Reporter: Jim Crist
> Priority: Major
> Labels: pull-request-available
> Fix For: 0.9.0
>
>
> Per the python standard library, `isdir` and `isfile` should return `False`
> if the given path doesn't exist. Currently these methods error on missing
> paths.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)