This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new e279a7e  ARROW-8213: [Python][Dataset] Opening a dataset with a local incorrect path gives confusing error message
e279a7e is described below

commit e279a7e06e61c14868ca7d71dea795420aea6539
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Tue Apr 7 19:47:55 2020 -0500

    ARROW-8213: [Python][Dataset] Opening a dataset with a local incorrect path gives confusing error message
    
    Workaround until it is properly handled in the C++ implementation.
    
    Closes #6854 from kszucs/ARROW-8213
    
    Lead-authored-by: Krisztián Szűcs <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Wes McKinney <[email protected]>
---
 python/pyarrow/dataset.py            | 22 ++++++++++++++++------
 python/pyarrow/tests/test_dataset.py |  7 +++++++
 2 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/python/pyarrow/dataset.py b/python/pyarrow/dataset.py
index fff11be..daacf2b 100644
--- a/python/pyarrow/dataset.py
+++ b/python/pyarrow/dataset.py
@@ -159,15 +159,24 @@ def _ensure_fs(filesystem, path):
         FileSystem, LocalFileSystem, FileType, _normalize_path)
 
     if filesystem is None:
-        # first check if the file exists as a local (relative) file path
+        # First check if the file exists as a local (relative) file path
         filesystem = LocalFileSystem()
         try:
             infos = filesystem.get_file_info([path])[0]
         except OSError:
-            return FileSystem.from_uri(path)
-
-        if infos.type == FileType.NotFound:
-            return FileSystem.from_uri(path)
+            local_path_exists = False
+        else:
+            local_path_exists = (infos.type != FileType.NotFound)
+
+        if not local_path_exists:
+            # Perhaps it's a URI?
+            try:
+                return FileSystem.from_uri(path)
+            except ValueError as e:
+                if "empty scheme" not in str(e):
+                    raise
+                # ARROW-8213: not a URI, assume local path
+                # to get a nice error message.
 
     # ensure we have a proper path (eg no backslashes on Windows)
     path = _normalize_path(filesystem, path)
@@ -179,7 +188,8 @@ def _ensure_fs_and_paths(path, filesystem=None):
     # Return filesystem and list of string paths or FileSelector
     from pyarrow.fs import FileType, FileSelector
 
-    filesystem, path = _ensure_fs(filesystem, _stringify_path(path))
+    path = _stringify_path(path)
+    filesystem, path = _ensure_fs(filesystem, path)
     infos = filesystem.get_file_info([path])[0]
     if infos.type == FileType.Directory:
         # for directory, pass a selector
diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py
index dc28512..9bb0b91 100644
--- a/python/pyarrow/tests/test_dataset.py
+++ b/python/pyarrow/tests/test_dataset.py
@@ -1042,6 +1042,13 @@ def test_open_dataset_from_source_additional_kwargs(multisourcefs):
         ds.dataset(child, format="parquet")
 
 
+def test_open_dataset_non_existing_file():
+    # ARROW-8213: Opening a dataset with a local incorrect path gives confusing
+    #             error message
+    with pytest.raises(FileNotFoundError):
+        ds.dataset('i-am-not-existing.parquet', format='parquet')
+
+
 @pytest.mark.parquet
 @pytest.mark.s3
 def test_open_dataset_from_uri_s3(s3_connection, s3_server):

Reply via email to