This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 7ba65eb9 Bug Fix: Return the parsed path without netloc for HDFS (#675)
7ba65eb9 is described below

commit 7ba65eb936297edd0a7d996d577ede42eee2b0eb
Author: frankliee <[email protected]>
AuthorDate: Tue Apr 30 20:48:52 2024 +0800

    Bug Fix: Return the parsed path without netloc for HDFS (#675)
---
 pyiceberg/io/pyarrow.py  | 2 +-
 tests/io/test_pyarrow.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index f8deb2f9..2a10e0ef 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -333,7 +333,7 @@ class PyArrowFileIO(FileIO):
         if not uri.scheme:
             return "file", uri.netloc, os.path.abspath(location)
         elif uri.scheme == "hdfs":
-            return uri.scheme, uri.netloc, location
+            return uri.scheme, uri.netloc, uri.path
         else:
             return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}"
 
diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
index 9e917775..90f5b08b 100644
--- a/tests/io/test_pyarrow.py
+++ b/tests/io/test_pyarrow.py
@@ -1644,9 +1644,9 @@ def test_parse_location() -> None:
         assert netloc == expected_netloc
         assert uri == expected_uri
 
-    check_results("hdfs://127.0.0.1:9000/root/foo.txt", "hdfs", "127.0.0.1:9000", "hdfs://127.0.0.1:9000/root/foo.txt")
-    check_results("hdfs://127.0.0.1/root/foo.txt", "hdfs", "127.0.0.1", "hdfs://127.0.0.1/root/foo.txt")
-    check_results("hdfs://clusterA/root/foo.txt", "hdfs", "clusterA", "hdfs://clusterA/root/foo.txt")
+    check_results("hdfs://127.0.0.1:9000/root/foo.txt", "hdfs", "127.0.0.1:9000", "/root/foo.txt")
+    check_results("hdfs://127.0.0.1/root/foo.txt", "hdfs", "127.0.0.1", "/root/foo.txt")
+    check_results("hdfs://clusterA/root/foo.txt", "hdfs", "clusterA", "/root/foo.txt")
 
     check_results("/root/foo.txt", "file", "", "/root/foo.txt")
     check_results("/root/tmp/foo.txt", "file", "", "/root/tmp/foo.txt")

Reply via email to