This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 7ba65eb9 Bug Fix: Return the parsed path without netloc for HDFS (#675)
7ba65eb9 is described below
commit 7ba65eb936297edd0a7d996d577ede42eee2b0eb
Author: frankliee <[email protected]>
AuthorDate: Tue Apr 30 20:48:52 2024 +0800
Bug Fix: Return the parsed path without netloc for HDFS (#675)
---
pyiceberg/io/pyarrow.py | 2 +-
tests/io/test_pyarrow.py | 6 +++---
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index f8deb2f9..2a10e0ef 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -333,7 +333,7 @@ class PyArrowFileIO(FileIO):
if not uri.scheme:
return "file", uri.netloc, os.path.abspath(location)
elif uri.scheme == "hdfs":
- return uri.scheme, uri.netloc, location
+ return uri.scheme, uri.netloc, uri.path
else:
return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}"
diff --git a/tests/io/test_pyarrow.py b/tests/io/test_pyarrow.py
index 9e917775..90f5b08b 100644
--- a/tests/io/test_pyarrow.py
+++ b/tests/io/test_pyarrow.py
@@ -1644,9 +1644,9 @@ def test_parse_location() -> None:
assert netloc == expected_netloc
assert uri == expected_uri
- check_results("hdfs://127.0.0.1:9000/root/foo.txt", "hdfs", "127.0.0.1:9000", "hdfs://127.0.0.1:9000/root/foo.txt")
- check_results("hdfs://127.0.0.1/root/foo.txt", "hdfs", "127.0.0.1", "hdfs://127.0.0.1/root/foo.txt")
- check_results("hdfs://clusterA/root/foo.txt", "hdfs", "clusterA", "hdfs://clusterA/root/foo.txt")
+ check_results("hdfs://127.0.0.1:9000/root/foo.txt", "hdfs", "127.0.0.1:9000", "/root/foo.txt")
+ check_results("hdfs://127.0.0.1/root/foo.txt", "hdfs", "127.0.0.1", "/root/foo.txt")
+ check_results("hdfs://clusterA/root/foo.txt", "hdfs", "clusterA", "/root/foo.txt")
check_results("/root/foo.txt", "file", "", "/root/foo.txt")
check_results("/root/tmp/foo.txt", "file", "", "/root/tmp/foo.txt")