This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 8d79664d Support viewfs scheme alongside hdfs (#777)
8d79664d is described below
commit 8d79664d3a6010a92468bfbee1a55283591d7800
Author: Yothin M <[email protected]>
AuthorDate: Fri May 31 15:00:53 2024 +0700
Support viewfs scheme alongside hdfs (#777)
---
pyiceberg/io/__init__.py | 1 +
pyiceberg/io/pyarrow.py | 6 +++---
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py
index 1a78f306..9143cf66 100644
--- a/pyiceberg/io/__init__.py
+++ b/pyiceberg/io/__init__.py
@@ -284,6 +284,7 @@ SCHEMA_TO_FILE_IO: Dict[str, List[str]] = {
"gs": [ARROW_FILE_IO],
"file": [ARROW_FILE_IO, FSSPEC_FILE_IO],
"hdfs": [ARROW_FILE_IO],
+ "viewfs": [ARROW_FILE_IO],
"abfs": [FSSPEC_FILE_IO],
"abfss": [FSSPEC_FILE_IO],
}
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 9216c37f..04f30ec6 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -332,7 +332,7 @@ class PyArrowFileIO(FileIO):
uri = urlparse(location)
if not uri.scheme:
return "file", uri.netloc, os.path.abspath(location)
- elif uri.scheme == "hdfs":
+ elif uri.scheme in ("hdfs", "viewfs"):
return uri.scheme, uri.netloc, uri.path
else:
return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}"
@@ -356,12 +356,12 @@ class PyArrowFileIO(FileIO):
client_kwargs["connect_timeout"] = float(connect_timeout)
return S3FileSystem(**client_kwargs)
- elif scheme == "hdfs":
+ elif scheme in ("hdfs", "viewfs"):
from pyarrow.fs import HadoopFileSystem
hdfs_kwargs: Dict[str, Any] = {}
if netloc:
- return HadoopFileSystem.from_uri(f"hdfs://{netloc}")
+ return HadoopFileSystem.from_uri(f"{scheme}://{netloc}")
if host := self.properties.get(HDFS_HOST):
hdfs_kwargs["host"] = host
if port := self.properties.get(HDFS_PORT):