This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new d59e6c85e2 Python: Don't warn on an identifier (#6844)
d59e6c85e2 is described below
commit d59e6c85e216dab2316d52cd45f18fa4896302c0
Author: Fokko Driesprong <[email protected]>
AuthorDate: Mon Mar 6 08:47:13 2023 +0100
Python: Don't warn on an identifier (#6844)
When we pass in a warehouse identifier, we don't
want to emit a warning. The warning message would not
be helpful either:
```
No preferred file implementation for scheme:
```
The scheme is empty because an identifier has none.
---
python/pyiceberg/io/__init__.py | 22 ++++++++++++----------
python/tests/io/test_io.py | 9 +++++++++
2 files changed, 21 insertions(+), 10 deletions(-)
diff --git a/python/pyiceberg/io/__init__.py b/python/pyiceberg/io/__init__.py
index 9690e53c67..f1287b4f4a 100644
--- a/python/pyiceberg/io/__init__.py
+++ b/python/pyiceberg/io/__init__.py
@@ -26,6 +26,7 @@ from __future__ import annotations
import importlib
import logging
+import warnings
from abc import ABC, abstractmethod
from io import SEEK_SET
from types import TracebackType
@@ -275,21 +276,22 @@ def _import_file_io(io_impl: str, properties: Properties)
-> Optional[FileIO]:
class_ = getattr(module, class_name)
return class_(properties)
except ModuleNotFoundError:
- logger.warning("Could not initialize FileIO: %s", io_impl)
+ warnings.warn(f"Could not initialize FileIO: {io_impl}")
return None
PY_IO_IMPL = "py-io-impl"
-def _infer_file_io_from_schema(path: str, properties: Properties) ->
Optional[FileIO]:
+def _infer_file_io_from_scheme(path: str, properties: Properties) ->
Optional[FileIO]:
parsed_url = urlparse(path)
- if file_ios := SCHEMA_TO_FILE_IO.get(parsed_url.scheme):
- for file_io_path in file_ios:
- if file_io := _import_file_io(file_io_path, properties):
- return file_io
- else:
- logger.warning("No preferred file implementation for schema: %s",
parsed_url.scheme)
+ if parsed_url.scheme:
+ if file_ios := SCHEMA_TO_FILE_IO.get(parsed_url.scheme):
+ for file_io_path in file_ios:
+ if file_io := _import_file_io(file_io_path, properties):
+ return file_io
+ else:
+ warnings.warn(f"No preferred file implementation for scheme:
{parsed_url.scheme}")
return None
@@ -304,12 +306,12 @@ def load_file_io(properties: Properties = EMPTY_DICT,
location: Optional[str] =
# Check the table location
if location:
- if file_io := _infer_file_io_from_schema(location, properties):
+ if file_io := _infer_file_io_from_scheme(location, properties):
return file_io
# Look at the schema of the warehouse
if warehouse_location := properties.get(WAREHOUSE):
- if file_io := _infer_file_io_from_schema(warehouse_location,
properties):
+ if file_io := _infer_file_io_from_scheme(warehouse_location,
properties):
return file_io
try:
diff --git a/python/tests/io/test_io.py b/python/tests/io/test_io.py
index c872ae1c7c..c4dc3d45a5 100644
--- a/python/tests/io/test_io.py
+++ b/python/tests/io/test_io.py
@@ -24,6 +24,7 @@ from pyiceberg.io import (
ARROW_FILE_IO,
PY_IO_IMPL,
_import_file_io,
+ _infer_file_io_from_scheme,
load_file_io,
)
from pyiceberg.io.pyarrow import PyArrowFileIO
@@ -302,3 +303,11 @@ def test_mock_table_location_file_io() -> None:
def test_gibberish_table_location_file_io() -> None:
# For testing the selection logic
assert isinstance(load_file_io({}, "gibberish"), PyArrowFileIO)
+
+
+def test_infer_file_io_from_schema_unknown() -> None:
+ # When we have an unknown scheme, we would like to know
+ with pytest.warns(UserWarning) as w:
+ _infer_file_io_from_scheme("unknown://bucket/path/", {})
+
+ assert str(w[0].message) == "No preferred file implementation for scheme:
unknown"