This is an automated email from the ASF dual-hosted git repository.
kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new fe0a2378 Make REST catalog namespace separator configurable (#2826)
fe0a2378 is described below
commit fe0a2378933973f440851c56a9df7c10bc4c3148
Author: Alex Stephen <[email protected]>
AuthorDate: Thu Jan 22 09:11:37 2026 -0800
Make REST catalog namespace separator configurable (#2826)
<!--
Thanks for opening a pull request!
-->
<!-- In the case this PR will resolve an issue, please replace
${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
Closes #1183
This adds a configurable namespace separator in the REST Catalog.
# Rationale for this change
Certain implementations expect a different namespace separator.
## Are these changes tested?
Tests included.
## Are there any user-facing changes?
<!-- In the case of user-facing changes, please add the changelog label.
-->
---
pyiceberg/catalog/rest/__init__.py | 46 +++++++++++++++++++++++++++++---------
tests/catalog/test_rest.py | 25 +++++++++++++++++++++
tests/integration/test_catalog.py | 34 ++++++++++++++++++++++++++++
3 files changed, 95 insertions(+), 10 deletions(-)
diff --git a/pyiceberg/catalog/rest/__init__.py
b/pyiceberg/catalog/rest/__init__.py
index d7ef6ec8..e3cc09ed 100644
--- a/pyiceberg/catalog/rest/__init__.py
+++ b/pyiceberg/catalog/rest/__init__.py
@@ -21,6 +21,7 @@ from typing import (
Any,
Union,
)
+from urllib.parse import quote, unquote
from pydantic import ConfigDict, Field, TypeAdapter, field_validator
from requests import HTTPError, Session
@@ -234,7 +235,8 @@ REST_SCAN_PLANNING_ENABLED_DEFAULT = False
VIEW_ENDPOINTS_SUPPORTED = "view-endpoints-supported"
VIEW_ENDPOINTS_SUPPORTED_DEFAULT = False
-NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)
+NAMESPACE_SEPARATOR_PROPERTY = "namespace-separator"
+DEFAULT_NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)
def _retry_hook(retry_state: RetryCallState) -> None:
@@ -330,6 +332,7 @@ class RestCatalog(Catalog):
_session: Session
_auth_manager: AuthManager | None
_supported_endpoints: set[Endpoint]
+ _namespace_separator: str
def __init__(self, name: str, **properties: str):
"""Rest Catalog.
@@ -596,6 +599,16 @@ class RestCatalog(Catalog):
return optional_oauth_param
+ def _encode_namespace_path(self, namespace: Identifier) -> str:
+ """
+ Encode a namespace for use as a path parameter in a URL.
+
+ Each part of the namespace is URL-encoded using `urllib.parse.quote`
+ (ensuring characters like '/' are encoded) and then joined by the
+ configured namespace separator.
+ """
+ return self._namespace_separator.join(quote(part, safe="") for part in
namespace)
+
def _fetch_config(self) -> None:
params = {}
if warehouse_location := self.properties.get(WAREHOUSE_LOCATION):
@@ -628,6 +641,11 @@ class RestCatalog(Catalog):
if property_as_bool(self.properties, VIEW_ENDPOINTS_SUPPORTED,
VIEW_ENDPOINTS_SUPPORTED_DEFAULT):
self._supported_endpoints.update(VIEW_ENDPOINTS)
+ separator_from_properties =
self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR)
+ if not separator_from_properties:
+ raise ValueError("Namespace separator cannot be an empty string")
+ self._namespace_separator = unquote(separator_from_properties)
+
def _identifier_to_validated_tuple(self, identifier: str | Identifier) ->
Identifier:
identifier_tuple = self.identifier_to_tuple(identifier)
if len(identifier_tuple) <= 1:
@@ -638,10 +656,17 @@ class RestCatalog(Catalog):
self, identifier: str | Identifier | TableIdentifier, kind:
IdentifierKind = IdentifierKind.TABLE
) -> Properties:
if isinstance(identifier, TableIdentifier):
- return {"namespace":
NAMESPACE_SEPARATOR.join(identifier.namespace.root), kind.value:
identifier.name}
+ return {
+ "namespace":
self._encode_namespace_path(tuple(identifier.namespace.root)),
+ kind.value: quote(identifier.name, safe=""),
+ }
identifier_tuple = self._identifier_to_validated_tuple(identifier)
- return {"namespace": NAMESPACE_SEPARATOR.join(identifier_tuple[:-1]),
kind.value: identifier_tuple[-1]}
+ # Use quote to ensure that '/' aren't treated as path separators.
+ return {
+ "namespace": self._encode_namespace_path(identifier_tuple[:-1]),
+ kind.value: quote(identifier_tuple[-1], safe=""),
+ }
def _split_identifier_for_json(self, identifier: str | Identifier) ->
dict[str, Identifier | str]:
identifier_tuple = self._identifier_to_validated_tuple(identifier)
@@ -864,7 +889,7 @@ class RestCatalog(Catalog):
def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
self._check_endpoint(Capability.V1_LIST_TABLES)
namespace_tuple = self._check_valid_namespace_identifier(namespace)
- namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple)
+ namespace_concat = self._encode_namespace_path(namespace_tuple)
response = self._session.get(self.url(Endpoints.list_tables,
namespace=namespace_concat))
try:
response.raise_for_status()
@@ -950,7 +975,7 @@ class RestCatalog(Catalog):
if Capability.V1_LIST_VIEWS not in self._supported_endpoints:
return []
namespace_tuple = self._check_valid_namespace_identifier(namespace)
- namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple)
+ namespace_concat = self._encode_namespace_path(namespace_tuple)
response = self._session.get(self.url(Endpoints.list_views,
namespace=namespace_concat))
try:
response.raise_for_status()
@@ -1020,7 +1045,7 @@ class RestCatalog(Catalog):
def drop_namespace(self, namespace: str | Identifier) -> None:
self._check_endpoint(Capability.V1_DELETE_NAMESPACE)
namespace_tuple = self._check_valid_namespace_identifier(namespace)
- namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
+ namespace = self._encode_namespace_path(namespace_tuple)
response = self._session.delete(self.url(Endpoints.drop_namespace,
namespace=namespace))
try:
response.raise_for_status()
@@ -1033,7 +1058,7 @@ class RestCatalog(Catalog):
namespace_tuple = self.identifier_to_tuple(namespace)
response = self._session.get(
self.url(
-
f"{Endpoints.list_namespaces}?parent={NAMESPACE_SEPARATOR.join(namespace_tuple)}"
+
f"{Endpoints.list_namespaces}?parent={self._encode_namespace_path(namespace_tuple)}"
if namespace_tuple
else Endpoints.list_namespaces
),
@@ -1049,7 +1074,7 @@ class RestCatalog(Catalog):
def load_namespace_properties(self, namespace: str | Identifier) ->
Properties:
self._check_endpoint(Capability.V1_LOAD_NAMESPACE)
namespace_tuple = self._check_valid_namespace_identifier(namespace)
- namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
+ namespace = self._encode_namespace_path(namespace_tuple)
response =
self._session.get(self.url(Endpoints.load_namespace_metadata,
namespace=namespace))
try:
response.raise_for_status()
@@ -1064,7 +1089,7 @@ class RestCatalog(Catalog):
) -> PropertiesUpdateSummary:
self._check_endpoint(Capability.V1_UPDATE_NAMESPACE)
namespace_tuple = self._check_valid_namespace_identifier(namespace)
- namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
+ namespace = self._encode_namespace_path(namespace_tuple)
payload = {"removals": list(removals or []), "updates": updates}
response =
self._session.post(self.url(Endpoints.update_namespace_properties,
namespace=namespace), json=payload)
try:
@@ -1081,7 +1106,8 @@ class RestCatalog(Catalog):
@retry(**_RETRY_ARGS)
def namespace_exists(self, namespace: str | Identifier) -> bool:
namespace_tuple = self._check_valid_namespace_identifier(namespace)
- namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
+ namespace = self._encode_namespace_path(namespace_tuple)
+
# fallback in order to work with older rest catalog implementations
if Capability.V1_NAMESPACE_EXISTS not in self._supported_endpoints:
try:
diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py
index 2e3b0d50..71341c8b 100644
--- a/tests/catalog/test_rest.py
+++ b/tests/catalog/test_rest.py
@@ -1991,6 +1991,31 @@ def test_rest_catalog_with_google_credentials_path(
assert actual_headers["Authorization"] == expected_auth_header
+def test_custom_namespace_separator(rest_mock: Mocker) -> None:
+ custom_separator = "-"
+ namespace_part1 = "some"
+ namespace_part2 = "namespace"
+ # The expected URL path segment should use the literal custom_separator
+ expected_url_path_segment =
f"{namespace_part1}{custom_separator}{namespace_part2}"
+
+ rest_mock.get(
+ f"{TEST_URI}v1/config",
+ json={"defaults": {}, "overrides": {}},
+ status_code=200,
+ )
+ rest_mock.get(
+ f"{TEST_URI}v1/namespaces/{expected_url_path_segment}",
+ json={"namespace": [namespace_part1, namespace_part2], "properties":
{"prop": "yes"}},
+ status_code=200,
+ request_headers=TEST_HEADERS,
+ )
+
+ catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN,
**{"namespace-separator": custom_separator})
+ catalog.load_namespace_properties((namespace_part1, namespace_part2))
+
+ assert rest_mock.last_request.url ==
f"{TEST_URI}v1/namespaces/{expected_url_path_segment}"
+
+
@pytest.mark.filterwarnings(
"ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client
is missing the OAuth2 server URI:DeprecationWarning"
)
diff --git a/tests/integration/test_catalog.py
b/tests/integration/test_catalog.py
index 0c776665..fb702625 100644
--- a/tests/integration/test_catalog.py
+++ b/tests/integration/test_catalog.py
@@ -16,6 +16,7 @@
# under the License.
import os
+import uuid
from collections.abc import Generator
from pathlib import Path, PosixPath
@@ -601,3 +602,36 @@ def test_register_table_existing(test_catalog: Catalog,
table_schema_nested: Sch
# Assert that registering the table again raises TableAlreadyExistsError
with pytest.raises(TableAlreadyExistsError):
test_catalog.register_table(identifier,
metadata_location=table.metadata_location)
+
+
[email protected]
+def test_rest_custom_namespace_separator(rest_catalog: RestCatalog,
table_schema_simple: Schema) -> None:
+ """
+ Tests that the REST catalog correctly picks up the namespace-separator
from the config endpoint.
+ The REST Catalog is configured with a '.' namespace separator.
+ """
+ assert rest_catalog._namespace_separator == "."
+
+ unique_id = uuid.uuid4().hex
+ parent_namespace = (f"test_parent_{unique_id}",)
+ child_namespace_part = "child"
+ full_namespace_tuple = (*parent_namespace, child_namespace_part)
+
+ table_name = "my_table"
+ full_table_identifier_tuple = (*full_namespace_tuple, table_name)
+
+ rest_catalog.create_namespace(namespace=parent_namespace)
+ rest_catalog.create_namespace(namespace=full_namespace_tuple)
+
+ namespaces = rest_catalog.list_namespaces(parent_namespace)
+ assert full_namespace_tuple in namespaces
+
+ # Test with a table
+ table = rest_catalog.create_table(identifier=full_table_identifier_tuple,
schema=table_schema_simple)
+ assert table.name() == full_table_identifier_tuple
+
+ tables = rest_catalog.list_tables(full_namespace_tuple)
+ assert full_table_identifier_tuple in tables
+
+ loaded_table =
rest_catalog.load_table(identifier=full_table_identifier_tuple)
+ assert loaded_table.name() == full_table_identifier_tuple