kevinjqliu commented on code in PR #2848: URL: https://github.com/apache/iceberg-python/pull/2848#discussion_r2644568732
########## tests/catalog/test_rest.py: ########## Review Comment: nit: wdyt of adding the `endpoints` in the `rest_mock` [here](https://github.com/apache/iceberg-python/blob/175406457b4b6660bc6b740b1208a715779c1b27/tests/catalog/test_rest.py#L107-L118) that way we dont need to add it to each test. We can modify it when testing specific cases, such as when an older server does not return the view endpoints, or when testing the endpoint response directly ########## pyiceberg/catalog/rest/__init__.py: ########## @@ -76,6 +76,43 @@ import pyarrow as pa +class HttpMethod(str, Enum): + GET = "GET" + HEAD = "HEAD" + POST = "POST" + DELETE = "DELETE" + + +class Endpoint(IcebergBaseModel): + model_config = ConfigDict(frozen=True) + + http_method: HttpMethod = Field() + path: str = Field() + + @field_validator("path", mode="before") + @classmethod + def _validate_path(cls, raw_path: str) -> str: + if not raw_path: + raise ValueError("Invalid path: empty") + raw_path = raw_path.strip() + if not raw_path: + raise ValueError("Invalid path: empty") Review Comment: ```suggestion raw_path = raw_path.strip() if not raw_path: raise ValueError("Invalid path: empty") ``` i think we can just check once here ########## pyiceberg/catalog/rest/__init__.py: ########## @@ -843,6 +998,13 @@ def table_exists(self, identifier: str | Identifier) -> bool: Returns: bool: True if the table exists, False otherwise. """ + if Capability.V1_TABLE_EXISTS not in self._supported_endpoints: Review Comment: same as above, add a note about this special case https://github.com/apache/iceberg/blob/0069c5e09617d99a3ddce33a4a093fe288243eeb/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java#L375-L392 https://github.com/apache/iceberg/blob/0069c5e09617d99a3ddce33a4a093fe288243eeb/api/src/main/java/org/apache/iceberg/catalog/SessionCatalog.java#L180-L187 ########## pyiceberg/catalog/rest/__init__.py: ########## @@ -100,6 +137,66 @@ class Endpoints: fetch_scan_tasks: str = "namespaces/{namespace}/tables/{table}/tasks" +class Capability: Review Comment: nit: maybe we can refactor the `Endpoints` class and consolidate this class ########## pyiceberg/catalog/rest/__init__.py: ########## @@ -76,6 +76,43 @@ import pyarrow as pa +class HttpMethod(str, Enum): + GET = "GET" + HEAD = "HEAD" + POST = "POST" + DELETE = "DELETE" + + +class Endpoint(IcebergBaseModel): + model_config = ConfigDict(frozen=True) + + http_method: HttpMethod = Field() + path: str = Field() + + @field_validator("path", mode="before") + @classmethod + def _validate_path(cls, raw_path: str) -> str: + if not raw_path: + raise ValueError("Invalid path: empty") + raw_path = raw_path.strip() + if not raw_path: + raise ValueError("Invalid path: empty") + return raw_path + + def __str__(self) -> str: + """Return the string representation of the Endpoint class.""" + return f"{self.http_method.value} {self.path}" + + @classmethod + def from_string(cls, endpoint: str | None) -> "Endpoint": Review Comment: ```suggestion def from_string(cls, endpoint: str) -> "Endpoint": ``` can we enforce that `endpoint` must be `str`? ########## pyiceberg/catalog/rest/__init__.py: ########## @@ -327,6 +437,18 @@ def url(self, endpoint: str, prefixed: bool = True, **kwargs: Any) -> str: return url + endpoint.format(**kwargs) + def _check_endpoint(self, endpoint: Endpoint) -> None: + """Check if an endpoint is supported by the server. + + Args: + endpoint: The endpoint to check against the set of supported endpoints + + Raises: + NotImplementedError: If the endpoint is not supported. + """ + if endpoint not in self._supported_endpoints: + raise NotImplementedError(f"Server does not support endpoint: {endpoint}") Review Comment: nit: java throws `UnsupportedOperationException` [here](https://github.com/apache/iceberg/blob/0069c5e09617d99a3ddce33a4a093fe288243eeb/core/src/main/java/org/apache/iceberg/rest/Endpoint.java#L139C17-L139C46) ########## pyiceberg/catalog/rest/__init__.py: ########## @@ -76,6 +76,43 @@ import pyarrow as pa +class HttpMethod(str, Enum): + GET = "GET" + HEAD = "HEAD" + POST = "POST" + DELETE = "DELETE" + + +class Endpoint(IcebergBaseModel): + model_config = ConfigDict(frozen=True) + + http_method: HttpMethod = Field() + path: str = Field() + + @field_validator("path", mode="before") + @classmethod + def _validate_path(cls, raw_path: str) -> str: + if not raw_path: + raise ValueError("Invalid path: empty") + raw_path = raw_path.strip() + if not raw_path: + raise ValueError("Invalid path: empty") + return raw_path + + def __str__(self) -> str: + """Return the string representation of the Endpoint class.""" + return f"{self.http_method.value} {self.path}" + + @classmethod + def from_string(cls, endpoint: str | None) -> "Endpoint": + if endpoint is None: + raise ValueError("Invalid endpoint (must consist of 'METHOD /path'): None") + elements = endpoint.split(None, 1) Review Comment: ```suggestion elements = endpoint.strip().split(None, 1) ``` strip leading/trailing whitespace before split, just in case ########## pyiceberg/catalog/rest/__init__.py: ########## @@ -819,6 +966,14 @@ def update_namespace_properties( def namespace_exists(self, namespace: str | Identifier) -> bool: namespace_tuple = self._check_valid_namespace_identifier(namespace) namespace = NAMESPACE_SEPARATOR.join(namespace_tuple) + + if Capability.V1_NAMESPACE_EXISTS not in self._supported_endpoints: + try: + self.load_namespace_properties(namespace_tuple) + return True + except NoSuchNamespaceError: + return False + Review Comment: took me a while to find this logic 😄 could we add a note similar here similar to java's ``` // fallback in order to work with 1.7.x and older servers ``` https://github.com/apache/iceberg/blob/0069c5e09617d99a3ddce33a4a093fe288243eeb/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java#L656-L673 https://github.com/apache/iceberg/blob/0069c5e09617d99a3ddce33a4a093fe288243eeb/api/src/main/java/org/apache/iceberg/catalog/SessionCatalog.java#L349-L364 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
