rdblue commented on code in PR #4318:
URL: https://github.com/apache/iceberg/pull/4318#discussion_r842119531


##########
python/src/iceberg/schema.py:
##########
@@ -0,0 +1,448 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import sys
+from abc import ABC, abstractmethod
+from typing import Dict, Generic, Iterable, List, TypeVar
+
+if sys.version_info >= (3, 8):
+    from functools import singledispatch  # pragma: no cover
+else:
+    from singledispatch import singledispatch  # pragma: no cover
+
+from iceberg.types import (
+    IcebergType,
+    ListType,
+    MapType,
+    NestedField,
+    PrimitiveType,
+    StructType,
+)
+
+T = TypeVar("T")
+
+
+class Schema:
+    """A table Schema
+
+    Example:
+        >>> from iceberg import schema
+        >>> from iceberg import types
+    """
+
+    def __init__(self, *columns: Iterable[NestedField], schema_id: int, 
identifier_field_ids: List[int] = []):
+        self._struct = StructType(*columns)  # type: ignore
+        self._schema_id = schema_id
+        self._identifier_field_ids = identifier_field_ids
+        self._name_to_id: Dict[str, int] = index_by_name(self)
+        self._name_to_id_lower: Dict[str, int] = {}  # Should be accessed 
through self._lazy_name_to_id_lower()
+        self._id_to_field: Dict[int, NestedField] = {}  # Should be accessed 
through self._lazy_id_to_field()
+
+    def __str__(self):
+        return "table {\n" + "\n".join(["  " + str(field) for field in 
self.columns]) + "\n}"
+
+    def __repr__(self):
+        return (
+            f"Schema(fields={repr(self.columns)}, schema_id={self.schema_id}, 
identifier_field_ids={self.identifier_field_ids})"
+        )
+
+    @property
+    def columns(self) -> Iterable[NestedField]:
+        """A list of the top-level fields in the underlying struct"""
+        return self._struct.fields
+
+    @property
+    def schema_id(self) -> int:
+        """The ID of this Schema"""
+        return self._schema_id
+
+    @property
+    def identifier_field_ids(self) -> List[int]:
+        return self._identifier_field_ids
+
+    def _lazy_id_to_field(self) -> Dict[int, NestedField]:
+        """Returns an index of field ID to NestedField instance
+
+        This property is calculated once when called for the first time. 
Subsequent calls to this property will use a cached index.
+        """
+        if not self._id_to_field:
+            self._id_to_field = index_by_id(self)
+        return self._id_to_field
+
+    def _lazy_name_to_id_lower(self) -> Dict[str, int]:
+        """Returns an index of lower-case field names to field IDs
+
+        This property is calculated once when called for the first time. 
Subsequent calls to this property will use a cached index.
+        """
+        if not self._name_to_id_lower:
+            self._name_to_id_lower = {name.lower(): field_id for name, 
field_id in self._name_to_id.items()}
+        return self._name_to_id_lower
+
+    def as_struct(self) -> StructType:
+        """Returns the underlying struct"""
+        return self._struct
+
+    def find_field(self, name_or_id: str | int, case_sensitive: bool = True) 
-> NestedField:
+        """Find a field using a field name or field ID
+
+        Args:
+            name_or_id (str | int): Either a field name or a field ID
+            case_sensitive (bool, optional): Whether to perform a 
case-sensitive lookup using a field name. Defaults to True.
+
+        Returns:
+            NestedField: The matched NestedField
+        """
+        if isinstance(name_or_id, int):
+            field = self._lazy_id_to_field().get(name_or_id)
+            return field  # type: ignore
+        if case_sensitive:
+            field_id = self._name_to_id.get(name_or_id)
+        else:
+            field_id = self._lazy_name_to_id_lower().get(name_or_id.lower())
+        return self._lazy_id_to_field().get(field_id)  # type: ignore
+
+    def find_type(self, name_or_id: str | int, case_sensitive: bool = True) -> 
IcebergType:
+        """Find a field type using a field name or field ID
+
+        Args:
+            name_or_id (str | int): Either a field name or a field ID
+            case_sensitive (bool, optional): Whether to perform a 
case-sensitive lookup using a field name. Defaults to True.
+
+        Returns:
+            IcebergType: The type of the matched NestedField
+        """
+        field = self.find_field(name_or_id=name_or_id, 
case_sensitive=case_sensitive)
+        return field.type  # type: ignore
+
+    def find_column_name(self, column_id: int) -> str:
+        """Find a column name given a column ID
+
+        Args:
+            column_id (int): The ID of the column
+
+        Raises:
+            ValueError: If no column name can be found for the given column ID
+
+        Returns:
+            str: The column name
+        """
+        column = self._lazy_id_to_field().get(column_id)
+        return None if column is None else column.name  # type: ignore

Review Comment:
   This needs to return the column's full name, not just the field's own name. 
In Java, this lookup uses the same index visitor that builds the name-to-ID 
index, but takes the `byId` map that the visitor produces.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to