rdblue commented on a change in pull request #3234: URL: https://github.com/apache/iceberg/pull/3234#discussion_r729900449
########## File path: python/src/iceberg/types.py ########## @@ -0,0 +1,824 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from decimal import Decimal +from enum import Enum, unique +import uuid + + +@unique +class TypeID(Enum): + BOOLEAN = {"java_class": "Boolean.class", "python_class": bool, "id": 1} + INTEGER = {"java_class": "Integer.class", "python_class": int, "id": 2} + LONG = {"java_class": "Long.class", "python_class": int, "id": 3} + FLOAT = {"java_class": "Float.class", "python_class": float, "id": 4} + DOUBLE = {"java_class": "Double.class", "python_class": float, "id": 5} + DATE = {"java_class": "Integer.class", "python_class": int, "id": 6} + TIME = {"java_class": "Long.class", "python_class": int, "id": 7} + TIMESTAMP = {"java_class": "Long.class", "python_class": int, "id": 8} + STRING = {"java_class": "CharSequence.class", "python_class": str, "id": 9} + UUID = {"java_class": "java.util.UUID.class", "python_class": uuid.UUID, "id": 10} + FIXED = {"java_class": "ByteBuffer.class", "python_class": bytes, "id": 11} + BINARY = {"java_class": "ByteBuffer.class", "python_class": bytearray, "id": 12} + DECIMAL = {"java_class": "BigDecimal.class", "python_class": Decimal, "id": 13} + STRUCT = {"java_class": "Void.class", "python_class": None, "id": 14} + LIST = {"java_class": "Void.class", "python_class": None, "id": 15} + MAP = {"java_class": "Void.class", "python_class": None, "id": 16} + + +class Type(object): + length: int + scale: int + precision: int + + def __init__(self): + pass + + def type_id(self): + pass + + def is_primitive_type(self): + return False + + def as_primitive_type(self): + raise ValueError("Not a primitive type: " + self) + + def as_struct_type(self): + raise ValueError("Not a struct type: " + self) + + def as_list_type(self): + raise ValueError("Not a list type: " + self) + + def asMapType(self): + raise ValueError("Not a map type: " + self) + + def is_nested_type(self): + return False + + def is_struct_type(self): + return False + + def is_list_type(self): + return False + + def is_map_type(self): + return False + + def as_nested_type(self): + raise ValueError("Not a nested type: " + self) + + +class PrimitiveType(Type): + + def __eq__(self, other): + return type(self) == type(other) + + def __ne__(self, other): + return not self.__eq__(other) Review comment: Are you sure? This sounds like what you're doing here: > For __ne__(), by default it delegates to __eq__() and inverts the result unless it is NotImplemented. ########## File path: python/src/iceberg/types.py ########## @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class Type(object): + def __init__(self, type_string: str, repr_string: str, is_primitive=False): + self._type_string = type_string + self._repr_string = repr_string + self._is_primitive = is_primitive + + def __repr__(self): + return self._repr_string + + def __str__(self): + return self._type_string + + @property + def is_primitive(self): + return self._is_primitive + + +class FixedType(Type): + def __init__(self, length: int): + super().__init__(f"fixed[{length}]", f"FixedType[{length}]", is_primitive=True) Review comment: The repr string should be something that you can paste into Python to re-create the object, so this should produce `FixedType({length})`. That is, it should use parens instead of square brackets. ########## File path: python/src/iceberg/types.py ########## @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class Type(object): + def __init__(self, type_string: str, repr_string: str, is_primitive=False): + self._type_string = type_string + self._repr_string = repr_string + self._is_primitive = is_primitive + + def __repr__(self): + return self._repr_string + + def __str__(self): + return self._type_string + + @property + def is_primitive(self): + return self._is_primitive + + +class FixedType(Type): + def __init__(self, length: int): + super().__init__(f"fixed[{length}]", f"FixedType[{length}]", is_primitive=True) + self._length = length + + +class DecimalType(Type): + def __init__(self, precision: int, scale: int): + super().__init__(f"decimal({precision}, {scale})", f"DecimalType({precision}, {scale})", is_primitive=True) + self._precision = precision + self._scale = scale + + def precision(self): Review comment: +1 for `@property` and annotations. ########## File path: python/src/iceberg/types.py ########## @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class Type(object): + def __init__(self, type_string: str, repr_string: str, is_primitive=False): + self._type_string = type_string + self._repr_string = repr_string + self._is_primitive = is_primitive + + def __repr__(self): + return self._repr_string + + def __str__(self): + return self._type_string + + @property + def is_primitive(self): + return self._is_primitive + + +class FixedType(Type): Review comment: I think this needs a `@property` method for `length`. ########## File path: python/src/iceberg/types.py ########## @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class Type(object): + def __init__(self, type_string: str, repr_string: str, is_primitive=False): + self._type_string = type_string + self._repr_string = repr_string + self._is_primitive = is_primitive + + def __repr__(self): + return self._repr_string + + def __str__(self): + return self._type_string + + @property + def is_primitive(self): + return self._is_primitive + + +class FixedType(Type): + def __init__(self, length: int): + super().__init__(f"fixed[{length}]", f"FixedType[{length}]", is_primitive=True) + self._length = length + + +class DecimalType(Type): + def __init__(self, precision: int, scale: int): + super().__init__(f"decimal({precision}, {scale})", f"DecimalType({precision}, {scale})", is_primitive=True) + self._precision = precision + self._scale = scale + + def precision(self): + return self._precision + + def scale(self): + return self._scale + + +class NestedField(object): + def __init__(self, is_optional: bool, field_id: int, name: str, field_type: Type, doc=None): + self._is_optional = is_optional + self._id = field_id + self._name = name + self._type = field_type + self._doc = doc + + @property + def is_optional(self): + return self._is_optional + + @property + def is_required(self): + return not self._is_optional + + @property + def field_id(self): + return self._id + + @property + def type(self): + return self._type + + def __repr__(self): Review comment: The implementation here should be the one for `__str__`. The `__repr__` implementation should produce a string that is the Python representation. ########## File path: python/src/iceberg/types.py ########## @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class Type(object): + def __init__(self, type_string: str, repr_string: str, is_primitive=False): + self._type_string = type_string + self._repr_string = repr_string + self._is_primitive = is_primitive + + def __repr__(self): + return self._repr_string + + def __str__(self): + return self._type_string + + @property + def is_primitive(self): + return self._is_primitive + + +class FixedType(Type): + def __init__(self, length: int): + super().__init__(f"fixed[{length}]", f"FixedType[{length}]", is_primitive=True) + self._length = length + + +class DecimalType(Type): + def __init__(self, precision: int, scale: int): + super().__init__(f"decimal({precision}, {scale})", f"DecimalType({precision}, {scale})", is_primitive=True) + self._precision = precision + self._scale = scale + + def precision(self): + return self._precision + + def scale(self): + return self._scale + + +class NestedField(object): + def __init__(self, is_optional: bool, field_id: int, name: str, field_type: Type, doc=None): + self._is_optional = is_optional + self._id = field_id + self._name = name + self._type = field_type + self._doc = doc + + @property + def is_optional(self): + return self._is_optional + + @property + def is_required(self): + return not self._is_optional + + @property + def field_id(self): + return self._id + + @property + def type(self): + return self._type + + def __repr__(self): + return f"{self._id}: {self._name}: {'optional' if self._is_optional else 'required'} {self._type}" \ + "" if self._doc is None else f" ({self._doc})" + + def __str__(self): + return self.__repr__() + + +class StructType(Type): + def __init__(self, fields: list): + super().__init__(f"struct<{', '.join(map(str, fields))}>", f"StructType<{', '.join(map(str, fields))}>") + self._fields = fields Review comment: Accessor method for fields? ########## File path: python/src/iceberg/types.py ########## @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class Type(object): + def __init__(self, type_string: str, repr_string: str, is_primitive=False): + self._type_string = type_string + self._repr_string = repr_string + self._is_primitive = is_primitive + + def __repr__(self): + return self._repr_string + + def __str__(self): + return self._type_string + + @property + def is_primitive(self): + return self._is_primitive + + +class FixedType(Type): + def __init__(self, length: int): + super().__init__(f"fixed[{length}]", f"FixedType[{length}]", is_primitive=True) + self._length = length + + +class DecimalType(Type): + def __init__(self, precision: int, scale: int): + super().__init__(f"decimal({precision}, {scale})", f"DecimalType({precision}, {scale})", is_primitive=True) + self._precision = precision + self._scale = scale + + def precision(self): + return self._precision + + def scale(self): + return self._scale + + +class NestedField(object): + def __init__(self, is_optional: bool, field_id: int, name: str, field_type: Type, doc=None): + self._is_optional = is_optional + self._id = field_id + self._name = name + self._type = field_type + self._doc = doc + + @property + def is_optional(self): + return self._is_optional + + @property + def is_required(self): + return not self._is_optional + + @property + def field_id(self): + return self._id + + @property + def type(self): + return self._type + + def __repr__(self): + return f"{self._id}: {self._name}: {'optional' if self._is_optional else 'required'} {self._type}" \ + "" if self._doc is None else f" ({self._doc})" + + def __str__(self): + return self.__repr__() + + +class StructType(Type): + def __init__(self, fields: list): + super().__init__(f"struct<{', '.join(map(str, fields))}>", f"StructType<{', '.join(map(str, fields))}>") + self._fields = fields + + +class ListType(Type): + def __init__(self, element_field: NestedField): + super().__init__(f"list<{element_field.type}>", f"ListType<{element_field.type}>") + self._element_field = element_field Review comment: Accessor method for element? ########## File path: python/src/iceberg/types.py ########## @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class Type(object): + def __init__(self, type_string: str, repr_string: str, is_primitive=False): + self._type_string = type_string + self._repr_string = repr_string + self._is_primitive = is_primitive + + def __repr__(self): + return self._repr_string + + def __str__(self): + return self._type_string + + @property + def is_primitive(self): + return self._is_primitive + + +class FixedType(Type): + def __init__(self, length: int): + super().__init__(f"fixed[{length}]", f"FixedType[{length}]", is_primitive=True) + self._length = length + + +class DecimalType(Type): + def __init__(self, precision: int, scale: int): + super().__init__(f"decimal({precision}, {scale})", f"DecimalType({precision}, {scale})", is_primitive=True) + self._precision = precision + self._scale = scale + + def precision(self): + return self._precision + + def scale(self): + return self._scale + + +class NestedField(object): + def __init__(self, is_optional: bool, field_id: int, name: str, field_type: Type, doc=None): + self._is_optional = is_optional + self._id = field_id + self._name = name + self._type = field_type + self._doc = doc + + @property + def is_optional(self): + return self._is_optional + + @property + def is_required(self): + return not self._is_optional + + @property + def field_id(self): + return self._id + + @property + def type(self): + return self._type + + def __repr__(self): + return f"{self._id}: {self._name}: {'optional' if self._is_optional else 'required'} {self._type}" \ + "" if self._doc is None else f" ({self._doc})" + + def __str__(self): + return self.__repr__() + + +class StructType(Type): + def __init__(self, fields: list): + super().__init__(f"struct<{', '.join(map(str, fields))}>", f"StructType<{', '.join(map(str, fields))}>") + self._fields = fields + + +class ListType(Type): + def __init__(self, element_field: NestedField): + super().__init__(f"list<{element_field.type}>", f"ListType<{element_field.type}>") + self._element_field = element_field + + +class MapType(Type): + def __init__(self, key_field: NestedField, value_field: NestedField): + super().__init__(f"map<{key_field.type}, {value_field.type}>", + f"MapType<{key_field.type}, {value_field.type}>") + self._key_field = key_field + self._value_field = value_field Review comment: Accessor methods for key and value? ########## File path: python/src/iceberg/types.py ########## @@ -0,0 +1,117 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class Type(object): + def __init__(self, type_string: str, repr_string: str, is_primitive=False): + self._type_string = type_string + self._repr_string = repr_string + self._is_primitive = is_primitive + + def __repr__(self): + return self._repr_string + + def __str__(self): + return self._type_string + + @property + def is_primitive(self): + return self._is_primitive + + +class FixedType(Type): + def __init__(self, length: int): + super().__init__(f"fixed[{length}]", f"FixedType[{length}]", is_primitive=True) + self._length = length + + +class DecimalType(Type): + def __init__(self, precision: int, scale: int): + super().__init__(f"decimal({precision}, {scale})", f"DecimalType({precision}, {scale})", is_primitive=True) + self._precision = precision + self._scale = scale + + def precision(self): + return self._precision + + def scale(self): + return self._scale + + +class NestedField(object): + def __init__(self, is_optional: bool, field_id: int, name: str, field_type: Type, doc=None): + self._is_optional = is_optional + self._id = field_id + self._name = name + self._type = field_type + self._doc = doc + + @property + def is_optional(self): + return self._is_optional + + @property + def is_required(self): + return not self._is_optional + + @property + def field_id(self): + return self._id + + @property + def type(self): + return self._type + + def __repr__(self): + return f"{self._id}: {self._name}: {'optional' if self._is_optional else 'required'} {self._type}" \ + "" if self._doc is None else f" ({self._doc})" + + def __str__(self): + return self.__repr__() + + +class StructType(Type): + def __init__(self, fields: list): + super().__init__(f"struct<{', '.join(map(str, fields))}>", f"StructType<{', '.join(map(str, fields))}>") + self._fields = fields Review comment: The main thing is that fields should be accessible without going directly to `_fields` because convention says that is private. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
