rdblue commented on a change in pull request #4016:
URL: https://github.com/apache/iceberg/pull/4016#discussion_r807048847
##########
File path: python/src/iceberg/types.py
##########
@@ -57,30 +57,65 @@ def __str__(self):
@property
def is_primitive(self) -> bool:
- return self._is_primitive
+ return isinstance(self, PrimitiveType)
+
+
+class PrimitiveType(IcebergType):
+ """Base class for all Iceberg Primitive Types"""
+
+ _instances = {} # type: ignore
+
+ def __new__(cls):
+ cls._instances[cls] = cls._instances.get(cls) or object.__new__(cls)
+ return cls._instances[cls]
+
+
+class FixedType(PrimitiveType):
+ """A fixed data type in Iceberg.
+
+ Example:
+ >>> FixedType(8)
+ FixedType(length=8)
+ >>> FixedType(8)==FixedType(8)
+ True
+ """
+
+ _instances: Dict[int, "FixedType"] = {}
+ def __new__(cls, length: int):
+ cls._instances[length] = cls._instances.get(length) or
object.__new__(cls)
+ return cls._instances[length]
-class FixedType(Type):
def __init__(self, length: int):
- super().__init__(f"fixed[{length}]", f"FixedType(length={length})",
is_primitive=True)
+ super().__init__(f"fixed[{length}]", f"FixedType(length={length})")
self._length = length
@property
def length(self) -> int:
return self._length
- def __eq__(self, other):
- if type(self) is type(other):
- return self.length == other.length
- return False
+class DecimalType(PrimitiveType):
+ """A fixed data type in Iceberg.
+
+ Example:
+ >>> DecimalType(32, 3)
+ DecimalType(precision=32, scale=3)
+ >>> DecimalType(8, 3)==DecimalType(8, 3)
+ True
+ """
+
+ _instances: Dict[Tuple[int, int], "DecimalType"] = {}
+
+ def __new__(cls, precision: int, scale: int):
+ key = precision, scale
+ cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
Review comment:
Okay, I understand. So Python is still going to call `__init__` after
this method on the object that this returns.
##########
File path: python/src/iceberg/types.py
##########
@@ -57,30 +57,65 @@ def __str__(self):
@property
def is_primitive(self) -> bool:
- return self._is_primitive
+ return isinstance(self, PrimitiveType)
+
+
+class PrimitiveType(IcebergType):
+ """Base class for all Iceberg Primitive Types"""
+
+ _instances = {} # type: ignore
+
+ def __new__(cls):
+ cls._instances[cls] = cls._instances.get(cls) or object.__new__(cls)
+ return cls._instances[cls]
+
+
+class FixedType(PrimitiveType):
+ """A fixed data type in Iceberg.
+
+ Example:
+ >>> FixedType(8)
+ FixedType(length=8)
+ >>> FixedType(8)==FixedType(8)
+ True
+ """
+
+ _instances: Dict[int, "FixedType"] = {}
+ def __new__(cls, length: int):
+ cls._instances[length] = cls._instances.get(length) or
object.__new__(cls)
+ return cls._instances[length]
-class FixedType(Type):
def __init__(self, length: int):
- super().__init__(f"fixed[{length}]", f"FixedType(length={length})",
is_primitive=True)
+ super().__init__(f"fixed[{length}]", f"FixedType(length={length})")
self._length = length
@property
def length(self) -> int:
return self._length
- def __eq__(self, other):
- if type(self) is type(other):
- return self.length == other.length
- return False
+class DecimalType(PrimitiveType):
+ """A fixed data type in Iceberg.
+
+ Example:
+ >>> DecimalType(32, 3)
+ DecimalType(precision=32, scale=3)
+ >>> DecimalType(8, 3)==DecimalType(8, 3)
+ True
+ """
+
+ _instances: Dict[Tuple[int, int], "DecimalType"] = {}
+
+ def __new__(cls, precision: int, scale: int):
+ key = precision, scale
+ cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
Review comment:
Okay, I understand. So Python is still going to call `__init__` after
this method on the object that this returns?
Is it a concern that `__init__` is called every time this is returned?
##########
File path: python/src/iceberg/types.py
##########
@@ -93,21 +121,37 @@ def precision(self) -> int:
def scale(self) -> int:
return self._scale
- def __eq__(self, other):
- if type(self) is type(other):
- return self.precision == other.precision and self.scale ==
other.scale
- return False
+class NestedField(IcebergType):
+ """This represents a field of a struct, a map key, a map value, or a list
element. This is where field IDs, names, docs, and nullability are tracked."""
Review comment:
This doc string should be wrapped at 130 characters, right?
##########
File path: python/src/iceberg/types.py
##########
@@ -135,75 +179,122 @@ def doc(self) -> Optional[str]:
return self._doc
@property
- def type(self) -> Type:
+ def type(self) -> IcebergType:
return self._type
- def __repr__(self):
- return (
- f"NestedField(is_optional={self._is_optional},
field_id={self._id}, "
- f"name={repr(self._name)}, field_type={repr(self._type)},
doc={repr(self._doc)})"
- )
- def __str__(self):
- return (
- f"{self._id}: {self._name}: {'optional' if self._is_optional else
'required'} {self._type}" ""
- if self._doc is None
- else f" ({self._doc})"
+class StructType(IcebergType):
+ """A struct type in Iceberg
+
+ Example:
+ >>> StructType(
+ NestedField(True, 1, "required_field", StringType()),
+ NestedField(False, 2, "optional_field", IntegerType())
)
+ """
- def __eq__(self, other):
- if type(self) is type(other):
- return (
- self.is_optional == other.is_optional
- and self.field_id == other.field_id
- and self.name == other.name
- and self.doc == other.doc
- and self.type == other.type
- )
- return False
+ _instances: Dict[Tuple[NestedField, ...], "StructType"] = {}
+ def __new__(cls, *fields: NestedField):
+ cls._instances[fields] = cls._instances.get(fields) or
object.__new__(cls)
+ return cls._instances[fields]
-class StructType(Type):
- def __init__(self, fields: list):
+ def __init__(self, *fields: NestedField):
super().__init__(
f"struct<{', '.join(map(str, fields))}>",
- f"StructType(fields={repr(fields)})",
+ f"StructType{repr(fields)}",
)
self._fields = fields
@property
- def fields(self) -> list:
+ def fields(self) -> Tuple[NestedField, ...]:
return self._fields
- def __eq__(self, other):
- if type(self) is type(other):
- return self.fields == other.fields
- return False
+class ListType(IcebergType):
+ """A list type in Iceberg
+
+ Example:
+ >>> ListType(element_id=3, element_type=StringType(),
element_is_optional=True)
+ ListType(element=NestedField(is_optional=True, field_id=3,
name='element', field_type=StringType(), doc=None))
+ """
+
+ _instances: Dict[Tuple[bool, int, IcebergType], "ListType"] = {}
+
+ def __new__(
+ cls,
+ element_id: int,
+ element_type: IcebergType,
+ element_is_optional: bool,
+ ):
+ key = (element_is_optional, element_id, element_type)
+ cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
+ return cls._instances[key]
-class ListType(Type):
- def __init__(self, element: NestedField):
- super().__init__(f"list<{element.type}>",
f"ListType(element={repr(element)})")
- self._element_field = element
+ def __init__(
+ self,
+ element_id: int,
+ element_type: IcebergType,
+ element_is_optional: bool,
+ ):
+ super().__init__(
+ f"list<{element_type}>",
+ f"ListType(element_is_optional={element_is_optional},
element_id={element_id}, "
+ f"element_type={repr(element_type)})",
+ )
+ self._element_field = NestedField(
+ name="element",
+ is_optional=element_is_optional,
+ field_id=element_id,
+ field_type=element_type,
+ )
@property
def element(self) -> NestedField:
return self._element_field
- def __eq__(self, other):
- if type(self) is type(other):
- return self.element == other.element
- return False
+class MapType(IcebergType):
+ """A map type in Iceberg
+
+ Example:
+ >>> MapType(key_id=1, key_type=StringType(), value_id=2,
value_type=IntegerType(), value_is_optional=True)
+ MapType(key=NestedField(is_optional=False, field_id=1, name='key',
field_type=StringType(), doc=None), value=NestedField(is_optional=True,
field_id=2, name='value', field_type=IntegerType(), doc=None))
Review comment:
Looks like this also needs to be wrapped to the line length.
##########
File path: python/src/iceberg/types.py
##########
@@ -135,75 +179,122 @@ def doc(self) -> Optional[str]:
return self._doc
@property
- def type(self) -> Type:
+ def type(self) -> IcebergType:
return self._type
- def __repr__(self):
- return (
- f"NestedField(is_optional={self._is_optional},
field_id={self._id}, "
- f"name={repr(self._name)}, field_type={repr(self._type)},
doc={repr(self._doc)})"
- )
- def __str__(self):
- return (
- f"{self._id}: {self._name}: {'optional' if self._is_optional else
'required'} {self._type}" ""
- if self._doc is None
- else f" ({self._doc})"
+class StructType(IcebergType):
+ """A struct type in Iceberg
+
+ Example:
+ >>> StructType(
+ NestedField(True, 1, "required_field", StringType()),
+ NestedField(False, 2, "optional_field", IntegerType())
)
+ """
- def __eq__(self, other):
- if type(self) is type(other):
- return (
- self.is_optional == other.is_optional
- and self.field_id == other.field_id
- and self.name == other.name
- and self.doc == other.doc
- and self.type == other.type
- )
- return False
+ _instances: Dict[Tuple[NestedField, ...], "StructType"] = {}
+ def __new__(cls, *fields: NestedField):
+ cls._instances[fields] = cls._instances.get(fields) or
object.__new__(cls)
+ return cls._instances[fields]
-class StructType(Type):
- def __init__(self, fields: list):
+ def __init__(self, *fields: NestedField):
super().__init__(
f"struct<{', '.join(map(str, fields))}>",
- f"StructType(fields={repr(fields)})",
+ f"StructType{repr(fields)}",
)
self._fields = fields
@property
- def fields(self) -> list:
+ def fields(self) -> Tuple[NestedField, ...]:
return self._fields
- def __eq__(self, other):
- if type(self) is type(other):
- return self.fields == other.fields
- return False
+class ListType(IcebergType):
+ """A list type in Iceberg
+
+ Example:
+ >>> ListType(element_id=3, element_type=StringType(),
element_is_optional=True)
+ ListType(element=NestedField(is_optional=True, field_id=3,
name='element', field_type=StringType(), doc=None))
+ """
+
+ _instances: Dict[Tuple[bool, int, IcebergType], "ListType"] = {}
+
+ def __new__(
+ cls,
+ element_id: int,
+ element_type: IcebergType,
+ element_is_optional: bool,
+ ):
+ key = (element_is_optional, element_id, element_type)
+ cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
+ return cls._instances[key]
-class ListType(Type):
- def __init__(self, element: NestedField):
- super().__init__(f"list<{element.type}>",
f"ListType(element={repr(element)})")
- self._element_field = element
+ def __init__(
+ self,
+ element_id: int,
+ element_type: IcebergType,
+ element_is_optional: bool,
+ ):
+ super().__init__(
+ f"list<{element_type}>",
+ f"ListType(element_is_optional={element_is_optional},
element_id={element_id}, "
Review comment:
Minor: It would be nice if the argument order were consistent. This is
slightly misleading because `element_is_optional` is last if you're passing by
position.
##########
File path: python/src/iceberg/types.py
##########
@@ -93,21 +121,37 @@ def precision(self) -> int:
def scale(self) -> int:
return self._scale
- def __eq__(self, other):
- if type(self) is type(other):
- return self.precision == other.precision and self.scale ==
other.scale
- return False
+class NestedField(IcebergType):
+ """This represents a field of a struct, a map key, a map value, or a list
element. This is where field IDs, names, docs, and nullability are tracked."""
+
+ _instances: Dict[Tuple[bool, int, str, IcebergType, Optional[str]],
"NestedField"] = {}
+
+ def __new__(
+ cls,
+ is_optional: bool,
Review comment:
The optional boolean is at the end for map and list types. What about
adding this after `doc` and defaulting it to `True`?
##########
File path: python/src/iceberg/types.py
##########
@@ -93,21 +121,37 @@ def precision(self) -> int:
def scale(self) -> int:
return self._scale
- def __eq__(self, other):
- if type(self) is type(other):
- return self.precision == other.precision and self.scale ==
other.scale
- return False
+class NestedField(IcebergType):
+ """This represents a field of a struct, a map key, a map value, or a list
element. This is where field IDs, names, docs, and nullability are tracked."""
+
+ _instances: Dict[Tuple[bool, int, str, IcebergType, Optional[str]],
"NestedField"] = {}
+
+ def __new__(
Review comment:
Testing this out in ipython, I missed a couple args and got this error
message:
```
__new__() missing 2 required positional arguments: 'name' and 'field_type'
```
Is there a way to improve that? Or is that helpful enough?
##########
File path: python/src/iceberg/types.py
##########
@@ -135,75 +179,122 @@ def doc(self) -> Optional[str]:
return self._doc
@property
- def type(self) -> Type:
+ def type(self) -> IcebergType:
return self._type
- def __repr__(self):
- return (
- f"NestedField(is_optional={self._is_optional},
field_id={self._id}, "
- f"name={repr(self._name)}, field_type={repr(self._type)},
doc={repr(self._doc)})"
- )
- def __str__(self):
- return (
- f"{self._id}: {self._name}: {'optional' if self._is_optional else
'required'} {self._type}" ""
- if self._doc is None
- else f" ({self._doc})"
+class StructType(IcebergType):
+ """A struct type in Iceberg
+
+ Example:
+ >>> StructType(
+ NestedField(True, 1, "required_field", StringType()),
+ NestedField(False, 2, "optional_field", IntegerType())
)
+ """
- def __eq__(self, other):
- if type(self) is type(other):
- return (
- self.is_optional == other.is_optional
- and self.field_id == other.field_id
- and self.name == other.name
- and self.doc == other.doc
- and self.type == other.type
- )
- return False
+ _instances: Dict[Tuple[NestedField, ...], "StructType"] = {}
+ def __new__(cls, *fields: NestedField):
+ cls._instances[fields] = cls._instances.get(fields) or
object.__new__(cls)
+ return cls._instances[fields]
-class StructType(Type):
- def __init__(self, fields: list):
+ def __init__(self, *fields: NestedField):
super().__init__(
f"struct<{', '.join(map(str, fields))}>",
- f"StructType(fields={repr(fields)})",
+ f"StructType{repr(fields)}",
)
self._fields = fields
@property
- def fields(self) -> list:
+ def fields(self) -> Tuple[NestedField, ...]:
return self._fields
- def __eq__(self, other):
- if type(self) is type(other):
- return self.fields == other.fields
- return False
+class ListType(IcebergType):
+ """A list type in Iceberg
+
+ Example:
+ >>> ListType(element_id=3, element_type=StringType(),
element_is_optional=True)
+ ListType(element=NestedField(is_optional=True, field_id=3,
name='element', field_type=StringType(), doc=None))
+ """
+
+ _instances: Dict[Tuple[bool, int, IcebergType], "ListType"] = {}
+
+ def __new__(
+ cls,
+ element_id: int,
+ element_type: IcebergType,
+ element_is_optional: bool,
Review comment:
Can we default element_is_optional to True? Same with value_is_optional
for maps.
##########
File path: python/src/iceberg/types.py
##########
@@ -93,21 +121,37 @@ def precision(self) -> int:
def scale(self) -> int:
return self._scale
- def __eq__(self, other):
- if type(self) is type(other):
- return self.precision == other.precision and self.scale ==
other.scale
- return False
+class NestedField(IcebergType):
+ """This represents a field of a struct, a map key, a map value, or a list
element. This is where field IDs, names, docs, and nullability are tracked."""
+
+ _instances: Dict[Tuple[bool, int, str, IcebergType, Optional[str]],
"NestedField"] = {}
+
+ def __new__(
+ cls,
+ is_optional: bool,
+ field_id: int,
+ name: str,
+ field_type: IcebergType,
+ doc: Optional[str] = None,
+ ):
+ key = (is_optional, field_id, name, field_type, doc)
+ cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
+ return cls._instances[key]
-class NestedField:
def __init__(
self,
is_optional: bool,
field_id: int,
name: str,
- field_type: Type,
+ field_type: IcebergType,
doc: Optional[str] = None,
):
+ super().__init__(
+ (f"{field_id}: {name}: {'optional' if is_optional else 'required'}
{field_type}" "" if doc is None else f" ({doc})"),
+ f"NestedField(is_optional={is_optional}, field_id={field_id}, "
+ f"name={repr(name)}, field_type={repr(field_type)},
doc={repr(doc)})",
Review comment:
Can we omit doc if it is None? In the repr string, I see `doc=None`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]