rdblue commented on a change in pull request #3952:
URL: https://github.com/apache/iceberg/pull/3952#discussion_r790079138
##########
File path: python/src/iceberg/types.py
##########
@@ -230,133 +191,260 @@ class LongType(Type):
min: int = -9223372036854775808
- def __init__(self):
- super().__init__("long", "LongType()", is_primitive=True)
+ def __init__(self, value: Optional[int] = None):
+ super().__init__(value)
-class FloatType(Type):
+class Float(PrimitiveType):
"""A Float data type in Iceberg can be represented using an instance of
this class. Floats in Iceberg are
32-bit IEEE 754 floating points and can be promoted to Doubles.
+ Args:
+ value: an optional argument for creating a literal without which
instantiation represents just a type
+
Example:
- >>> column_foo = FloatType()
- >>> isinstance(column_foo, FloatType)
+ >>> column_foo = Float()
+ >>> isinstance(column_foo, Float)
True
+ >>> Float(3.14)
+ Float(value=3.14)
+
"""
- def __init__(self):
- super().__init__("float", "FloatType()", is_primitive=True)
+ def __init__(self, value: Optional[float] = None):
+ super().__init__(value)
-class DoubleType(Type):
+class Double(PrimitiveType):
"""A Double data type in Iceberg can be represented using an instance of
this class. Doubles in Iceberg are
64-bit IEEE 754 floating points.
+ Args:
+ value: an optional argument for creating a literal without which
instantiation represents just a type
+
Example:
- >>> column_foo = DoubleType()
- >>> isinstance(column_foo, DoubleType)
+ >>> column_foo = Double()
+ >>> isinstance(column_foo, Double)
True
+ >>> Double(3.14)
+ Double(value=3.14)
+
"""
- def __init__(self):
- super().__init__("double", "DoubleType()", is_primitive=True)
+ def __init__(self, value: Optional[float] = None):
+ super().__init__(value)
-class DateType(Type):
+class Date(PrimitiveType):
"""A Date data type in Iceberg can be represented using an instance of
this class. Dates in Iceberg are
calendar dates without a timezone or time.
+ Args:
+ value: an optional argument for creating a literal without which
instantiation represents just a type
+
Example:
- >>> column_foo = DateType()
- >>> isinstance(column_foo, DateType)
+ >>> column_foo = Date()
+ >>> isinstance(column_foo, Date)
True
+ >>> Date("2017-11-16")
+ Date(value="2017-11-16")
"""
- def __init__(self):
- super().__init__("date", "DateType()", is_primitive=True)
+ def __init__(self, value: Optional[str] = None):
+ super().__init__(value)
-class TimeType(Type):
+class Time(PrimitiveType):
"""A Time data type in Iceberg can be represented using an instance of
this class. Times in Iceberg
have microsecond precision and are a time of day without a date or
timezone.
+ Args:
+ value: an optional argument for creating a literal without which
instantiation represents just a type
+
Example:
- >>> column_foo = TimeType()
- >>> isinstance(column_foo, TimeType)
+ >>> column_foo = Time()
+ >>> isinstance(column_foo, Time)
True
+ >>> Time("14:31:08")
+ Time(value="14:31:08")
"""
- def __init__(self):
- super().__init__("time", "TimeType()", is_primitive=True)
+ def __init__(self, value: Optional[str] = None):
+ super().__init__(value)
-class TimestampType(Type):
+class Timestamp(PrimitiveType):
"""A Timestamp data type in Iceberg can be represented using an instance
of this class. Timestamps in
Iceberg have microsecond precision and include a date and a time of day
without a timezone.
+ Args:
+ value: an optional argument for creating a literal without which
instantiation represents just a type
+
Example:
- >>> column_foo = TimestampType()
- >>> isinstance(column_foo, TimestampType)
+ >>> column_foo = Timestamp()
+ >>> isinstance(column_foo, Timestamp)
True
+ >>> Timestamp("2017-11-16T14:31:08-08:00")
+ Timestamp(value="2017-11-16T14:31:08-08:00")
"""
- def __init__(self):
- super().__init__("timestamp", "TimestampType()", is_primitive=True)
+ def __init__(self, value: Optional[str] = None):
+ super().__init__(value)
-class TimestamptzType(Type):
+class Timestamptz(PrimitiveType):
"""A Timestamptz data type in Iceberg can be represented using an instance
of this class. Timestamptzs in
Iceberg are stored as UTC and include a date and a time of day with a
timezone.
Example:
- >>> column_foo = TimestamptzType()
- >>> isinstance(column_foo, TimestamptzType)
+ >>> column_foo = Timestamptz()
+ >>> isinstance(column_foo, Timestamptz)
True
+ >>> Timestamptz("2017-11-16T14:31:08-08:00")
+ Timestamptz(value="2017-11-16T14:31:08-08:00")
"""
- def __init__(self):
- super().__init__("timestamptz", "TimestamptzType()", is_primitive=True)
+ def __init__(self, value: Optional[str] = None):
+ super().__init__(value)
-class StringType(Type):
+class String(PrimitiveType):
"""A String data type in Iceberg can be represented using an instance of
this class. Strings in
Iceberg are arbitrary-length character sequences and are encoded with
UTF-8.
+ Args:
+ value: an optional argument for creating a literal without which
instantiation represents just a type
+
Example:
- >>> column_foo = StringType()
- >>> isinstance(column_foo, StringType)
+ >>> column_foo = String()
+ >>> isinstance(column_foo, String)
True
+ >>> String("hello")
+ String(value="hello")
"""
- def __init__(self):
- super().__init__("string", "StringType()", is_primitive=True)
+ def __init__(self, value: Optional[str] = None):
+ super().__init__(value)
-class UUIDType(Type):
+class UUID(PrimitiveType):
"""A UUID data type in Iceberg can be represented using an instance of
this class. UUIDs in
Iceberg are universally unique identifiers.
Example:
- >>> column_foo = UUIDType()
- >>> isinstance(column_foo, UUIDType)
+ >>> column_foo = UUID()
+ >>> isinstance(column_foo, UUID)
True
+ >>> UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7")
+ UUID(value="f79c3e09-677c-4bbd-a479-3f349cb785e7")
"""
- def __init__(self):
- super().__init__("uuid", "UUIDType()", is_primitive=True)
+ def __init__(self, value: Optional[str] = None):
+ super().__init__(value)
-class BinaryType(Type):
+class Binary(PrimitiveType):
"""A Binary data type in Iceberg can be represented using an instance of
this class. Binarys in
Iceberg are arbitrary-length byte arrays.
+ Args:
+ value: an optional argument for creating a literal without which
instantiation represents just a type
+
Example:
- >>> column_foo = BinaryType()
- >>> isinstance(column_foo, BinaryType)
+ >>> column_foo = Binary()
+ >>> isinstance(column_foo, Binary)
True
+ >>> Binary(b'hello'))
+
"""
- def __init__(self):
- super().__init__("binary", "BinaryType()", is_primitive=True)
+ def __init__(self, value: Optional[bytes] = None):
+ super().__init__(value)
+
+
+class NestedField(object):
Review comment:
Rearranging the classes appears to have introduced a lot more changes
than needed. We ended up with primitives being diffed against the old nested
types, and we don't get to see changes for these classes. I think making this a
two-step change would allow us to see what changed much more easily. Could you
move these back to the top, or move them and then base the PR on master after
the move?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]