Fokko commented on code in PR #5030:
URL: https://github.com/apache/iceberg/pull/5030#discussion_r899874718
##########
python/src/iceberg/transforms.py:
##########
@@ -301,6 +301,86 @@ def _(self, value_type: IcebergType, value: int) -> str:
return datetime.to_human_timestamptz(value)
+class TruncateTransform(Transform[S, S]):
+ """A transform for truncating a value to a specified width.
+ Args:
+ source_type (Type): An Iceberg Type of IntegerType, LongType,
StringType, BinaryType or DecimalType
+ width (int): The truncate width
+ Raises:
+ ValueError: If a type is provided that is incompatible with a Truncate
transform
+ """
+
+ def __init__(self, source_type: IcebergType, width: int):
+ super().__init__(
+ f"truncate[{width}]",
+ f"transforms.truncate(source_type={repr(source_type)},
width={width})",
+ )
+ self._type = source_type
+ self._width = width
+
+ @property
+ def width(self):
+ return self._width
+
+ def apply(self, value: Optional[S]) -> Optional[S]:
+ return self._truncate_value(value) if value is not None else None
+
+ @singledispatchmethod
+ def _truncate_value(self, value: S) -> S:
+ raise ValueError(f"Cannot truncate value: {value}")
+
+ @_truncate_value.register(int)
+ def _(self, value):
Review Comment:
```suggestion
def _(self, value: int) -> int:
```
##########
python/src/iceberg/transforms.py:
##########
@@ -301,6 +301,86 @@ def _(self, value_type: IcebergType, value: int) -> str:
return datetime.to_human_timestamptz(value)
+class TruncateTransform(Transform[S, S]):
+ """A transform for truncating a value to a specified width.
+ Args:
+ source_type (Type): An Iceberg Type of IntegerType, LongType,
StringType, BinaryType or DecimalType
+ width (int): The truncate width
+ Raises:
+ ValueError: If a type is provided that is incompatible with a Truncate
transform
+ """
+
+ def __init__(self, source_type: IcebergType, width: int):
+ super().__init__(
+ f"truncate[{width}]",
+ f"transforms.truncate(source_type={repr(source_type)},
width={width})",
+ )
+ self._type = source_type
+ self._width = width
+
+ @property
+ def width(self):
Review Comment:
```suggestion
def width(self) -> int:
```
##########
python/src/iceberg/transforms.py:
##########
@@ -301,6 +301,86 @@ def _(self, value_type: IcebergType, value: int) -> str:
return datetime.to_human_timestamptz(value)
+class TruncateTransform(Transform[S, S]):
+ """A transform for truncating a value to a specified width.
+ Args:
+ source_type (Type): An Iceberg Type of IntegerType, LongType,
StringType, BinaryType or DecimalType
+ width (int): The truncate width
+ Raises:
+ ValueError: If a type is provided that is incompatible with a Truncate
transform
+ """
+
+ def __init__(self, source_type: IcebergType, width: int):
+ super().__init__(
+ f"truncate[{width}]",
+ f"transforms.truncate(source_type={repr(source_type)},
width={width})",
+ )
+ self._type = source_type
+ self._width = width
+
+ @property
+ def width(self):
+ return self._width
+
+ def apply(self, value: Optional[S]) -> Optional[S]:
+ return self._truncate_value(value) if value is not None else None
+
+ @singledispatchmethod
+ def _truncate_value(self, value: S) -> S:
+ raise ValueError(f"Cannot truncate value: {value}")
+
+ @_truncate_value.register(int)
+ def _(self, value):
+ """Truncate a given int value into a given width if feasible."""
+ if type(self._type) in {IntegerType, LongType}:
+ return value - value % self._width
+ else:
+ raise ValueError(f"Cannot truncate type: {self._type} for value:
{value}")
+
+ @_truncate_value.register(str)
+ def _(self, value):
+ """Truncate a given string to a given width."""
+ return value[0 : min(self._width, len(value))]
+
+ @_truncate_value.register(bytes)
+ def _(self, value):
Review Comment:
```suggestion
def _(self, value: bytes) -> bytes:
```
##########
python/src/iceberg/transforms.py:
##########
@@ -301,6 +301,86 @@ def _(self, value_type: IcebergType, value: int) -> str:
return datetime.to_human_timestamptz(value)
+class TruncateTransform(Transform[S, S]):
+ """A transform for truncating a value to a specified width.
+ Args:
+ source_type (Type): An Iceberg Type of IntegerType, LongType,
StringType, BinaryType or DecimalType
+ width (int): The truncate width
+ Raises:
+ ValueError: If a type is provided that is incompatible with a Truncate
transform
+ """
+
+ def __init__(self, source_type: IcebergType, width: int):
+ super().__init__(
+ f"truncate[{width}]",
+ f"transforms.truncate(source_type={repr(source_type)},
width={width})",
+ )
+ self._type = source_type
+ self._width = width
+
+ @property
+ def width(self):
+ return self._width
+
+ def apply(self, value: Optional[S]) -> Optional[S]:
+ return self._truncate_value(value) if value is not None else None
+
+ @singledispatchmethod
+ def _truncate_value(self, value: S) -> S:
+ raise ValueError(f"Cannot truncate value: {value}")
+
+ @_truncate_value.register(int)
+ def _(self, value):
+ """Truncate a given int value into a given width if feasible."""
+ if type(self._type) in {IntegerType, LongType}:
+ return value - value % self._width
+ else:
+ raise ValueError(f"Cannot truncate type: {self._type} for value:
{value}")
+
+ @_truncate_value.register(str)
+ def _(self, value):
+ """Truncate a given string to a given width."""
+ return value[0 : min(self._width, len(value))]
+
+ @_truncate_value.register(bytes)
+ def _(self, value):
+ """Truncate a given binary bytes into a given width."""
+ if isinstance(self._type, BinaryType):
+ return value[0 : min(self._width, len(value))]
+ else:
+ raise ValueError(f"Cannot truncate type: {self._type}")
+
+ @_truncate_value.register(Decimal)
+ def _(self, value):
Review Comment:
```suggestion
def _(self, value: Decimal) -> Decimal:
```
##########
python/src/iceberg/transforms.py:
##########
@@ -301,6 +301,86 @@ def _(self, value_type: IcebergType, value: int) -> str:
return datetime.to_human_timestamptz(value)
+class TruncateTransform(Transform[S, S]):
+ """A transform for truncating a value to a specified width.
+ Args:
+ source_type (Type): An Iceberg Type of IntegerType, LongType,
StringType, BinaryType or DecimalType
+ width (int): The truncate width
+ Raises:
+ ValueError: If a type is provided that is incompatible with a Truncate
transform
+ """
+
+ def __init__(self, source_type: IcebergType, width: int):
+ super().__init__(
+ f"truncate[{width}]",
+ f"transforms.truncate(source_type={repr(source_type)},
width={width})",
+ )
+ self._type = source_type
+ self._width = width
+
+ @property
+ def width(self):
+ return self._width
+
+ def apply(self, value: Optional[S]) -> Optional[S]:
+ return self._truncate_value(value) if value is not None else None
+
+ @singledispatchmethod
+ def _truncate_value(self, value: S) -> S:
+ raise ValueError(f"Cannot truncate value: {value}")
+
+ @_truncate_value.register(int)
+ def _(self, value):
+ """Truncate a given int value into a given width if feasible."""
+ if type(self._type) in {IntegerType, LongType}:
+ return value - value % self._width
+ else:
+ raise ValueError(f"Cannot truncate type: {self._type} for value:
{value}")
+
+ @_truncate_value.register(str)
+ def _(self, value):
Review Comment:
```suggestion
def _(self, value: str) -> str:
```
##########
python/src/iceberg/transforms.py:
##########
@@ -301,6 +301,86 @@ def _(self, value_type: IcebergType, value: int) -> str:
return datetime.to_human_timestamptz(value)
+class TruncateTransform(Transform[S, S]):
+ """A transform for truncating a value to a specified width.
+ Args:
+ source_type (Type): An Iceberg Type of IntegerType, LongType,
StringType, BinaryType or DecimalType
+ width (int): The truncate width
+ Raises:
+ ValueError: If a type is provided that is incompatible with a Truncate
transform
+ """
+
+ def __init__(self, source_type: IcebergType, width: int):
+ super().__init__(
+ f"truncate[{width}]",
+ f"transforms.truncate(source_type={repr(source_type)},
width={width})",
+ )
+ self._type = source_type
+ self._width = width
+
+ @property
+ def width(self):
+ return self._width
+
+ def apply(self, value: Optional[S]) -> Optional[S]:
+ return self._truncate_value(value) if value is not None else None
+
+ @singledispatchmethod
+ def _truncate_value(self, value: S) -> S:
+ raise ValueError(f"Cannot truncate value: {value}")
+
+ @_truncate_value.register(int)
+ def _(self, value):
+ """Truncate a given int value into a given width if feasible."""
+ if type(self._type) in {IntegerType, LongType}:
Review Comment:
Having the validation in the processing step itself feels a bit weird to me.
Shouldn't we check this when initializing the transform?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]