cccs-eric commented on a change in pull request #3988:
URL: https://github.com/apache/iceberg/pull/3988#discussion_r796527010
##########
File path: python_legacy/iceberg/api/types/conversions.py
##########
@@ -53,48 +54,45 @@ class Conversions(object):
&
0xFFFFFFFFFFFFFFFF),
TypeID.FIXED: lambda type_id, value: value,
TypeID.BINARY: lambda type_id, value: value,
- # TypeId.DECIMAL: lambda type_var, value:
struct.pack(value.quantize(
- # Decimal('.' + "".join(['0' for x in range(0,
type_var.scale)]) + '1'))
+ TypeID.DECIMAL: decimal_to_bytes
}
- from_byte_buff_mapping = {TypeID.BOOLEAN: lambda type_id, value:
struct.unpack('<?', value)[0] != 0,
- TypeID.INTEGER: lambda type_id, value:
struct.unpack('<i', value)[0],
- TypeID.DATE: lambda type_id, value:
struct.unpack('<i', value)[0],
- TypeID.LONG: lambda type_id, value:
struct.unpack('<q', value)[0],
- TypeID.TIME: lambda type_id, value:
struct.unpack('<q', value)[0],
- TypeID.TIMESTAMP: lambda type_id, value:
struct.unpack('<q', value)[0],
- TypeID.FLOAT: lambda type_id, value:
struct.unpack('<f', value)[0],
- TypeID.DOUBLE: lambda type_id, value:
struct.unpack('<d', value)[0],
- TypeID.STRING: lambda type_id, value:
bytes(value).decode("utf-8"),
- TypeID.UUID: lambda type_id, value:
+ from_byte_buff_mapping = {TypeID.BOOLEAN: lambda type_var, value:
struct.unpack('<?', value)[0] != 0,
+ TypeID.INTEGER: lambda type_var, value:
struct.unpack('<i', value)[0],
+ TypeID.DATE: lambda type_var, value:
struct.unpack('<i', value)[0],
+ TypeID.LONG: lambda type_var, value:
struct.unpack('<q', value)[0],
+ TypeID.TIME: lambda type_var, value:
struct.unpack('<q', value)[0],
+ TypeID.TIMESTAMP: lambda type_var, value:
struct.unpack('<q', value)[0],
+ TypeID.FLOAT: lambda type_var, value:
struct.unpack('<f', value)[0],
+ TypeID.DOUBLE: lambda type_var, value:
struct.unpack('<d', value)[0],
+ TypeID.STRING: lambda type_var, value:
bytes(value).decode("utf-8"),
+ TypeID.UUID: lambda type_var, value:
uuid.UUID(int=struct.unpack('>QQ', value)[0] <<
64 | struct.unpack('>QQ', value)[1]),
- TypeID.FIXED: lambda type_id, value: value,
- TypeID.BINARY: lambda type_id, value: value}
+ TypeID.FIXED: lambda type_var, value: value,
+ TypeID.BINARY: lambda type_var, value: value,
+ TypeID.DECIMAL: lambda type_var, value:
Decimal(int.from_bytes(value, 'big', signed=True) * 10**-type_var.scale)
+ }
@staticmethod
def from_partition_string(type_var, as_string):
if as_string is None or Conversions.HIVE_NULL == as_string:
return None
part_func = Conversions.value_mapping.get(type_var.type_id)
if part_func is None:
- raise RuntimeError("Unsupported type for fromPartitionString: %s"
% type_var)
+ raise RuntimeError(f"Unsupported type for from_partition_string:
{type_var}")
return part_func(as_string)
@staticmethod
def to_byte_buffer(type_id, value):
Review comment:
@jun-he Even though I fully agree, what looks like a simple change
becomes very invasive. I've looked into it and it's all good except for this
[line](https://github.com/apache/iceberg/blob/master/python_legacy/iceberg/api/expressions/literals.py#L140):
`self.byte_buffer = Conversions.to_byte_buffer(self.type_id, self.value)`
`BaseLiteral` instances are built using a `type_id` and it becomes tricky to
use a `type_var` for literal types like `TimestampType`, `DecimalType` and
`FixedType` that do not offer a `get()` method. Because you guys are
refactoring the Python library, I would make that change there and leave
python_legacy as-is. What do you think? Am I overcomplicating the task at
hand?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]