This is an automated email from the ASF dual-hosted git repository.
rymurr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new a6fb658 Update to_byte_buff_mapping and from_byte_buff_mapping to be
consistent and add additional tests. (#2672)
a6fb658 is described below
commit a6fb658e95c15158973609acf55b6eed9a6b12a2
Author: jun-he <[email protected]>
AuthorDate: Mon Jun 7 01:02:06 2021 -0700
Update to_byte_buff_mapping and from_byte_buff_mapping to be consistent and
add additional tests. (#2672)
---
python/iceberg/api/types/conversions.py | 42 +++++++++++++++----------------
python/tests/api/test_conversions.py | 44 ++++++++++++++++++++++++++++++---
2 files changed, 61 insertions(+), 25 deletions(-)
diff --git a/python/iceberg/api/types/conversions.py b/python/iceberg/api/types/conversions.py
index ae92a63..c6dbbf6 100644
--- a/python/iceberg/api/types/conversions.py
+++ b/python/iceberg/api/types/conversions.py
@@ -42,34 +42,34 @@ class Conversions(object):
to_byte_buff_mapping = {TypeID.BOOLEAN: lambda type_id, value:
struct.pack("<h", 1 if value else 0),
TypeID.INTEGER: lambda type_id, value:
struct.pack("<i", value),
TypeID.DATE: lambda type_id, value:
struct.pack("<i", value),
- TypeID.LONG: lambda type_id, value:
struct.pack("<l", value),
- TypeID.TIME: lambda type_id, value:
struct.pack("<l", value),
- TypeID.TIMESTAMP: lambda type_id, value:
struct.pack("<l", value),
+ TypeID.LONG: lambda type_id, value:
struct.pack("<q", value),
+ TypeID.TIME: lambda type_id, value:
struct.pack("<q", value),
+ TypeID.TIMESTAMP: lambda type_id, value:
struct.pack("<q", value),
TypeID.FLOAT: lambda type_id, value:
struct.pack("<f", value),
TypeID.DOUBLE: lambda type_id, value:
struct.pack("<d", value),
TypeID.STRING: lambda type_id, value:
value.encode('UTF-8'),
TypeID.UUID: lambda type_id, value:
struct.pack('>QQ', (value.int >> 64)
&
0xFFFFFFFFFFFFFFFF, value.int
&
0xFFFFFFFFFFFFFFFF),
- # TypeId.FIXED: lambda as_str: None,
- # TypeId.BINARY: lambda as_str: None,
+ TypeID.FIXED: lambda type_id, value: value,
+ TypeID.BINARY: lambda type_id, value: value,
# TypeId.DECIMAL: lambda type_var, value:
struct.pack(value.quantize(
# Decimal('.' + "".join(['0' for x in range(0,
type_var.scale)]) + '1'))
}
- from_byte_buff_mapping = {TypeID.BOOLEAN: lambda type_var, value:
struct.unpack('<h', value)[0] != chr(0),
- TypeID.INTEGER: lambda type_var, value:
struct.unpack('<i', value)[0],
- TypeID.DATE: lambda type_var, value:
struct.unpack('<i', value)[0],
- TypeID.LONG: lambda type_var, value:
struct.unpack('<q', value)[0],
- TypeID.TIME: lambda type_var, value:
struct.unpack('<q', value)[0],
- TypeID.TIMESTAMP: lambda type_var, value:
struct.unpack('<q', value)[0],
- TypeID.FLOAT: lambda type_var, value:
struct.unpack('<f)', value)[0],
- TypeID.DOUBLE: lambda type_var, value:
struct.unpack('<d', value)[0],
- TypeID.STRING: lambda type_var, value:
bytes(value).decode("utf-8"),
- TypeID.UUID: lambda type_var, value:
+ from_byte_buff_mapping = {TypeID.BOOLEAN: lambda type_id, value:
struct.unpack('<h', value)[0] != 0,
+ TypeID.INTEGER: lambda type_id, value:
struct.unpack('<i', value)[0],
+ TypeID.DATE: lambda type_id, value:
struct.unpack('<i', value)[0],
+ TypeID.LONG: lambda type_id, value:
struct.unpack('<q', value)[0],
+ TypeID.TIME: lambda type_id, value:
struct.unpack('<q', value)[0],
+ TypeID.TIMESTAMP: lambda type_id, value:
struct.unpack('<q', value)[0],
+ TypeID.FLOAT: lambda type_id, value:
struct.unpack('<f', value)[0],
+ TypeID.DOUBLE: lambda type_id, value:
struct.unpack('<d', value)[0],
+ TypeID.STRING: lambda type_id, value:
bytes(value).decode("utf-8"),
+ TypeID.UUID: lambda type_id, value:
uuid.UUID(int=struct.unpack('>QQ', value)[0] <<
64 | struct.unpack('>QQ', value)[1]),
- TypeID.FIXED: lambda type_var, value: value,
- TypeID.BINARY: lambda type_var, value: value}
+ TypeID.FIXED: lambda type_id, value: value,
+ TypeID.BINARY: lambda type_id, value: value}
@staticmethod
def from_partition_string(type_var, as_string):
@@ -90,11 +90,11 @@ class Conversions(object):
@staticmethod
def from_byte_buffer(type_var, buffer_var):
- return Conversions.internal_from_byte_buffer(type_var, buffer_var)
+ return Conversions.internal_from_byte_buffer(type_var.type_id,
buffer_var)
@staticmethod
- def internal_from_byte_buffer(type_var, buffer_var):
+ def internal_from_byte_buffer(type_id, buffer_var):
try:
- return Conversions.from_byte_buff_mapping[type_var.type_id](type_var.type_id, buffer_var)
+ return Conversions.from_byte_buff_mapping.get(type_id)(type_id,
buffer_var)
except KeyError:
- raise RuntimeError("Cannot Serialize Type: %s" % type_var)
+ raise TypeError("Cannot deserialize Type: %s" % type_id)
diff --git a/python/tests/api/test_conversions.py b/python/tests/api/test_conversions.py
index 7f6778c..d7397ce 100644
--- a/python/tests/api/test_conversions.py
+++ b/python/tests/api/test_conversions.py
@@ -16,30 +16,66 @@
# under the License.
import unittest
+import uuid
from iceberg.api.expressions import Literal
-from iceberg.api.types import (DateType,
+from iceberg.api.types import (BinaryType,
+ BooleanType,
+ DateType,
DoubleType,
+ FixedType,
+ FloatType,
IntegerType,
LongType,
- StringType)
+ StringType,
+ TimestampType,
+ TimeType,
+ UUIDType)
from iceberg.api.types.conversions import Conversions
class TestConversions(unittest.TestCase):
def test_from_bytes(self):
+ self.assertEqual(False,
Conversions.from_byte_buffer(BooleanType.get(), b'\x00\x00'))
+ self.assertEqual(True, Conversions.from_byte_buffer(BooleanType.get(),
b'\x01\x00'))
self.assertEqual(1234, Conversions.from_byte_buffer(IntegerType.get(),
b'\xd2\x04\x00\x00'))
self.assertEqual(1234, Conversions.from_byte_buffer(LongType.get(),
b'\xd2\x04\x00\x00\x00\x00\x00\x00'))
+ self.assertAlmostEqual(1.2345, Conversions.from_byte_buffer(FloatType.get(),
+ b'\x19\x04\x9e?'), places=5)
self.assertAlmostEqual(1.2345,
Conversions.from_byte_buffer(DoubleType.get(),
b'\x8d\x97\x6e\x12\x83\xc0\xf3\x3f'))
+ self.assertEqual(1234, Conversions.from_byte_buffer(DateType.get(),
+
b'\xd2\x04\x00\x00'))
+ self.assertEqual(100000000000,
Conversions.from_byte_buffer(TimeType.get(),
+
b'\x00\xe8vH\x17\x00\x00\x00'))
+ self.assertEqual(100000000000,
Conversions.from_byte_buffer(TimestampType.with_timezone(),
+
b'\x00\xe8vH\x17\x00\x00\x00'))
+ self.assertEqual(100000000000,
Conversions.from_byte_buffer(TimestampType.without_timezone(),
+
b'\x00\xe8vH\x17\x00\x00\x00'))
self.assertEqual("foo", Conversions.from_byte_buffer(StringType.get(),
b'foo'))
+ self.assertEqual(uuid.UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7"),
+ Conversions.from_byte_buffer(UUIDType.get(),
b'\xf7\x9c>\tg|K\xbd\xa4y?4\x9c\xb7\x85\xe7'))
+ self.assertEqual(b'foo',
Conversions.from_byte_buffer(FixedType.of_length(3), b'foo'))
+ self.assertEqual(b'foo',
Conversions.from_byte_buffer(BinaryType.get(), b'foo'))
def test_to_bytes(self):
self.assertEqual(b'\x00\x00', Literal.of(False).to_byte_buffer())
self.assertEqual(b'\x01\x00', Literal.of(True).to_byte_buffer())
- self.assertEqual(b'foo', Literal.of("foo").to_byte_buffer())
self.assertEqual(b'\xd2\x04\x00\x00',
Literal.of(1234).to_byte_buffer())
- self.assertEqual(b'\xe8\x03\x00\x00',
Literal.of(1000).to(DateType.get()).to_byte_buffer())
+ self.assertEqual(b'\xd2\x04\x00\x00\x00\x00\x00\x00',
Literal.of(1234).to(LongType.get()).to_byte_buffer())
+ self.assertEqual(b'\x19\x04\x9e?', Literal.of(1.2345).to_byte_buffer())
+ self.assertEqual(b'\x8d\x97\x6e\x12\x83\xc0\xf3\x3f',
Literal.of(1.2345).to(DoubleType.get()).to_byte_buffer())
+ self.assertEqual(b'\xd2\x04\x00\x00',
Literal.of(1234).to(DateType.get()).to_byte_buffer())
+ self.assertEqual(b'\x00\xe8vH\x17\x00\x00\x00',
Literal.of(100000000000).to(TimeType.get()).to_byte_buffer())
+ self.assertEqual(b'\x00\xe8vH\x17\x00\x00\x00',
+
Literal.of(100000000000).to(TimestampType.with_timezone()).to_byte_buffer())
+ self.assertEqual(b'\x00\xe8vH\x17\x00\x00\x00',
+
Literal.of(100000000000).to(TimestampType.without_timezone()).to_byte_buffer())
+ self.assertEqual(b'foo', Literal.of("foo").to_byte_buffer())
+ self.assertEqual(b'\xf7\x9c>\tg|K\xbd\xa4y?4\x9c\xb7\x85\xe7',
+
Literal.of(uuid.UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7")).to_byte_buffer())
+ self.assertEqual(b'foo', Literal.of(bytes(b'foo')).to_byte_buffer())
+ self.assertEqual(b'foo',
Literal.of(bytearray(b'foo')).to_byte_buffer())