This is an automated email from the ASF dual-hosted git repository. isapego pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/ignite-python-thin-client.git
The following commit(s) were added to refs/heads/master by this push:
     new e0c22ef  IGNITE-14059: Fix hashing of complex objects
e0c22ef is described below

commit e0c22ef3aef39ea8a42ddb6b4495b7bcaa479417
Author: Igor Sapego <igors...@gmail.com>
AuthorDate: Tue Feb 2 12:16:43 2021 +0300

    IGNITE-14059: Fix hashing of complex objects

    This closes #5
---
 pyignite/utils.py    | 35 ++++++++++++++++++++++++++---------
 tests/test_binary.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/pyignite/utils.py b/pyignite/utils.py
index ebe5501..ce00d53 100644
--- a/pyignite/utils.py
+++ b/pyignite/utils.py
@@ -106,20 +106,37 @@ def unwrap_binary(client: 'Client', wrapped: tuple) -> object:
     return result
 
 
-def hashcode(string: Union[str, bytes]) -> int:
+def hashcode(data: Union[str, bytes]) -> int:
     """
     Calculate hash code used for identifying objects in Ignite binary API.
 
-    :param string: UTF-8-encoded string identifier of binary buffer,
+    :param data: UTF-8-encoded string identifier of binary buffer or byte array
     :return: hash code.
     """
-    result = 1 if isinstance(string, (bytes, bytearray)) else 0
-    for char in string:
-        try:
-            char = ord(char)
-        except TypeError:
-            pass
-        result = int_overflow(31 * result + char)
+    if isinstance(data, str):
+        """
+        For strings we iterate over code point which are of the int type
+        and can take up to 4 bytes and can only be positive.
+        """
+        result = 0
+        for char in data:
+            try:
+                char_val = ord(char)
+                result = int_overflow(31 * result + char_val)
+            except TypeError:
+                pass
+    else:
+        """
+        For byte array we iterate over bytes which only take 1 byte. But
+        according to protocol, bytes during hashing should be treated as signed
+        integer numbers 8 bits long. On other hand elements in Python's `bytes`
+        are unsigned. For this reason we use ctypes.c_byte() to make them
+        signed.
+        """
+        result = 1
+        for byte in data:
+            byte = ctypes.c_byte(byte).value
+            result = int_overflow(31 * result + byte)
     return result
 
 
diff --git a/tests/test_binary.py b/tests/test_binary.py
index 5190a6a..4c45afb 100644
--- a/tests/test_binary.py
+++ b/tests/test_binary.py
@@ -304,3 +304,54 @@ def test_complex_object_names(client):
     obj = cache.get(key)
     assert obj.type_name == type_name, 'Complex type name mismatch'
     assert obj.field == data, 'Complex object data failure'
+
+
+def test_complex_object_hash(client):
+    """
+    Test that Python client correctly calculates hash of the binary
+    object that contains negative bytes.
+    """
+    class Internal(
+        metaclass=GenericObjectMeta,
+        type_name='Internal',
+        schema=OrderedDict([
+            ('id', IntObject),
+            ('str', String),
+        ])
+    ):
+        pass
+
+    class TestObject(
+        metaclass=GenericObjectMeta,
+        type_name='TestObject',
+        schema=OrderedDict([
+            ('id', IntObject),
+            ('str', String),
+            ('internal', BinaryObject),
+        ])
+    ):
+        pass
+
+    obj_ascii = TestObject()
+    obj_ascii.id = 1
+    obj_ascii.str = 'test_string'
+
+    obj_ascii.internal = Internal()
+    obj_ascii.internal.id = 2
+    obj_ascii.internal.str = 'lorem ipsum'
+
+    hash_ascii = BinaryObject.hashcode(obj_ascii, client=client)
+
+    assert hash_ascii == -1314567146, 'Invalid hashcode value for object with ASCII strings'
+
+    obj_utf8 = TestObject()
+    obj_utf8.id = 1
+    obj_utf8.str = 'юникод'
+
+    obj_utf8.internal = Internal()
+    obj_utf8.internal.id = 2
+    obj_utf8.internal.str = 'ユニコード'
+
+    hash_utf8 = BinaryObject.hashcode(obj_utf8, client=client)
+
+    assert hash_utf8 == -1945378474, 'Invalid hashcode value for object with UTF-8 strings'