This is an automated email from the ASF dual-hosted git repository. rskraba pushed a commit to branch branch-1.9 in repository https://gitbox.apache.org/repos/asf/avro.git
commit 8302424de972a338f3f4d71d224ec1554e653cc3 Author: Michael A. Smith <[email protected]> AuthorDate: Fri May 31 14:38:46 2019 -0400 AVRO-1928: Simplify Python float/double encoding (#528) Switch to using built-in little endian support in the struct module, instead of explicit bit manipulation. --- lang/py/src/avro/io.py | 39 +++++++-------------------------------- lang/py3/avro/io.py | 37 ++++++------------------------------- 2 files changed, 13 insertions(+), 63 deletions(-) diff --git a/lang/py/src/avro/io.py b/lang/py/src/avro/io.py index 8c2ef10..c300fb3 100644 --- a/lang/py/src/avro/io.py +++ b/lang/py/src/avro/io.py @@ -74,15 +74,14 @@ else: return struct.unpack(self.format, *args) struct_class = SimpleStruct -STRUCT_INT = struct_class('!I') # big-endian unsigned int -STRUCT_LONG = struct_class('!Q') # big-endian unsigned long long -STRUCT_FLOAT = struct_class('!f') # big-endian float -STRUCT_DOUBLE = struct_class('!d') # big-endian double +STRUCT_FLOAT = struct_class('<f') # big-endian float +STRUCT_DOUBLE = struct_class('<d') # big-endian double STRUCT_CRC32 = struct_class('>I') # big-endian unsigned int STRUCT_SIGNED_SHORT = struct_class('>h') # big-endian signed short STRUCT_SIGNED_INT = struct_class('>i') # big-endian signed int STRUCT_SIGNED_LONG = struct_class('>q') # big-endian signed long + # # Exceptions # @@ -220,11 +219,7 @@ class BinaryDecoder(object): The float is converted into a 32-bit integer using a method equivalent to Java's floatToIntBits and then encoded in little-endian format. """ - bits = (((ord(self.read(1)) & 0xffL)) | - ((ord(self.read(1)) & 0xffL) << 8) | - ((ord(self.read(1)) & 0xffL) << 16) | - ((ord(self.read(1)) & 0xffL) << 24)) - return STRUCT_FLOAT.unpack(STRUCT_INT.pack(bits))[0] + return STRUCT_FLOAT.unpack(self.read(4))[0] def read_double(self): """ @@ -232,15 +227,7 @@ class BinaryDecoder(object): The double is converted into a 64-bit integer using a method equivalent to Java's doubleToLongBits and then encoded in little-endian format. """ - bits = (((ord(self.read(1)) & 0xffL)) | - ((ord(self.read(1)) & 0xffL) << 8) | - ((ord(self.read(1)) & 0xffL) << 16) | - ((ord(self.read(1)) & 0xffL) << 24) | - ((ord(self.read(1)) & 0xffL) << 32) | - ((ord(self.read(1)) & 0xffL) << 40) | - ((ord(self.read(1)) & 0xffL) << 48) | - ((ord(self.read(1)) & 0xffL) << 56)) - return STRUCT_DOUBLE.unpack(STRUCT_LONG.pack(bits))[0] + return STRUCT_DOUBLE.unpack(self.read(8))[0] def read_decimal_from_bytes(self, precision, scale): """ @@ -437,11 +424,7 @@ class BinaryEncoder(object): The float is converted into a 32-bit integer using a method equivalent to Java's floatToIntBits and then encoded in little-endian format. """ - bits = STRUCT_INT.unpack(STRUCT_FLOAT.pack(datum))[0] - self.write(chr((bits) & 0xFF)) - self.write(chr((bits >> 8) & 0xFF)) - self.write(chr((bits >> 16) & 0xFF)) - self.write(chr((bits >> 24) & 0xFF)) + self.write(STRUCT_FLOAT.pack(datum)) def write_double(self, datum): """ @@ -449,15 +432,7 @@ class BinaryEncoder(object): The double is converted into a 64-bit integer using a method equivalent to Java's doubleToLongBits and then encoded in little-endian format. """ - bits = STRUCT_LONG.unpack(STRUCT_DOUBLE.pack(datum))[0] - self.write(chr((bits) & 0xFF)) - self.write(chr((bits >> 8) & 0xFF)) - self.write(chr((bits >> 16) & 0xFF)) - self.write(chr((bits >> 24) & 0xFF)) - self.write(chr((bits >> 32) & 0xFF)) - self.write(chr((bits >> 40) & 0xFF)) - self.write(chr((bits >> 48) & 0xFF)) - self.write(chr((bits >> 56) & 0xFF)) + self.write(STRUCT_DOUBLE.pack(datum)) def write_decimal_bytes(self, datum, scale): """ diff --git a/lang/py3/avro/io.py b/lang/py3/avro/io.py index cd79393..51f5a13 100644 --- a/lang/py3/avro/io.py +++ b/lang/py3/avro/io.py @@ -61,9 +61,8 @@ LONG_MIN_VALUE = -(1 << 63) LONG_MAX_VALUE = (1 << 63) - 1 STRUCT_INT = struct.Struct('!I') # big-endian unsigned int -STRUCT_LONG = struct.Struct('!Q') # big-endian unsigned long long -STRUCT_FLOAT = struct.Struct('!f') # big-endian float -STRUCT_DOUBLE = struct.Struct('!d') # big-endian double +STRUCT_FLOAT = struct.Struct('<f') # little-endian float +STRUCT_DOUBLE = struct.Struct('<d') # little-endian double STRUCT_CRC32 = struct.Struct('>I') # big-endian unsigned int @@ -198,11 +197,7 @@ class BinaryDecoder(object): The float is converted into a 32-bit integer using a method equivalent to Java's floatToIntBits and then encoded in little-endian format. """ - bits = (((ord(self.read(1)) & 0xff)) | - ((ord(self.read(1)) & 0xff) << 8) | - ((ord(self.read(1)) & 0xff) << 16) | - ((ord(self.read(1)) & 0xff) << 24)) - return STRUCT_FLOAT.unpack(STRUCT_INT.pack(bits))[0] + return STRUCT_FLOAT.unpack(self.read(4))[0] def read_double(self): """ @@ -210,15 +205,7 @@ class BinaryDecoder(object): The double is converted into a 64-bit integer using a method equivalent to Java's doubleToLongBits and then encoded in little-endian format. """ - bits = (((ord(self.read(1)) & 0xff)) | - ((ord(self.read(1)) & 0xff) << 8) | - ((ord(self.read(1)) & 0xff) << 16) | - ((ord(self.read(1)) & 0xff) << 24) | - ((ord(self.read(1)) & 0xff) << 32) | - ((ord(self.read(1)) & 0xff) << 40) | - ((ord(self.read(1)) & 0xff) << 48) | - ((ord(self.read(1)) & 0xff) << 56)) - return STRUCT_DOUBLE.unpack(STRUCT_LONG.pack(bits))[0] + return STRUCT_DOUBLE.unpack(self.read(8))[0] def read_bytes(self): """ @@ -341,11 +328,7 @@ class BinaryEncoder(object): The float is converted into a 32-bit integer using a method equivalent to Java's floatToIntBits and then encoded in little-endian format. """ - bits = STRUCT_INT.unpack(STRUCT_FLOAT.pack(datum))[0] - self.WriteByte((bits) & 0xFF) - self.WriteByte((bits >> 8) & 0xFF) - self.WriteByte((bits >> 16) & 0xFF) - self.WriteByte((bits >> 24) & 0xFF) + self.write(STRUCT_FLOAT.pack(datum)) def write_double(self, datum): """ @@ -353,15 +336,7 @@ class BinaryEncoder(object): The double is converted into a 64-bit integer using a method equivalent to Java's doubleToLongBits and then encoded in little-endian format. """ - bits = STRUCT_LONG.unpack(STRUCT_DOUBLE.pack(datum))[0] - self.WriteByte((bits) & 0xFF) - self.WriteByte((bits >> 8) & 0xFF) - self.WriteByte((bits >> 16) & 0xFF) - self.WriteByte((bits >> 24) & 0xFF) - self.WriteByte((bits >> 32) & 0xFF) - self.WriteByte((bits >> 40) & 0xFF) - self.WriteByte((bits >> 48) & 0xFF) - self.WriteByte((bits >> 56) & 0xFF) + self.write(STRUCT_DOUBLE.pack(datum)) def write_bytes(self, datum): """
