Hello community, here is the log from the commit of package python-avro for openSUSE:Factory checked in at 2019-06-22 11:24:47 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-avro (Old) and /work/SRC/openSUSE:Factory/.python-avro.new.4615 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-avro" Sat Jun 22 11:24:47 2019 rev:3 rq:711314 version:1.9.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-avro/python-avro.changes 2019-01-24 14:14:27.739289887 +0100 +++ /work/SRC/openSUSE:Factory/.python-avro.new.4615/python-avro.changes 2019-06-22 11:24:50.757427448 +0200 @@ -1,0 +2,6 @@ +Fri Jun 21 11:47:02 UTC 2019 - [email protected] + +- version update to 1.9.0 + * no upstream changelog found + +------------------------------------------------------------------- Old: ---- avro-1.8.2.tar.gz New: ---- avro-1.9.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-avro.spec ++++++ --- /var/tmp/diff_new_pack.OzYwpy/_old 2019-06-22 11:24:52.389429074 +0200 +++ /var/tmp/diff_new_pack.OzYwpy/_new 2019-06-22 11:24:52.393429077 +0200 @@ -1,7 +1,7 @@ # # spec file for package python-avro # -# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-avro -Version: 1.8.2 +Version: 1.9.0 Release: 0 Summary: A serialization and RPC framework for Python License: Apache-2.0 ++++++ avro-1.8.2.tar.gz -> avro-1.9.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/avro-1.8.2/PKG-INFO new/avro-1.9.0/PKG-INFO --- old/avro-1.8.2/PKG-INFO 2017-05-20 17:56:15.000000000 +0200 +++ new/avro-1.9.0/PKG-INFO 2019-05-21 16:59:01.000000000 +0200 @@ -1,11 +1,12 @@ -Metadata-Version: 1.0 +Metadata-Version: 2.1 Name: avro -Version: 1.8.2 +Version: 1.9.0 Summary: Avro is a serialization and RPC framework. Home-page: http://avro.apache.org/ Author: Apache Avro -Author-email: [email protected] +Author-email: [email protected] License: Apache License 2.0 Description: UNKNOWN Keywords: avro serialization rpc Platform: UNKNOWN +Provides-Extra: snappy diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/avro-1.8.2/avro.egg-info/PKG-INFO new/avro-1.9.0/avro.egg-info/PKG-INFO --- old/avro-1.8.2/avro.egg-info/PKG-INFO 2017-05-20 17:56:15.000000000 +0200 +++ new/avro-1.9.0/avro.egg-info/PKG-INFO 2019-05-21 16:59:01.000000000 +0200 @@ -1,11 +1,12 @@ -Metadata-Version: 1.0 +Metadata-Version: 2.1 Name: avro -Version: 1.8.2 +Version: 1.9.0 Summary: Avro is a serialization and RPC framework. Home-page: http://avro.apache.org/ Author: Apache Avro -Author-email: [email protected] +Author-email: [email protected] License: Apache License 2.0 Description: UNKNOWN Keywords: avro serialization rpc Platform: UNKNOWN +Provides-Extra: snappy diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/avro-1.8.2/scripts/avro new/avro-1.9.0/scripts/avro --- old/avro-1.8.2/scripts/avro 2017-05-07 19:26:19.000000000 +0200 +++ new/avro-1.9.0/scripts/avro 2019-05-21 15:54:34.000000000 +0200 @@ -209,7 +209,7 @@ argv = argv or sys.argv parser = OptionParser(description="Display/write for Avro files", - version="1.8.2", + version="1.9.0", usage="usage: %prog cat|write [options] FILE [FILE...]") # cat options diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/avro-1.8.2/setup.cfg new/avro-1.9.0/setup.cfg --- old/avro-1.8.2/setup.cfg 2017-05-20 17:56:15.000000000 +0200 +++ new/avro-1.9.0/setup.cfg 2019-05-21 16:59:01.000000000 +0200 @@ -1,5 +1,4 @@ [egg_info] tag_build = tag_date = 0 -tag_svn_revision = 0 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/avro-1.8.2/setup.py new/avro-1.9.0/setup.py --- old/avro-1.8.2/setup.py 2017-05-20 17:50:07.000000000 +0200 +++ new/avro-1.9.0/setup.py 2019-05-21 15:54:34.000000000 +0200 @@ -27,7 +27,7 @@ setup( name = 'avro', - version = '1.8.2', + version = '1.9.0', packages = ['avro',], package_dir = {'avro': 'src/avro'}, scripts = ["./scripts/avro"], @@ -41,7 +41,7 @@ # metadata for upload to PyPI author = 'Apache Avro', - author_email = '[email protected]',#'[email protected]', + author_email = '[email protected]', description = 'Avro is a serialization and RPC framework.', license = 'Apache License 2.0', keywords = 'avro serialization rpc', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/avro-1.8.2/src/avro/io.py new/avro-1.9.0/src/avro/io.py --- old/avro-1.8.2/src/avro/io.py 2017-05-07 19:26:19.000000000 +0200 +++ new/avro-1.9.0/src/avro/io.py 2019-05-21 15:54:34.000000000 +0200 @@ -41,6 +41,9 @@ import sys from binascii import crc32 +from decimal import Decimal +from decimal import getcontext + try: import json except ImportError: @@ -68,11 +71,14 @@ return struct.unpack(self.format, *args) struct_class = SimpleStruct -STRUCT_INT = struct_class('!I') # big-endian unsigned int -STRUCT_LONG = struct_class('!Q') # big-endian unsigned long long -STRUCT_FLOAT = struct_class('!f') # big-endian float -STRUCT_DOUBLE = struct_class('!d') # big-endian double -STRUCT_CRC32 = struct_class('>I') # big-endian unsigned int +STRUCT_INT = struct_class('!I') # big-endian unsigned int +STRUCT_LONG = struct_class('!Q') # big-endian unsigned long long +STRUCT_FLOAT = struct_class('!f') # big-endian float +STRUCT_DOUBLE = struct_class('!d') # big-endian double +STRUCT_CRC32 = struct_class('>I') # big-endian unsigned int +STRUCT_SIGNED_SHORT = struct_class('>h') # big-endian signed short +STRUCT_SIGNED_INT = struct_class('>i') # big-endian signed int +STRUCT_SIGNED_LONG = struct_class('>q') # big-endian signed long # # Exceptions @@ -108,6 +114,9 @@ elif schema_type == 'string': return isinstance(datum, basestring) elif schema_type == 'bytes': + if (hasattr(expected_schema, 'logical_type') and + expected_schema.logical_type == 'decimal'): + return isinstance(datum, Decimal) return isinstance(datum, str) elif schema_type == 'int': return ((isinstance(datum, int) or isinstance(datum, long)) @@ -118,7 +127,11 @@ elif schema_type in ['float', 'double']: return (isinstance(datum, int) or isinstance(datum, long) or isinstance(datum, float)) + # Check for int, float, long and decimal elif schema_type == 'fixed': + if (hasattr(expected_schema, 'logical_type') and + expected_schema.logical_type == 'decimal'): + return isinstance(datum, Decimal) return isinstance(datum, str) and len(datum) == expected_schema.size elif schema_type == 'enum': return datum in expected_schema.symbols @@ -219,6 +232,41 @@ ((ord(self.read(1)) & 0xffL) << 56)) return STRUCT_DOUBLE.unpack(STRUCT_LONG.pack(bits))[0] + def read_decimal_from_bytes(self, precision, scale): + """ + Decimal bytes are decoded as signed short, int or long depending on the + size of bytes. + """ + size = self.read_long() + return self.read_decimal_from_fixed(precision, scale, size) + + def read_decimal_from_fixed(self, precision, scale, size): + """ + Decimal is encoded as fixed. Fixed instances are encoded using the + number of bytes declared in the schema. + """ + datum = self.read(size) + unscaled_datum = 0 + msb = struct.unpack('!b', datum[0])[0] + leftmost_bit = (msb >> 7) & 1 + if leftmost_bit == 1: + modified_first_byte = ord(datum[0]) ^ (1 << 7) + datum = chr(modified_first_byte) + datum[1:] + for offset in range(size): + unscaled_datum <<= 8 + unscaled_datum += ord(datum[offset]) + unscaled_datum += pow(-2, (size*8) - 1) + else: + for offset in range(size): + unscaled_datum <<= 8 + unscaled_datum += ord(datum[offset]) + + original_prec = getcontext().prec + getcontext().prec = precision + scaled_datum = Decimal(unscaled_datum).scaleb(-scale) + getcontext().prec = original_prec + return scaled_datum + def read_bytes(self): """ Bytes are encoded as a long followed by that many bytes of data. @@ -341,6 +389,74 @@ self.write(chr((bits >> 48) & 0xFF)) self.write(chr((bits >> 56) & 0xFF)) + def write_decimal_bytes(self, datum, scale): + """ + Decimal in bytes are encoded as long. Since size of packed value in bytes for + signed long is 8, 8 bytes are written. + """ + sign, digits, exp = datum.as_tuple() + if exp > scale: + raise AvroTypeException('Scale provided in schema does not match the decimal') + + unscaled_datum = 0 + for digit in digits: + unscaled_datum = (unscaled_datum * 10) + digit + + bits_req = unscaled_datum.bit_length() + 1 + if sign: + unscaled_datum = (1 << bits_req) - unscaled_datum + + bytes_req = bits_req // 8 + padding_bits = ~((1 << bits_req) - 1) if sign else 0 + packed_bits = padding_bits | unscaled_datum + + bytes_req += 1 if (bytes_req << 3) < bits_req else 0 + self.write_long(bytes_req) + for index in range(bytes_req-1, -1, -1): + bits_to_write = packed_bits >> (8 * index) + self.write(chr(bits_to_write & 0xff)) + + def write_decimal_fixed(self, datum, scale, size): + """ + Decimal in fixed are encoded as size of fixed bytes. + """ + sign, digits, exp = datum.as_tuple() + if exp > scale: + raise AvroTypeException('Scale provided in schema does not match the decimal') + + unscaled_datum = 0 + for digit in digits: + unscaled_datum = (unscaled_datum * 10) + digit + + bits_req = unscaled_datum.bit_length() + 1 + size_in_bits = size * 8 + offset_bits = size_in_bits - bits_req + + mask = 2 ** size_in_bits - 1 + bit = 1 + for i in range(bits_req): + mask ^= bit + bit <<= 1 + + if bits_req < 8: + bytes_req = 1 + else: + bytes_req = bits_req // 8 + if bits_req % 8 != 0: + bytes_req += 1 + if sign: + unscaled_datum = (1 << bits_req) - unscaled_datum + unscaled_datum = mask | unscaled_datum + for index in range(size-1, -1, -1): + bits_to_write = unscaled_datum >> (8 * index) + self.write(chr(bits_to_write & 0xff)) + else: + for i in range(offset_bits/8): + self.write(chr(0)) + for index in range(bytes_req-1, -1, -1): + bits_to_write = unscaled_datum >> (8 * index) + self.write(chr(bits_to_write & 0xff)) + def write_bytes(self, datum): """ Bytes are encoded as a long followed by that many bytes of data. @@ -475,8 +591,22 @@ elif writers_schema.type == 'double': return decoder.read_double() elif writers_schema.type == 'bytes': - return decoder.read_bytes() + if (hasattr(writers_schema, 'logical_type') and + writers_schema.logical_type == 'decimal'): + return decoder.read_decimal_from_bytes( + writers_schema.get_prop('precision'), + writers_schema.get_prop('scale') + ) + else: + return decoder.read_bytes() elif writers_schema.type == 'fixed': + if (hasattr(writers_schema, 'logical_type') and + writers_schema.logical_type == 'decimal'): + return decoder.read_decimal_from_fixed( + writers_schema.get_prop('precision'), + writers_schema.get_prop('scale'), + writers_schema.size + ) return self.read_fixed(writers_schema, readers_schema, decoder) elif writers_schema.type == 'enum': return self.read_enum(writers_schema, readers_schema, decoder) @@ -787,9 +917,21 @@ elif writers_schema.type == 'double': encoder.write_double(datum) elif writers_schema.type == 'bytes': - encoder.write_bytes(datum) + if (hasattr(writers_schema, 'logical_type') and + writers_schema.logical_type == 'decimal'): + encoder.write_decimal_bytes(datum, writers_schema.get_prop('scale')) + else: + encoder.write_bytes(datum) elif writers_schema.type == 'fixed': - self.write_fixed(writers_schema, datum, encoder) + if (hasattr(writers_schema, 'logical_type') and + writers_schema.logical_type == 'decimal'): + encoder.write_decimal_fixed( + datum, + writers_schema.get_prop('scale'), + writers_schema.get_prop('size') + ) + else: + self.write_fixed(writers_schema, datum, encoder) elif writers_schema.type == 'enum': self.write_enum(writers_schema, datum, encoder) elif writers_schema.type == 'array': diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/avro-1.8.2/src/avro/protocol.py new/avro-1.9.0/src/avro/protocol.py --- old/avro-1.8.2/src/avro/protocol.py 2017-05-07 19:26:19.000000000 +0200 +++ new/avro-1.9.0/src/avro/protocol.py 2019-05-21 15:54:34.000000000 +0200 @@ -5,9 +5,9 @@ # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -62,12 +62,13 @@ if message_objects.has_key(name): fail_msg = 'Message name "%s" repeated.' % name raise ProtocolParseException(fail_msg) - elif not(hasattr(body, 'get') and callable(body.get)): + try: + request = body.get('request') + response = body.get('response') + errors = body.get('errors') + except AttributeError: fail_msg = 'Message name "%s" has non-object body %s.' % (name, body) raise ProtocolParseException(fail_msg) - request = body.get('request') - response = body.get('response') - errors = body.get('errors') message_objects[name] = Message(name, request, response, errors, names) return message_objects @@ -79,21 +80,20 @@ elif not isinstance(name, basestring): fail_msg = 'The name property must be a string.' raise ProtocolParseException(fail_msg) - elif namespace is not None and not isinstance(namespace, basestring): + elif not (namespace is None or isinstance(namespace, basestring)): fail_msg = 'The namespace property must be a string.' raise ProtocolParseException(fail_msg) - elif types is not None and not isinstance(types, list): + elif not (types is None or isinstance(types, list)): fail_msg = 'The types property must be a list.' raise ProtocolParseException(fail_msg) - elif (messages is not None and - not(hasattr(messages, 'get') and callable(messages.get))): + elif not (messages is None or callable(getattr(messages, 'get', None))): fail_msg = 'The messages property must be a JSON object.' raise ProtocolParseException(fail_msg) self._props = {} self.set_prop('name', name) type_names = schema.Names() - if namespace is not None: + if namespace is not None: self.set_prop('namespace', namespace) type_names.default_namespace = namespace if types is not None: @@ -105,7 +105,7 @@ # read-only properties name = property(lambda self: self.get_prop('name')) namespace = property(lambda self: self.get_prop('namespace')) - fullname = property(lambda self: + fullname = property(lambda self: schema.Name(self.name, self.namespace).fullname) types = property(lambda self: self.get_prop('types')) types_dict = property(lambda self: dict([(type.name, type) @@ -118,13 +118,13 @@ def get_prop(self, key): return self.props.get(key) def set_prop(self, key, value): - self.props[key] = value + self.props[key] = value def to_json(self): to_dump = {} to_dump['protocol'] = self.name names = schema.Names(default_namespace=self.namespace) - if self.namespace: + if self.namespace: to_dump['namespace'] = self.namespace if self.types: to_dump['types'] = [ t.to_json(names) for t in self.types ] @@ -149,7 +149,7 @@ fail_msg = 'Request property not a list: %s' % request raise ProtocolParseException(fail_msg) return schema.RecordSchema(None, None, request, names, 'request') - + def _parse_response(self, response, names): if isinstance(response, basestring) and names.has_name(response, None): return names.get_name(response, None) @@ -183,7 +183,7 @@ def get_prop(self, key): return self.props.get(key) def set_prop(self, key, value): - self.props[key] = value + self.props[key] = value def __str__(self): return json.dumps(self.to_json()) @@ -200,17 +200,17 @@ def __eq__(self, that): return self.name == that.name and self.props == that.props - + def make_avpr_object(json_data): """Build Avro Protocol from data parsed out of JSON string.""" - if hasattr(json_data, 'get') and callable(json_data.get): + try: name = json_data.get('protocol') namespace = json_data.get('namespace') types = json_data.get('types') messages = json_data.get('messages') - return Protocol(name, namespace, types, messages) - else: + except AttributeError: raise ProtocolParseException('Not a JSON object: %s' % json_data) + return Protocol(name, namespace, types, messages) def parse(json_string): """Constructs the Protocol from the JSON text.""" @@ -221,4 +221,3 @@ # construct the Avro Protocol object return make_avpr_object(json_data) - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/avro-1.8.2/src/avro/schema.py new/avro-1.9.0/src/avro/schema.py --- old/avro-1.8.2/src/avro/schema.py 2017-05-07 19:26:19.000000000 +0200 +++ new/avro-1.9.0/src/avro/schema.py 2019-05-21 15:54:34.000000000 +0200 @@ -5,9 +5,9 @@ # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -33,6 +33,9 @@ A boolean; or Null. """ +from math import floor +from math import log10 + try: import json except ImportError: @@ -110,6 +113,7 @@ class Schema(object): """Base class for all Schema classes.""" + _props = None def __init__(self, type, other_props=None): # Ensure valid ctor args if not isinstance(type, basestring): @@ -120,13 +124,14 @@ raise SchemaParseException(fail_msg) # add members - if not hasattr(self, '_props'): self._props = {} + if self._props is None: + self._props = {} self.set_prop('type', type) self.type = type self._props.update(other_props or {}) # Read-only properties dict. Printing schemas - # creates JSON properties directly from this dict. + # creates JSON properties directly from this dict. props = property(lambda self: self._props) # Read-only property dict. Non-reserved properties @@ -155,11 +160,11 @@ class Name(object): """Class to describe Avro name.""" - + def __init__(self, name_attr, space_attr, default_space): """ Formulate full name according to the specification. - + @arg name_attr: name value read in schema or None. @arg space_attr: namespace value read in schema or None. @ard default_space: the current default space or None. @@ -178,19 +183,19 @@ elif name_attr == "": fail_msg = 'Space must be non-empty string or None.' raise SchemaParseException(fail_msg) - + if not (isinstance(default_space, basestring) or (default_space is None)): fail_msg = 'Default space must be non-empty string or None.' raise SchemaParseException(fail_msg) elif name_attr == "": fail_msg = 'Default must be non-empty string or None.' raise SchemaParseException(fail_msg) - - self._full = None; - + + self._full = None; + if name_attr is None or name_attr == "": return; - + if (name_attr.find('.') < 0): if (space_attr is not None) and (space_attr != ""): self._full = "%s.%s" % (space_attr, name_attr) @@ -200,20 +205,20 @@ else: self._full = name_attr else: - self._full = name_attr - + self._full = name_attr + def __eq__(self, other): if not isinstance(other, Name): return False return (self.fullname == other.fullname) - + fullname = property(lambda self: self._full) def get_space(self): """Back out a namespace from full name.""" if self._full is None: return None - + if (self._full.find('.') > 0): return self._full.rsplit(".", 1)[0] else: @@ -224,17 +229,17 @@ def __init__(self, default_namespace=None): self.names = {} self.default_namespace = default_namespace - + def has_name(self, name_attr, space_attr): test = Name(name_attr, space_attr, self.default_namespace).fullname return self.names.has_key(test) - - def get_name(self, name_attr, space_attr): + + def get_name(self, name_attr, space_attr): test = Name(name_attr, space_attr, self.default_namespace).fullname if not self.names.has_key(test): return None return self.names[test] - + def prune_namespace(self, properties): """given a properties, return properties with namespace removed if it matches the own default namespace""" @@ -255,14 +260,14 @@ def add_name(self, name_attr, space_attr, new_schema): """ Add a new schema object to the name set. - + @arg name_attr: name value read in schema @arg space_attr: namespace value read in schema. - + @return: the Name that was just added. """ to_add = Name(name_attr, space_attr, self.default_namespace) - + if to_add.fullname in VALID_TYPES: fail_msg = '%s is a reserved type name.' % to_add.fullname raise SchemaParseException(fail_msg) @@ -295,12 +300,12 @@ # Store name and namespace as they were read in origin schema self.set_prop('name', name) - if namespace is not None: + if namespace is not None: self.set_prop('namespace', new_name.get_space()) # Store full name as calculated from name, namespace self._fullname = new_name.fullname - + def name_ref(self, names): if self.namespace == names.default_namespace: return self.name @@ -312,6 +317,42 @@ namespace = property(lambda self: self.get_prop('namespace')) fullname = property(lambda self: self._fullname) +# +# Logical type class +# + +class LogicalSchema(object): + def __init__(self, logical_type): + self.logical_type = logical_type + +# +# Decimal logical schema +# + +class DecimalLogicalSchema(LogicalSchema): + def __init__(self, precision, scale=0): + max_precision = self._max_precision() + if not isinstance(precision, int) or precision <= 0: + raise SchemaParseException("""Precision is required for logical type + DECIMAL and must be a positive integer but + is %s.""" % precision) + elif precision > max_precision: + raise SchemaParseException("Cannot store precision digits. Max is %s" + %(max_precision)) + + if not isinstance(scale, int) or scale < 0: + raise SchemaParseException("Scale %s must be a positive Integer." % scale) + + elif scale > precision: + raise SchemaParseException("Invalid DECIMAL scale %s. Cannot be greater than precision %s" + %(scale, precision)) + + LogicalSchema.__init__(self, 'decimal') + + def _max_precision(self): + raise NotImplementedError() + + class Field(object): def __init__(self, type, name, has_default, default=None, order=None,names=None, doc=None, other_props=None): @@ -405,14 +446,40 @@ return self.props == that.props # +# Decimal Bytes Type +# + +class BytesDecimalSchema(PrimitiveSchema, DecimalLogicalSchema): + def __init__(self, precision, scale=0, other_props=None): + DecimalLogicalSchema.__init__(self, precision, scale) + PrimitiveSchema.__init__(self, 'bytes', other_props) + self.set_prop('precision', precision) + self.set_prop('scale', scale) + + # read-only properties + precision = property(lambda self: self.get_prop('precision')) + scale = property(lambda self: self.get_prop('scale')) + + def _max_precision(self): + # Considering the max 32 bit integer value + return (1 << 31) - 1 + + def to_json(self, names=None): + return self.props + + def __eq__(self, that): + return self.props == that.props + + +# # Complex Types (non-recursive) # class FixedSchema(NamedSchema): def __init__(self, name, namespace, size, names=None, other_props=None): # Ensure valid ctor args - if not isinstance(size, int): - fail_msg = 'Fixed Schema requires a valid integer for size property.' + if not isinstance(size, int) or size < 0: + fail_msg = 'Fixed Schema requires a valid positive integer for size property.' raise AvroException(fail_msg) # Call parent ctor @@ -436,6 +503,31 @@ def __eq__(self, that): return self.props == that.props +# +# Decimal Fixed Type +# + +class FixedDecimalSchema(FixedSchema, DecimalLogicalSchema): + def __init__(self, size, name, precision, scale=0, namespace=None, names=None, other_props=None): + FixedSchema.__init__(self, name, namespace, size, names, other_props) + DecimalLogicalSchema.__init__(self, precision, scale) + self.set_prop('precision', precision) + self.set_prop('scale', scale) + + # read-only properties + precision = property(lambda self: self.get_prop('precision')) + scale = property(lambda self: self.get_prop('scale')) + + def _max_precision(self): + return round(floor(log10(pow(2, (8 * self.size - 1)) - 1))) + + def to_json(self, names=None): + return self.props + + def __eq__(self, that): + return self.props == that.props + + class EnumSchema(NamedSchema): def __init__(self, name, namespace, symbols, names=None, doc=None, other_props=None): # Ensure valid ctor args @@ -609,8 +701,8 @@ field_objects = [] field_names = [] for i, field in enumerate(field_data): - if hasattr(field, 'get') and callable(field.get): - type = field.get('type') + if callable(getattr(field, 'get', None)): + type = field.get('type') name = field.get('name') # null values can have a default value of None @@ -652,7 +744,7 @@ NamedSchema.__init__(self, schema_type, name, namespace, names, other_props) - if schema_type == 'record': + if schema_type == 'record': old_default = names.default_namespace names.default_namespace = Name(name, namespace, names.default_namespace).get_space() @@ -704,7 +796,7 @@ Retrieve the non-reserved properties from a dictionary of properties @args reserved_props: The set of reserved properties to exclude """ - if hasattr(all_props, 'items') and callable(all_props.items): + if callable(getattr(all_props, 'items', None)): return dict([(k,v) for (k,v) in all_props.items() if k not in reserved_props ]) @@ -717,18 +809,32 @@ """ if names == None: names = Names() - + # JSON object (non-union) - if hasattr(json_data, 'get') and callable(json_data.get): + if callable(getattr(json_data, 'get', None)): type = json_data.get('type') other_props = get_other_props(json_data, SCHEMA_RESERVED_PROPS) + logical_type = None + if 'logicalType' in json_data: + logical_type = json_data.get('logicalType') + if logical_type != 'decimal': + raise SchemaParseException("Currently does not support %s logical type" % logical_type) if type in PRIMITIVE_TYPES: + if type == 'bytes': + if logical_type == 'decimal': + precision = json_data.get('precision') + scale = 0 if json_data.get('scale') is None else json_data.get('scale') + return BytesDecimalSchema(precision, scale, other_props) return PrimitiveSchema(type, other_props) elif type in NAMED_TYPES: name = json_data.get('name') namespace = json_data.get('namespace', names.default_namespace) if type == 'fixed': size = json_data.get('size') + if logical_type == 'decimal': + precision = json_data.get('precision') + scale = 0 if json_data.get('scale') is None else json_data.get('scale') + return FixedDecimalSchema(size, name, precision, scale, namespace, names, other_props) return FixedSchema(name, namespace, size, names, other_props) elif type == 'enum': symbols = json_data.get('symbols') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/avro-1.8.2/test/test_datafile_interop.py new/avro-1.9.0/test/test_datafile_interop.py --- old/avro-1.8.2/test/test_datafile_interop.py 2017-05-07 19:26:20.000000000 +0200 +++ new/avro-1.9.0/test/test_datafile_interop.py 2019-05-21 15:54:35.000000000 +0200 @@ -27,12 +27,12 @@ print 'TEST INTEROP' print '============' print '' - for f in os.listdir('/root/avro/lang/py/../../build/interop/data'): + for f in os.listdir('/avro/lang/py/../../build/interop/data'): print 'READING %s' % f print '' # read data in binary from file - reader = open(os.path.join('/root/avro/lang/py/../../build/interop/data', f), 'rb') + reader = open(os.path.join('/avro/lang/py/../../build/interop/data', f), 'rb') datum_reader = io.DatumReader() dfr = datafile.DataFileReader(reader, datum_reader) for datum in dfr: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/avro-1.8.2/test/test_io.py new/avro-1.9.0/test/test_io.py --- old/avro-1.8.2/test/test_io.py 2017-05-07 19:26:19.000000000 +0200 +++ new/avro-1.9.0/test/test_io.py 2019-05-21 15:54:34.000000000 +0200 @@ -14,6 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. import unittest + +from decimal import Decimal + try: from cStringIO import StringIO except ImportError: @@ -35,6 +38,12 @@ ('"float"', 1234.0), ('"double"', 1234.0), ('{"type": "fixed", "name": "Test", "size": 1}', 'B'), + ('{"type": "fixed", "logicalType": "decimal", "name": "Test", "size": 8, "precision": 5, "scale": 4}', + Decimal('3.1415')), + ('{"type": "fixed", "logicalType": "decimal", "name": "Test", "size": 8, "precision": 5, "scale": 4}', + Decimal('-3.1415')), + ('{"type": "bytes", "logicalType": "decimal", "precision": 5, "scale": 4}', Decimal('3.1415')), + ('{"type": "bytes", "logicalType": "decimal", "precision": 5, "scale": 4}', Decimal('-3.1415')), ('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', 'B'), ('{"type": "array", "items": "long"}', [1, 3, 2]), ('{"type": "map", "values": "long"}', {'a': 1, 'b': 3, 'c': 2}), @@ -199,6 +208,9 @@ round_trip_datum = read_datum(writer, writers_schema) print 'Round Trip Datum: %s' % round_trip_datum + if isinstance(round_trip_datum, Decimal): + round_trip_datum = round_trip_datum.to_eng_string() + datum = str(datum) if datum == round_trip_datum: correct += 1 self.assertEquals(correct, len(SCHEMAS_TO_VALIDATE)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/avro-1.8.2/test/test_schema.py new/avro-1.9.0/test/test_schema.py --- old/avro-1.8.2/test/test_schema.py 2017-05-07 19:26:19.000000000 +0200 +++ new/avro-1.9.0/test/test_schema.py 2019-05-21 15:54:34.000000000 +0200 @@ -17,6 +17,9 @@ Test the schema parsing logic. """ import unittest + +from avro.schema import SchemaParseException, AvroException + import set_avro_test_path from avro import schema @@ -295,6 +298,21 @@ """, True) ] +DECIMAL_LOGICAL_TYPE = [ + ExampleSchema("""{ + "type": "fixed", + "logicalType": "decimal", + "name": "TestDecimal", + "precision": 4, + "size": 10, + "scale": 2}""", True), + ExampleSchema("""{ + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2}""", True) +] + EXAMPLES = PRIMITIVE_EXAMPLES EXAMPLES += FIXED_EXAMPLES EXAMPLES += ENUM_EXAMPLES @@ -303,6 +321,7 @@ EXAMPLES += UNION_EXAMPLES EXAMPLES += RECORD_EXAMPLES EXAMPLES += DOC_EXAMPLES +EXAMPLES += DECIMAL_LOGICAL_TYPE VALID_EXAMPLES = [e for e in EXAMPLES if e.valid] @@ -491,5 +510,78 @@ self.assertTrue(caught_exception, 'Exception was not caught') + def test_decimal_invalid_schema(self): + invalid_schemas = [ + ExampleSchema("""{ + "type": "bytes", + "logicalType": "decimal", + "precision": 2, + "scale": -2}""", True), + + ExampleSchema("""{ + "type": "bytes", + "logicalType": "decimal", + "precision": -2, + "scale": 2}""", True), + + ExampleSchema("""{ + "type": "bytes", + "logicalType": "decimal", + "precision": 2, + "scale": 3}""", True), + + ExampleSchema("""{ + "type": "fixed", + "logicalType": "decimal", + "name": "TestDecimal", + "precision": -10, + "scale": 2, + "size": 5}""", True), + + + ExampleSchema("""{ + "type": "fixed", + "logicalType": "decimal", + "name": "TestDecimal", + "precision": 2, + "scale": 3, + "size": 2}""", True) + ] + + for invalid_schema in invalid_schemas: + self.assertRaises(SchemaParseException, schema.parse, invalid_schema.schema_string) + + fixed_invalid_schema_size = ExampleSchema("""{ + "type": "fixed", + "logicalType": "decimal", + "name": "TestDecimal", + "precision": 2, + "scale": 2, + "size": -2}""", True) + self.assertRaises(AvroException, schema.parse, fixed_invalid_schema_size.schema_string) + + def test_decimal_valid_type(self): + fixed_decimal_schema = ExampleSchema("""{ + "type": "fixed", + "logicalType": "decimal", + "name": "TestDecimal", + "precision": 4, + "scale": 2, + "size": 2}""", True) + + bytes_decimal_schema = ExampleSchema("""{ + "type": "bytes", + "logicalType": "decimal", + "precision": 4}""", True) + + fixed_decimal = schema.parse(fixed_decimal_schema.schema_string) + self.assertEqual(4, fixed_decimal.get_prop('precision')) + self.assertEqual(2, fixed_decimal.get_prop('scale')) + self.assertEqual(2, fixed_decimal.get_prop('size')) + + bytes_decimal = schema.parse(bytes_decimal_schema.schema_string) + self.assertEqual(4, bytes_decimal.get_prop('precision')) + self.assertEqual(0, bytes_decimal.get_prop('scale')) + if __name__ == '__main__': unittest.main() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/avro-1.8.2/test/test_tether_task_runner.py new/avro-1.9.0/test/test_tether_task_runner.py --- old/avro-1.8.2/test/test_tether_task_runner.py 2017-05-07 19:26:19.000000000 +0200 +++ new/avro-1.9.0/test/test_tether_task_runner.py 2019-05-21 15:54:34.000000000 +0200 @@ -59,9 +59,6 @@ runner.start(outputport=parent_port,join=False) - # Wait for the server to start - time.sleep(1) - # Test sending various messages to the server and ensuring they are # processed correctly requestor=HTTPRequestor("localhost",runner.server.server_address[1],inputProtocol) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/avro-1.8.2/test/test_tether_word_count.py new/avro-1.9.0/test/test_tether_word_count.py --- old/avro-1.8.2/test/test_tether_word_count.py 2017-05-07 19:26:20.000000000 +0200 +++ new/avro-1.9.0/test/test_tether_word_count.py 2019-05-21 15:54:36.000000000 +0200 @@ -146,7 +146,7 @@ args.append("java") args.append("-jar") - args.append(os.path.abspath("/root/avro/lang/py/../java/tools/target/avro-tools-1.8.2.jar")) + args.append(os.path.abspath("/avro/lang/py/../java/tools/target/avro-tools-1.9.0.jar")) args.append("tether") @@ -189,7 +189,6 @@ proc.wait() - time.sleep(1) # wait a bit longer to clean up # read the output with file(os.path.join(outpath,"part-00000.avro")) as hf:
