This is an automated email from the ASF dual-hosted git repository. rskraba pushed a commit to branch branch-1.9 in repository https://gitbox.apache.org/repos/asf/avro.git
commit b824cf3ec444a2439f0e3ff0eba5871de7342fa4 Author: Michael A. Smith <[email protected]> AuthorDate: Fri Oct 25 17:58:36 2019 -0400 AVRO-2577: Fix Bare Excepts (#665) * AVRO-2577: Fix Bare Excepts * AVRO-2577: Don't Count Failure as Success --- lang/py/setup.cfg | 2 +- lang/py/src/avro/datafile.py | 53 ++++++++++++----- lang/py/src/avro/protocol.py | 39 ++++++------- lang/py/src/avro/schema.py | 7 ++- lang/py/test/sample_http_client.py | 2 +- lang/py/test/test_protocol.py | 90 +++++++++++++++-------------- lang/py/test/txsample_http_client.py | 2 +- lang/py3/avro/tests/sample_http_client.py | 2 +- lang/py3/avro/tests/test_protocol.py | 15 +++-- lang/py3/avro/tests/txsample_http_client.py | 2 +- lang/py3/setup.cfg | 2 +- 11 files changed, 123 insertions(+), 93 deletions(-) diff --git a/lang/py/setup.cfg b/lang/py/setup.cfg index 1f6fbfa..aaa8a29 100644 --- a/lang/py/setup.cfg +++ b/lang/py/setup.cfg @@ -20,6 +20,6 @@ known_third_party=zope [pycodestyle] exclude = .eggs,build -ignore = E101,E111,E114,E121,E122,E124,E125,E126,E127,E128,E129,E201,E202,E203,E222,E226,E225,E231,E241,E251,E261,E262,E265,E266,E301,E302,E303,E305,E306,E402,E501,E701,E703,E704,E711,E722,W191,W291,W292,W293,W391,W503,W504,W601 +ignore = E101,E111,E114,E121,E122,E124,E125,E126,E127,E128,E129,E201,E202,E203,E222,E226,E225,E231,E241,E251,E261,E262,E265,E266,E301,E302,E303,E305,E306,E402,E501,E701,E703,E704,E711,W191,W291,W292,W293,W391,W503,W504,W601 max-line-length = 150 statistics = True diff --git a/lang/py/src/avro/datafile.py b/lang/py/src/avro/datafile.py index 772d949..75a8e4a 100644 --- a/lang/py/src/avro/datafile.py +++ b/lang/py/src/avro/datafile.py @@ -1,3 +1,6 @@ +#!/usr/bin/env python + +## # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -13,9 +16,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Read/Write Avro File Object Containers. -""" + +"""Read/Write Avro File Object Containers.""" + +from __future__ import absolute_import, division, print_function + +import os +import random import zlib from avro import io, schema @@ -29,6 +36,11 @@ try: has_snappy = True except ImportError: has_snappy = False +try: + import zstandard as zstd + has_zstandard = True +except ImportError: + has_zstandard = False # # Constants # @@ -48,6 +60,8 @@ META_SCHEMA = schema.parse("""\ VALID_CODECS = ['null', 'deflate'] if has_snappy: VALID_CODECS.append('snappy') +if has_zstandard: + VALID_CODECS.append('zstandard') VALID_ENCODINGS = ['binary'] # not used yet CODEC_KEY = "avro.codec" @@ -99,7 +113,7 @@ class DataFileWriter(object): else: # open writer for reading to collect metadata dfr = DataFileReader(writer, io.DatumReader()) - + # TODO(hammer): collect arbitrary metadata # collect metadata self._sync_marker = dfr.sync_marker @@ -171,6 +185,9 @@ class DataFileWriter(object): elif self.get_meta(CODEC_KEY) == 'snappy': compressed_data = snappy.compress(uncompressed_data) compressed_data_length = len(compressed_data) + 4 # crc32 + elif self.get_meta(CODEC_KEY) == 'zstandard': + compressed_data = zstd.ZstdCompressor().compress(uncompressed_data) + compressed_data_length = len(compressed_data) else: fail_msg = '"%s" codec is not supported.' % self.get_meta(CODEC_KEY) raise DataFileException(fail_msg) @@ -180,7 +197,7 @@ class DataFileWriter(object): # Write block self.writer.write(compressed_data) - + # Write CRC32 checksum for Snappy if self.get_meta(CODEC_KEY) == 'snappy': self.encoder.write_crc32(uncompressed_data) @@ -189,7 +206,7 @@ class DataFileWriter(object): self.writer.write(self.sync_marker) # reset buffer - self.buffer_writer.truncate(0) + self.buffer_writer.truncate(0) self.block_count = 0 def append(self, datum): @@ -229,7 +246,7 @@ class DataFileReader(object): self._raw_decoder = io.BinaryDecoder(reader) self._datum_decoder = None # Maybe reset at every block. self._datum_reader = datum_reader - + # read the header: magic, meta, sync self._read_header() @@ -293,7 +310,7 @@ class DataFileReader(object): def _read_header(self): # seek to the beginning of the file to get magic block - self.reader.seek(0, 0) + self.reader.seek(0, 0) # read header into a dict header = self.datum_reader.read_data( @@ -332,6 +349,18 @@ class DataFileReader(object): uncompressed = snappy.decompress(data) self._datum_decoder = io.BinaryDecoder(StringIO(uncompressed)) self.raw_decoder.check_crc32(uncompressed); + elif self.codec == 'zstandard': + length = self.raw_decoder.read_long() + data = self.raw_decoder.read(length) + uncompressed = bytearray() + dctx = zstd.ZstdDecompressor() + with dctx.stream_reader(StringIO(data)) as reader: + while True: + chunk = reader.read(16384) + if not chunk: + break + uncompressed.extend(chunk) + self._datum_decoder = io.BinaryDecoder(StringIO(uncompressed)) else: raise DataFileException("Unknown codec: %r" % self.codec) @@ -360,7 +389,7 @@ class DataFileReader(object): else: self._read_block_header() - datum = self.datum_reader.read(self.datum_decoder) + datum = self.datum_reader.read(self.datum_decoder) self.block_count -= 1 return datum @@ -370,8 +399,6 @@ class DataFileReader(object): def generate_sixteen_random_bytes(): try: - import os return os.urandom(16) - except: - import random - return [ chr(random.randrange(256)) for i in range(16) ] + except NotImplementedError: + return [chr(random.randrange(256)) for i in range(16)] diff --git a/lang/py/src/avro/protocol.py b/lang/py/src/avro/protocol.py index f5c333b..117401c 100644 --- a/lang/py/src/avro/protocol.py +++ b/lang/py/src/avro/protocol.py @@ -1,3 +1,6 @@ +#!/usr/bin/env python + +## # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -14,18 +17,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -""" -Protocol implementation. -""" +"""Protocol implementation.""" -try: - from hashlib import md5 -except ImportError: - from md5 import md5 +from __future__ import absolute_import, division, print_function +import hashlib import json -from avro import schema +import avro.schema # # Constants @@ -38,7 +37,7 @@ VALID_TYPE_SCHEMA_TYPES = ('enum', 'record', 'error', 'fixed') # Exceptions # -class ProtocolParseException(schema.AvroException): +class ProtocolParseException(avro.schema.AvroException): pass # @@ -50,7 +49,7 @@ class Protocol(object): def _parse_types(self, types, type_names): type_objects = [] for type in types: - type_object = schema.make_avsc_object(type, type_names) + type_object = avro.schema.make_avsc_object(type, type_names) if type_object.type not in VALID_TYPE_SCHEMA_TYPES: fail_msg = 'Type %s not an enum, fixed, record, or error.' % type raise ProtocolParseException(fail_msg) @@ -60,7 +59,7 @@ class Protocol(object): def _parse_messages(self, messages, names): message_objects = {} for name, body in messages.iteritems(): - if message_objects.has_key(name): + if name in message_objects: fail_msg = 'Message name "%s" repeated.' % name raise ProtocolParseException(fail_msg) try: @@ -93,7 +92,7 @@ class Protocol(object): self._props = {} self.set_prop('name', name) - type_names = schema.Names() + type_names = avro.schema.Names() if namespace is not None: self.set_prop('namespace', namespace) type_names.default_namespace = namespace @@ -101,13 +100,13 @@ class Protocol(object): self.set_prop('types', self._parse_types(types, type_names)) if messages is not None: self.set_prop('messages', self._parse_messages(messages, type_names)) - self._md5 = md5(str(self)).digest() + self._md5 = hashlib.md5(str(self)).digest() # read-only properties name = property(lambda self: self.get_prop('name')) namespace = property(lambda self: self.get_prop('namespace')) fullname = property(lambda self: - schema.Name(self.name, self.namespace).fullname) + avro.schema.Name(self.name, self.namespace).fullname) types = property(lambda self: self.get_prop('types')) types_dict = property(lambda self: dict([(type.name, type) for type in self.types])) @@ -124,7 +123,7 @@ class Protocol(object): def to_json(self): to_dump = {} to_dump['protocol'] = self.name - names = schema.Names(default_namespace=self.namespace) + names = avro.schema.Names(default_namespace=self.namespace) if self.namespace: to_dump['namespace'] = self.namespace if self.types: @@ -149,20 +148,20 @@ class Message(object): if not isinstance(request, list): fail_msg = 'Request property not a list: %s' % request raise ProtocolParseException(fail_msg) - return schema.RecordSchema(None, None, request, names, 'request') + return avro.schema.RecordSchema(None, None, request, names, 'request') def _parse_response(self, response, names): if isinstance(response, basestring) and names.has_name(response, None): return names.get_name(response, None) else: - return schema.make_avsc_object(response, names) + return avro.schema.make_avsc_object(response, names) def _parse_errors(self, errors, names): if not isinstance(errors, list): fail_msg = 'Errors property not a list: %s' % errors raise ProtocolParseException(fail_msg) errors_for_parsing = {'type': 'error_union', 'declared_errors': errors} - return schema.make_avsc_object(errors_for_parsing, names) + return avro.schema.make_avsc_object(errors_for_parsing, names) def __init__(self, name, request, response, errors=None, names=None): self._name = name @@ -191,7 +190,7 @@ class Message(object): def to_json(self, names=None): if names is None: - names = schema.Names() + names = avro.schema.Names() to_dump = {} to_dump['request'] = self.request.to_json(names) to_dump['response'] = self.response.to_json(names) @@ -217,7 +216,7 @@ def parse(json_string): """Constructs the Protocol from the JSON text.""" try: json_data = json.loads(json_string) - except: + except ValueError: raise ProtocolParseException('Error parsing JSON: %s' % json_string) # construct the Avro Protocol object diff --git a/lang/py/src/avro/schema.py b/lang/py/src/avro/schema.py index e694d7e..776d6e1 100644 --- a/lang/py/src/avro/schema.py +++ b/lang/py/src/avro/schema.py @@ -610,9 +610,10 @@ class MapSchema(Schema): else: try: values_schema = make_avsc_object(values, names) - except: - fail_msg = 'Values schema not a valid Avro schema.' - raise SchemaParseException(fail_msg) + except SchemaParseException: + raise + except Exception: + raise SchemaParseException('Values schema is not a valid Avro schema.') self.set_prop('values', values_schema) diff --git a/lang/py/test/sample_http_client.py b/lang/py/test/sample_http_client.py index 4ad4925..62c91fd 100644 --- a/lang/py/test/sample_http_client.py +++ b/lang/py/test/sample_http_client.py @@ -72,7 +72,7 @@ if __name__ == '__main__': try: num_messages = int(sys.argv[4]) - except: + except IndexError: num_messages = 1 # build the parameters for the request diff --git a/lang/py/test/test_protocol.py b/lang/py/test/test_protocol.py index 3a8649f..4d8e783 100644 --- a/lang/py/test/test_protocol.py +++ b/lang/py/test/test_protocol.py @@ -1,3 +1,6 @@ +#!/usr/bin/env python + +## # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -5,17 +8,19 @@ # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at -# +# # https://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Test the protocol parsing logic. -""" + +"""Test the protocol parsing logic.""" + +from __future__ import absolute_import, division, print_function + import unittest from avro import protocol @@ -154,7 +159,7 @@ ExampleProtocol("""\ "types": [ {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16}, - {"name": "ReferencedRecord", "type": "record", + {"name": "ReferencedRecord", "type": "record", "fields": [ {"name": "foo", "type": "string"} ] }, {"name": "TestRecord", "type": "record", "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"}, @@ -167,7 +172,7 @@ ExampleProtocol("""\ "messages": { "echo": { - "request": [{"name": "qualified", + "request": [{"name": "qualified", "type": "org.apache.avro.test.namespace.TestRecord"}], "response": "TestRecord" }, @@ -188,13 +193,13 @@ ExampleProtocol("""\ "types": [ {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16}, - {"name": "ReferencedRecord", "type": "record", - "namespace": "org.apache.avro.other.namespace", + {"name": "ReferencedRecord", "type": "record", + "namespace": "org.apache.avro.other.namespace", "fields": [ {"name": "foo", "type": "string"} ] }, {"name": "TestRecord", "type": "record", "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"}, - {"name": "qualified", - "type": "org.apache.avro.other.namespace.ReferencedRecord"} + {"name": "qualified", + "type": "org.apache.avro.other.namespace.ReferencedRecord"} ] }, {"name": "TestError", @@ -204,7 +209,7 @@ ExampleProtocol("""\ "messages": { "echo": { - "request": [{"name": "qualified", + "request": [{"name": "qualified", "type": "org.apache.avro.test.namespace.TestRecord"}], "response": "TestRecord" }, @@ -225,10 +230,10 @@ ExampleProtocol("""\ "types": [ {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16}, - {"name": "ReferencedRecord", "type": "record", - "namespace": "org.apache.avro.other.namespace", + {"name": "ReferencedRecord", "type": "record", + "namespace": "org.apache.avro.other.namespace", "fields": [ {"name": "foo", "type": "string"} ] }, - {"name": "ReferencedRecord", "type": "record", + {"name": "ReferencedRecord", "type": "record", "fields": [ {"name": "bar", "type": "double"} ] }, {"name": "TestError", "type": "error", "fields": [ {"name": "message", "type": "string"} ] @@ -237,7 +242,7 @@ ExampleProtocol("""\ "messages": { "echo": { - "request": [{"name": "qualified", + "request": [{"name": "qualified", "type": "ReferencedRecord"}], "response": "org.apache.avro.other.namespace.ReferencedRecord" }, @@ -258,9 +263,9 @@ ExampleProtocol("""\ "types": [ {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16}, - {"name": "ReferencedRecord", "type": "record", + {"name": "ReferencedRecord", "type": "record", "fields": [ {"name": "foo", "type": "string"} ] }, - {"name": "ReferencedRecord", "type": "record", + {"name": "ReferencedRecord", "type": "record", "fields": [ {"name": "bar", "type": "double"} ] }, {"name": "TestError", "type": "error", "fields": [ {"name": "message", "type": "string"} ] @@ -269,7 +274,7 @@ ExampleProtocol("""\ "messages": { "echo": { - "request": [{"name": "qualified", + "request": [{"name": "qualified", "type": "ReferencedRecord"}], "response": "org.apache.avro.other.namespace.ReferencedRecord" }, @@ -352,12 +357,12 @@ class TestProtocol(unittest.TestCase): for example in EXAMPLES: try: protocol.parse(example.protocol_string) - if example.valid: + if example.valid: num_correct += 1 else: self.fail("Parsed invalid protocol: %s" % (example.name,)) - except Exception, e: - if not example.valid: + except Exception as e: + if not example.valid: num_correct += 1 else: self.fail("Coudl not parse valid protocol: %s" % (example.name,)) @@ -367,10 +372,10 @@ class TestProtocol(unittest.TestCase): self.assertEqual(num_correct, len(EXAMPLES), fail_msg) def test_inner_namespace_set(self): - print '' - print 'TEST INNER NAMESPACE' - print '===================' - print '' + print('') + print('TEST INNER NAMESPACE') + print('===================') + print('') proto = protocol.parse(HELLO_WORLD.protocol_string) self.assertEqual(proto.namespace, "com.acme") greeting_type = proto.types_dict['Greeting'] @@ -388,23 +393,22 @@ class TestProtocol(unittest.TestCase): Test that the string generated by an Avro Protocol object is, in fact, a valid Avro protocol. """ - print '' - print 'TEST CAST TO STRING' - print '===================' - print '' + print('') + print('TEST CAST TO STRING') + print('===================') + print('') num_correct = 0 for example in VALID_EXAMPLES: protocol_data = protocol.parse(example.protocol_string) try: - try: - protocol.parse(str(protocol_data)) - debug_msg = "%s: STRING CAST SUCCESS" % example.name - num_correct += 1 - except: - debug_msg = "%s: STRING CAST FAILURE" % example.name - finally: - print debug_msg + protocol.parse(str(protocol_data)) + except (ValueError, ProtocolParseException): + debug_msg = "%s: STRING CAST FAILURE" % example.name + else: + debug_msg = "%s: STRING CAST SUCCESS" % example.name + num_correct += 1 + print(debug_msg) fail_msg = "Cast to string success on %d out of %d protocols" % \ (num_correct, len(VALID_EXAMPLES)) @@ -417,10 +421,10 @@ class TestProtocol(unittest.TestCase): to generate Avro protocol "round trip". 3. Ensure "original" and "round trip" protocols are equivalent. """ - print '' - print 'TEST ROUND TRIP' - print '===============' - print '' + print('') + print('TEST ROUND TRIP') + print('===============') + print('') num_correct = 0 for example in VALID_EXAMPLES: @@ -430,7 +434,7 @@ class TestProtocol(unittest.TestCase): if original_protocol == round_trip_protocol: num_correct += 1 debug_msg = "%s: ROUND TRIP SUCCESS" % example.name - else: + else: self.fail("Round trip failure: %s %s %s", (example.name, example.protocol_string, str(original_protocol))) fail_msg = "Round trip success on %d out of %d protocols" % \ diff --git a/lang/py/test/txsample_http_client.py b/lang/py/test/txsample_http_client.py index 3841062..dba4ade 100644 --- a/lang/py/test/txsample_http_client.py +++ b/lang/py/test/txsample_http_client.py @@ -74,7 +74,7 @@ if __name__ == '__main__': try: num_messages = int(sys.argv[4]) - except: + except IndexError: num_messages = 1 # build the parameters for the request diff --git a/lang/py3/avro/tests/sample_http_client.py b/lang/py3/avro/tests/sample_http_client.py index da8d030..47212a1 100644 --- a/lang/py3/avro/tests/sample_http_client.py +++ b/lang/py3/avro/tests/sample_http_client.py @@ -74,7 +74,7 @@ if __name__ == '__main__': try: num_messages = int(sys.argv[4]) - except: + except IndexError: num_messages = 1 # build the parameters for the request diff --git a/lang/py3/avro/tests/test_protocol.py b/lang/py3/avro/tests/test_protocol.py index abd17e8..7dabd59 100644 --- a/lang/py3/avro/tests/test_protocol.py +++ b/lang/py3/avro/tests/test_protocol.py @@ -453,14 +453,13 @@ class TestProtocol(unittest.TestCase): proto = protocol.Parse(example.protocol_string) try: protocol.Parse(str(proto)) - logging.debug( - 'Successfully reparsed protocol:\n%s', - example.protocol_string) - num_correct += 1 - except: - logging.debug( - 'Failed to reparse protocol:\n%s', - example.protocol_string) + except ProtocolParseException: + logging.debug('Failed to reparse protocol:\n%s', + example.protocol_string) + continue + logging.debug('Successfully reparsed protocol:\n%s', + example.protocol_string) + num_correct += 1 fail_msg = ( 'Cast to string success on %d out of %d protocols' diff --git a/lang/py3/avro/tests/txsample_http_client.py b/lang/py3/avro/tests/txsample_http_client.py index fede960..f3e573a 100644 --- a/lang/py3/avro/tests/txsample_http_client.py +++ b/lang/py3/avro/tests/txsample_http_client.py @@ -76,7 +76,7 @@ if __name__ == '__main__': try: num_messages = int(sys.argv[4]) - except: + except IndexError: num_messages = 1 # build the parameters for the request diff --git a/lang/py3/setup.cfg b/lang/py3/setup.cfg index 89b17fe..28fcd45 100644 --- a/lang/py3/setup.cfg +++ b/lang/py3/setup.cfg @@ -72,6 +72,6 @@ known_third_party=zope [pycodestyle] exclude = .eggs,build -ignore = E111,E114,E121,E122,E124,E127,E128,E129,E201,E202,E203,E221,E225,E231,E241,E261,E301,E302,E303,E305,E402,E701,E703,E722,W503,W504 +ignore = E111,E114,E121,E122,E124,E127,E128,E129,E201,E202,E203,E221,E225,E231,E241,E261,E301,E302,E303,E305,E402,E701,E703,W503,W504 max-line-length = 150 statistics = True
