This is an automated email from the ASF dual-hosted git repository. rskraba pushed a commit to branch branch-1.9 in repository https://gitbox.apache.org/repos/asf/avro.git
commit e77241990490b8cd06df4dd7ea02d59535fab745 Author: Michael A. Smith <[email protected]> AuthorDate: Fri Oct 25 16:51:47 2019 -0400 AVRO-2580: Enforce Logical Type and Literal Type Match (#668) * AVRO-2580: Refactor Schema.parse Test * AVRO-2580: Rewrite Tests to Highlight Bug * AVRO-2580: Require Logical Type to Match Literal Type * AVRO-2580: Refactor Schema Tests 1. Enable showing multiple failures in a single run. 2. Use JSON to format test schema when possible. --- lang/py/src/avro/schema.py | 61 ++-- lang/py/test/test_schema.py | 775 +++++++++++++++++--------------------------- 2 files changed, 330 insertions(+), 506 deletions(-) diff --git a/lang/py/src/avro/schema.py b/lang/py/src/avro/schema.py index 776d6e1..822d76c 100644 --- a/lang/py/src/avro/schema.py +++ b/lang/py/src/avro/schema.py @@ -876,15 +876,36 @@ class TimestampMicrosSchema(LogicalSchema, PrimitiveSchema): # # Module Methods # -def get_other_props(all_props,reserved_props): +def get_other_props(all_props, reserved_props): """ Retrieve the non-reserved properties from a dictionary of properties @args reserved_props: The set of reserved properties to exclude """ if callable(getattr(all_props, 'items', None)): - return dict([(k,v) for (k,v) in all_props.items() if k not in - reserved_props ]) - + return {k: v for k, v in all_props.items() if k not in reserved_props} + +def make_bytes_decimal_schema(other_props): + """Make a BytesDecimalSchema from just other_props.""" + return BytesDecimalSchema(other_props.get('precision'), other_props.get('scale', 0)) + +def make_logical_schema(logical_type, type_, other_props): + """Map the logical types to the appropriate literal type and schema class.""" + logical_types = { + constants.DATE: ('int', DateSchema), + # Fixed decimal schema is handled before we get here. + constants.DECIMAL: ('bytes', make_bytes_decimal_schema), + constants.TIMESTAMP_MICROS: ('long', TimestampMicrosSchema), + constants.TIMESTAMP_MILLIS: ('long', TimestampMillisSchema), + constants.TIME_MICROS: ('long', TimeMicrosSchema), + constants.TIME_MILLIS: ('int', TimeMillisSchema), + } + try: + literal_type, schema_type = logical_types[logical_type] + except KeyError: + raise SchemaParseException("Currently does not support {} logical type".format(logical_type)) + if literal_type != type_: + raise SchemaParseException("Logical type {} requires literal type {}, not {}".format(logical_type, literal_type, type_)) + return schema_type(other_props) def make_avsc_object(json_data, names=None): """ @@ -892,35 +913,15 @@ def make_avsc_object(json_data, names=None): @arg names: A Name object (tracks seen names and default space) """ - if names == None: + if names is None: names = Names() # JSON object (non-union) if callable(getattr(json_data, 'get', None)): type = json_data.get('type') other_props = get_other_props(json_data, SCHEMA_RESERVED_PROPS) - logical_type = None - if 'logicalType' in json_data: - logical_type = json_data.get('logicalType') - if logical_type not in constants.SUPPORTED_LOGICAL_TYPE: - raise SchemaParseException("Currently does not support %s logical type" % logical_type) - if type in PRIMITIVE_TYPES: - if type == 'int' and logical_type == constants.DATE: - return DateSchema(other_props) - if type == 'int' and logical_type == constants.TIME_MILLIS: - return TimeMillisSchema(other_props=other_props) - if type == 'long' and logical_type == constants.TIME_MICROS: - return TimeMicrosSchema(other_props=other_props) - if type == 'long' and logical_type == constants.TIMESTAMP_MILLIS: - return TimestampMillisSchema(other_props=other_props) - if type == 'long' and logical_type == constants.TIMESTAMP_MICROS: - return TimestampMicrosSchema(other_props=other_props) - if type == 'bytes' and logical_type == constants.DECIMAL: - precision = json_data.get('precision') - scale = 0 if json_data.get('scale') is None else json_data.get('scale') - return BytesDecimalSchema(precision, scale, other_props) - return PrimitiveSchema(type, other_props) - elif type in NAMED_TYPES: + logical_type = json_data.get('logicalType') + if type in NAMED_TYPES: name = json_data.get('name') namespace = json_data.get('namespace', names.default_namespace) if type == 'fixed': @@ -940,7 +941,11 @@ def make_avsc_object(json_data, names=None): return RecordSchema(name, namespace, fields, names, type, doc, other_props) else: raise SchemaParseException('Unknown Named Type: %s' % type) - elif type in VALID_TYPES: + if logical_type: + return make_logical_schema(logical_type, type, other_props or {}) + if type in PRIMITIVE_TYPES: + return PrimitiveSchema(type, other_props) + if type in VALID_TYPES: if type == 'array': items = json_data.get('items') return ArraySchema(items, names, other_props) diff --git a/lang/py/test/test_schema.py b/lang/py/test/test_schema.py index dd5aa8a..6b2c0ef 100644 --- a/lang/py/test/test_schema.py +++ b/lang/py/test/test_schema.py @@ -1,3 +1,6 @@ +#!/usr/bin/env python + +## # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -5,17 +8,20 @@ # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at -# +# # https://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -""" -Test the schema parsing logic. -""" + +"""Test the schema parsing logic.""" + +from __future__ import absolute_import, division, print_function + +import json import unittest import set_avro_test_path @@ -23,356 +29,208 @@ from avro import schema from avro.schema import AvroException, SchemaParseException -def print_test_name(test_name): - print '' - print test_name - print '=' * len(test_name) - print '' +class TestSchema(object): + """A proxy for a schema string that provides useful test metadata.""" -class ExampleSchema(object): - def __init__(self, schema_string, valid, name='', comment=''): - self._schema_string = schema_string - self._valid = valid - self._name = name or schema_string # default to schema_string for name + def __init__(self, data, name='', comment=''): + if not isinstance(data, basestring): + data = json.dumps(data) + self.data = data + self.name = name or data # default to data for name self.comment = comment - @property - def schema_string(self): - return self._schema_string + def parse(self): + return schema.parse(str(self)) - @property - def valid(self): - return self._valid + def __str__(self): + return str(self.data) - @property - def name(self): - return self._name -# -# Example Schemas -# +class ValidTestSchema(TestSchema): + """A proxy for a valid schema string that provides useful test metadata.""" + valid = True + -def make_primitive_examples(): - examples = [] - for type in schema.PRIMITIVE_TYPES: - examples.append(ExampleSchema('"%s"' % type, True)) - examples.append(ExampleSchema('{"type": "%s"}' % type, True)) - return examples +class InvalidTestSchema(ValidTestSchema): + """A proxy for an invalid schema string that provides useful test metadata.""" + valid = False -PRIMITIVE_EXAMPLES = [ - ExampleSchema('"True"', False), - ExampleSchema('True', False), - ExampleSchema('{"no_type": "test"}', False), - ExampleSchema('{"type": "panther"}', False), -] + make_primitive_examples() + +PRIMITIVE_EXAMPLES = ([ + InvalidTestSchema('"True"'), + InvalidTestSchema('True'), + InvalidTestSchema('{"no_type": "test"}'), + InvalidTestSchema('{"type": "panther"}'), +] + [ValidTestSchema('"{}"'.format(t)) for t in schema.PRIMITIVE_TYPES] + + [ValidTestSchema({"type": t}) for t in schema.PRIMITIVE_TYPES]) FIXED_EXAMPLES = [ - ExampleSchema('{"type": "fixed", "name": "Test", "size": 1}', True), - ExampleSchema("""\ - {"type": "fixed", - "name": "MyFixed", - "namespace": "org.apache.hadoop.avro", - "size": 1} - """, True), - ExampleSchema("""\ - {"type": "fixed", - "name": "Missing size"} - """, False), - ExampleSchema("""\ - {"type": "fixed", - "size": 314} - """, False), + ValidTestSchema({"type": "fixed", "name": "Test", "size": 1}), + ValidTestSchema({"type": "fixed", "name": "MyFixed", "size": 1, + "namespace": "org.apache.hadoop.avro"}), + InvalidTestSchema({"type": "fixed", "name": "Missing size"}), + InvalidTestSchema({"type": "fixed", "size": 314}), ] ENUM_EXAMPLES = [ - ExampleSchema('{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', True), - ExampleSchema("""\ - {"type": "enum", - "name": "Status", - "symbols": "Normal Caution Critical"} - """, False), - ExampleSchema("""\ - {"type": "enum", - "name": [ 0, 1, 1, 2, 3, 5, 8 ], - "symbols": ["Golden", "Mean"]} - """, False), - ExampleSchema("""\ - {"type": "enum", - "symbols" : ["I", "will", "fail", "no", "name"]} - """, False), - ExampleSchema("""\ - {"type": "enum", - "name": "Test" - "symbols" : ["AA", "AA"]} - """, False), + ValidTestSchema({"type": "enum", "name": "Test", "symbols": ["A", "B"]}), + InvalidTestSchema({"type": "enum", "name": "Status", "symbols": "Normal Caution Critical"}), + InvalidTestSchema({"type": "enum", "name": [0, 1, 1, 2, 3, 5, 8], + "symbols": ["Golden", "Mean"]}), + InvalidTestSchema({"type": "enum", "symbols" : ["I", "will", "fail", "no", "name"]}), + InvalidTestSchema({"type": "enum", "name": "Test", "symbols": ["AA", "AA"]}), ] ARRAY_EXAMPLES = [ - ExampleSchema('{"type": "array", "items": "long"}', True), - ExampleSchema("""\ - {"type": "array", - "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}} - """, True), + ValidTestSchema({"type": "array", "items": "long"}), + ValidTestSchema({"type": "array", + "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}), ] MAP_EXAMPLES = [ - ExampleSchema('{"type": "map", "values": "long"}', True), - ExampleSchema("""\ - {"type": "map", - "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}} - """, True), + ValidTestSchema({"type": "map", "values": "long"}), + ValidTestSchema({"type": "map", + "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}), ] UNION_EXAMPLES = [ - ExampleSchema('["string", "null", "long"]', True), - ExampleSchema('["null", "null"]', False), - ExampleSchema('["long", "long"]', False), - ExampleSchema("""\ - [{"type": "array", "items": "long"} - {"type": "array", "items": "string"}] - """, False), + ValidTestSchema(["string", "null", "long"]), + InvalidTestSchema(["null", "null"]), + InvalidTestSchema(["long", "long"]), + InvalidTestSchema([{"type": "array", "items": "long"}, + {"type": "array", "items": "string"}]), ] RECORD_EXAMPLES = [ - ExampleSchema("""\ - {"type": "record", - "name": "Test", - "fields": [{"name": "f", - "type": "long"}]} - """, True), - ExampleSchema("""\ - {"type": "error", - "name": "Test", - "fields": [{"name": "f", - "type": "long"}]} - """, True), - ExampleSchema("""\ - {"type": "record", - "name": "Node", - "fields": [{"name": "label", "type": "string"}, - {"name": "children", - "type": {"type": "array", "items": "Node"}}]} - """, True), - ExampleSchema("""\ - {"type": "record", - "name": "Lisp", - "fields": [{"name": "value", - "type": ["null", "string", - {"type": "record", - "name": "Cons", - "fields": [{"name": "car", "type": "Lisp"}, - {"name": "cdr", "type": "Lisp"}]}]}]} - """, True), - ExampleSchema("""\ - {"type": "record", - "name": "HandshakeRequest", - "namespace": "org.apache.avro.ipc", - "fields": [{"name": "clientHash", - "type": {"type": "fixed", "name": "MD5", "size": 16}}, - {"name": "clientProtocol", "type": ["null", "string"]}, - {"name": "serverHash", "type": "MD5"}, - {"name": "meta", - "type": ["null", {"type": "map", "values": "bytes"}]}]} - """, True), - ExampleSchema("""\ - {"type": "record", - "name": "HandshakeResponse", - "namespace": "org.apache.avro.ipc", - "fields": [{"name": "match", - "type": {"type": "enum", - "name": "HandshakeMatch", - "symbols": ["BOTH", "CLIENT", "NONE"]}}, - {"name": "serverProtocol", "type": ["null", "string"]}, - {"name": "serverHash", - "type": ["null", - {"name": "MD5", "size": 16, "type": "fixed"}]}, - {"name": "meta", - "type": ["null", {"type": "map", "values": "bytes"}]}]} - """, True), - ExampleSchema("""\ - {"type": "record", - "name": "Interop", - "namespace": "org.apache.avro", - "fields": [{"name": "intField", "type": "int"}, - {"name": "longField", "type": "long"}, - {"name": "stringField", "type": "string"}, - {"name": "boolField", "type": "boolean"}, - {"name": "floatField", "type": "float"}, - {"name": "doubleField", "type": "double"}, - {"name": "bytesField", "type": "bytes"}, - {"name": "nullField", "type": "null"}, - {"name": "arrayField", - "type": {"type": "array", "items": "double"}}, - {"name": "mapField", - "type": {"type": "map", - "values": {"name": "Foo", - "type": "record", - "fields": [{"name": "label", - "type": "string"}]}}}, - {"name": "unionField", - "type": ["boolean", - "double", - {"type": "array", "items": "bytes"}]}, - {"name": "enumField", - "type": {"type": "enum", - "name": "Kind", - "symbols": ["A", "B", "C"]}}, - {"name": "fixedField", - "type": {"type": "fixed", "name": "MD5", "size": 16}}, - {"name": "recordField", - "type": {"type": "record", - "name": "Node", - "fields": [{"name": "label", "type": "string"}, - {"name": "children", - "type": {"type": "array", - "items": "Node"}}]}}]} - """, True), - ExampleSchema("""\ - {"type": "record", - "name": "ipAddr", - "fields": [{"name": "addr", - "type": [{"name": "IPv6", "type": "fixed", "size": 16}, - {"name": "IPv4", "type": "fixed", "size": 4}]}]} - """, True), - ExampleSchema("""\ - {"type": "record", - "name": "Address", - "fields": [{"type": "string"}, - {"type": "string", "name": "City"}]} - """, False), - ExampleSchema("""\ - {"type": "record", - "name": "Event", - "fields": [{"name": "Sponsor"}, - {"name": "City", "type": "string"}]} - """, False), - ExampleSchema("""\ - {"type": "record", - "fields": "His vision, from the constantly passing bars," - "name", "Rainer"} - """, False), - ExampleSchema("""\ - {"name": ["Tom", "Jerry"], - "type": "record", - "fields": [{"name": "name", "type": "string"}]} - """, False), + ValidTestSchema({"type": "record", "name": "Test", "fields": [{"name": "f", "type": "long"}]}), + ValidTestSchema({"type": "error", "name": "Test", "fields": [{"name": "f", "type": "long"}]}), + ValidTestSchema({"type": "record", "name": "Node", + "fields": [ + {"name": "label", "type": "string"}, + {"name": "children", "type": {"type": "array", "items": "Node"}}]}), + ValidTestSchema({"type": "record", "name": "Lisp", + "fields": [{"name": "value", + "type": ["null", "string", + {"type": "record", "name": "Cons", + "fields": [{"name": "car", "type": "Lisp"}, + {"name": "cdr", "type": "Lisp"}]}]}]}), + ValidTestSchema({"type": "record", "name": "HandshakeRequest", + "namespace": "org.apache.avro.ipc", + "fields": [{"name": "clientHash", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", + "type": ["null", {"type": "map", "values": "bytes"}]}]}), + ValidTestSchema({"type": "record", "name": "HandshakeResponse", + "namespace": "org.apache.avro.ipc", + "fields": [{"name": "match", + "type": {"type": "enum", "name": "HandshakeMatch", + "symbols": ["BOTH", "CLIENT", "NONE"]}}, + {"name": "serverProtocol", "type": ["null", "string"]}, + {"name": "serverHash", + "type": ["null", {"name": "MD5", "size": 16, "type": "fixed"}]}, + {"name": "meta", + "type": ["null", {"type": "map", "values": "bytes"}]}]}), + ValidTestSchema({"type": "record", + "name": "Interop", + "namespace": "org.apache.avro", + "fields": [{"name": "intField", "type": "int"}, + {"name": "longField", "type": "long"}, + {"name": "stringField", "type": "string"}, + {"name": "boolField", "type": "boolean"}, + {"name": "floatField", "type": "float"}, + {"name": "doubleField", "type": "double"}, + {"name": "bytesField", "type": "bytes"}, + {"name": "nullField", "type": "null"}, + {"name": "arrayField", "type": {"type": "array", "items": "double"}}, + {"name": "mapField", + "type": {"type": "map", + "values": {"name": "Foo", + "type": "record", + "fields": [{"name": "label", "type": "string"}]}}}, + {"name": "unionField", + "type": ["boolean", "double", {"type": "array", "items": "bytes"}]}, + {"name": "enumField", + "type": {"type": "enum", "name": "Kind", "symbols": ["A", "B", "C"]}}, + {"name": "fixedField", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "recordField", + "type": {"type": "record", "name": "Node", + "fields": [{"name": "label", "type": "string"}, + {"name": "children", + "type": {"type": "array", + "items": "Node"}}]}}]}), + ValidTestSchema({"type": "record", "name": "ipAddr", + "fields": [{"name": "addr", "type": [{"name": "IPv6", "type": "fixed", "size": 16}, + {"name": "IPv4", "type": "fixed", "size": 4}]}]}), + InvalidTestSchema({"type": "record", "name": "Address", + "fields": [{"type": "string"}, {"type": "string", "name": "City"}]}), + InvalidTestSchema({"type": "record", "name": "Event", + "fields": [{"name": "Sponsor"}, {"name": "City", "type": "string"}]}), + InvalidTestSchema({"type": "record", "name": "Rainer", + "fields": "His vision, from the constantly passing bars"}), + InvalidTestSchema({"name": ["Tom", "Jerry"], "type": "record", + "fields": [{"name": "name", "type": "string"}]}), ] DOC_EXAMPLES = [ - ExampleSchema("""\ - {"type": "record", - "name": "TestDoc", - "doc": "Doc string", - "fields": [{"name": "name", "type": "string", - "doc" : "Doc String"}]} - """, True), - ExampleSchema("""\ - {"type": "enum", "name": "Test", "symbols": ["A", "B"], - "doc": "Doc String"} - """, True), + ValidTestSchema({"type": "record", "name": "TestDoc", "doc": "Doc string", + "fields": [{"name": "name", "type": "string", "doc" : "Doc String"}]}), + ValidTestSchema({"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc String"}), ] OTHER_PROP_EXAMPLES = [ - ExampleSchema("""\ - {"type": "record", - "name": "TestRecord", - "cp_string": "string", - "cp_int": 1, - "cp_array": [ 1, 2, 3, 4], - "fields": [ {"name": "f1", "type": "string", "cp_object": {"a":1,"b":2} }, - {"name": "f2", "type": "long", "cp_null": null} ]} - """, True), - ExampleSchema("""\ - {"type": "map", "values": "long", "cp_boolean": true} - """, True), - ExampleSchema("""\ - {"type": "enum", - "name": "TestEnum", - "symbols": [ "one", "two", "three" ], - "cp_float" : 1.0 } - """,True), - ExampleSchema("""\ - {"type": "long", - "date": "true"} - """, True) + ValidTestSchema({"type": "record", "name": "TestRecord", "cp_string": "string", + "cp_int": 1, "cp_array": [1, 2, 3, 4], + "fields": [{"name": "f1", "type": "string", "cp_object": {"a": 1,"b": 2}}, + {"name": "f2", "type": "long", "cp_null": None}]}), + ValidTestSchema({"type": "map", "values": "long", "cp_boolean": True}), + ValidTestSchema({"type": "enum", "name": "TestEnum", + "symbols": ["one", "two", "three"], "cp_float": 1.0}), ] DECIMAL_LOGICAL_TYPE = [ - ExampleSchema("""{ - "type": "fixed", - "logicalType": "decimal", - "name": "TestDecimal", - "precision": 4, - "size": 10, - "scale": 2}""", True), - ExampleSchema("""{ - "type": "bytes", - "logicalType": "decimal", - "precision": 4, - "scale": 2}""", True) + ValidTestSchema({"type": "fixed", "logicalType": "decimal", "name": "TestDecimal", "precision": 4, "size": 10, "scale": 2}), + ValidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}), + InvalidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": 2, "scale": -2}), + InvalidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": -2, "scale": 2}), + InvalidTestSchema({"type": "bytes", "logicalType": "decimal", "precision": 2, "scale": 3}), + InvalidTestSchema({"type": "fixed", "logicalType": "decimal", "name": "TestDecimal", "precision": -10, "scale": 2, "size": 5}), + InvalidTestSchema({"type": "fixed", "logicalType": "decimal", "name": "TestDecimal", "precision": 2, "scale": 3, "size": 2}), + InvalidTestSchema({"type": "fixed", "logicalType": "decimal", "name": "TestDecimal", "precision": 2, "scale": 2, "size": -2}), ] DATE_LOGICAL_TYPE = [ - ExampleSchema("""{ - "type": "int", - "logicalType": "date"} """, True), - ExampleSchema("""{ - "type": "int", - "logicalType": "date1"} """, False), - ExampleSchema("""{ - "type": "long", - "logicalType": "date"} """, False), + ValidTestSchema({"type": "int", "logicalType": "date"}), + InvalidTestSchema({"type": "int", "logicalType": "date1"}), + InvalidTestSchema({"type": "long", "logicalType": "date"}), ] TIMEMILLIS_LOGICAL_TYPE = [ - ExampleSchema("""{ - "type": "int", - "logicalType": "time-millis"} """, True), - ExampleSchema("""{ - "type": "int", - "logicalType": "time-milis"} """, False), - ExampleSchema("""{ - "type": "long", - "logicalType": "time-millis"} """, False), + ValidTestSchema({"type": "int", "logicalType": "time-millis"}), + InvalidTestSchema({"type": "int", "logicalType": "time-milis"}), + InvalidTestSchema({"type": "long", "logicalType": "time-millis"}), ] TIMEMICROS_LOGICAL_TYPE = [ - ExampleSchema("""{ - "type": "long", - "logicalType": "time-micros"} """, True), - ExampleSchema("""{ - "type": "long", - "logicalType": "time-micro"} """, False), - ExampleSchema("""{ - "type": "int", - "logicalType": "time-micros"} """, False), + ValidTestSchema({"type": "long", "logicalType": "time-micros"}), + InvalidTestSchema({"type": "long", "logicalType": "time-micro"}), + InvalidTestSchema({"type": "int", "logicalType": "time-micros"}), ] TIMESTAMPMILLIS_LOGICAL_TYPE = [ - ExampleSchema("""{ - "type": "long", - "logicalType": "timestamp-millis"} """, True), - ExampleSchema("""{ - "type": "long", - "logicalType": "timestamp-milis"} """, False), - ExampleSchema("""{ - "type": "int", - "logicalType": "timestamp-millis"} """, False), + ValidTestSchema({"type": "long", "logicalType": "timestamp-millis"}), + InvalidTestSchema({"type": "long", "logicalType": "timestamp-milis"}), + InvalidTestSchema({"type": "int", "logicalType": "timestamp-millis"}), ] TIMESTAMPMICROS_LOGICAL_TYPE = [ - ExampleSchema("""{ - "type": "long", - "logicalType": "timestamp-micros"} """, True), - ExampleSchema("""{ - "type": "long", - "logicalType": "timestamp-micro"} """, False), - ExampleSchema("""{ - "type": "int", - "logicalType": "timestamp-micros"} """, False), + ValidTestSchema({"type": "long", "logicalType": "timestamp-micros"}), + InvalidTestSchema({"type": "long", "logicalType": "timestamp-micro"}), + InvalidTestSchema({"type": "int", "logicalType": "timestamp-micros"}), ] - EXAMPLES = PRIMITIVE_EXAMPLES EXAMPLES += FIXED_EXAMPLES EXAMPLES += ENUM_EXAMPLES @@ -389,81 +247,22 @@ EXAMPLES += TIMESTAMPMILLIS_LOGICAL_TYPE EXAMPLES += TIMESTAMPMICROS_LOGICAL_TYPE VALID_EXAMPLES = [e for e in EXAMPLES if e.valid] +INVALID_EXAMPLES = [e for e in EXAMPLES if not e.valid] -# TODO(hammer): refactor into harness for examples -# TODO(hammer): pretty-print detailed output -# TODO(hammer): make verbose flag -# TODO(hammer): show strack trace to user -# TODO(hammer): use logging module? class TestSchema(unittest.TestCase): + """Miscellaneous tests for schema""" def test_correct_recursive_extraction(self): + """A recursive reference within a schema should be the same type every time.""" s = schema.parse('{"type": "record", "name": "X", "fields": [{"name": "y", "type": {"type": "record", "name": "Y", "fields": [{"name": "Z", "type": "X"}]}}]}') t = schema.parse(str(s.fields[0].type)) # If we've made it this far, the subschema was reasonably stringified; it ccould be reparsed. self.assertEqual("X", t.fields[0].type.name) - def test_parse(self): - correct = 0 - for example in EXAMPLES: - try: - schema.parse(example.schema_string) - if example.valid: - correct += 1 - else: - self.fail("Invalid schema was parsed: " + example.schema_string) - except: - if not example.valid: - correct += 1 - else: - self.fail("Valid schema failed to parse: " + example.schema_string) - - fail_msg = "Parse behavior correct on %d out of %d schemas." % \ - (correct, len(EXAMPLES)) - self.assertEqual(correct, len(EXAMPLES), fail_msg) - - def test_valid_cast_to_string_after_parse(self): - """ - Test that the string generated by an Avro Schema object - is, in fact, a valid Avro schema. - """ - print_test_name('TEST CAST TO STRING AFTER PARSE') - correct = 0 - for example in VALID_EXAMPLES: - schema_data = schema.parse(example.schema_string) - schema.parse(str(schema_data)) - correct += 1 - - fail_msg = "Cast to string success on %d out of %d schemas" % \ - (correct, len(VALID_EXAMPLES)) - self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg) - - def test_equivalence_after_round_trip(self): - """ - 1. Given a string, parse it to get Avro schema "original". - 2. Serialize "original" to a string and parse that string - to generate Avro schema "round trip". - 3. Ensure "original" and "round trip" schemas are equivalent. - """ - print_test_name('TEST ROUND TRIP') - correct = 0 - for example in VALID_EXAMPLES: - original_schema = schema.parse(example.schema_string) - round_trip_schema = schema.parse(str(original_schema)) - if original_schema == round_trip_schema: - correct += 1 - debug_msg = "%s: ROUND TRIP SUCCESS" % example.name - else: - debug_msg = "%s: ROUND TRIP FAILURE" % example.name - self.fail("Round trip failure: %s, %s, %s" % (example.name, original_schema, str(original_schema))) - - fail_msg = "Round trip success on %d out of %d schemas" % \ - (correct, len(VALID_EXAMPLES)) - self.assertEqual(correct, len(VALID_EXAMPLES), fail_msg) - # TODO(hammer): more tests def test_fullname(self): - """ + """Test schema full names + The fullname is determined in one of the following ways: * A name and namespace are both specified. For example, one might use "name": "X", "namespace": "org.foo" @@ -490,16 +289,15 @@ class TestSchema(unittest.TestCase): multiple definitions of a fullname if the definitions are equivalent. """ - print_test_name('TEST FULLNAME') - # name and namespace specified + # name and namespace specified fullname = schema.Name('a', 'o.a.h', None).fullname self.assertEqual(fullname, 'o.a.h.a') # fullname and namespace specified fullname = schema.Name('a.b.c.d', 'o.a.h', None).fullname self.assertEqual(fullname, 'a.b.c.d') - + # name and default namespace specified fullname = schema.Name('a', None, 'b.c.d').fullname self.assertEqual(fullname, 'b.c.d.a') @@ -516,58 +314,12 @@ class TestSchema(unittest.TestCase): fullname = schema.Name('a', 'o.a.a', 'o.a.h').fullname self.assertEqual(fullname, 'o.a.a.a') - def test_doc_attributes(self): - print_test_name('TEST DOC ATTRIBUTES') - correct = 0 - for example in DOC_EXAMPLES: - original_schema = schema.parse(example.schema_string) - if original_schema.doc is not None: - correct += 1 - if original_schema.type == 'record': - for f in original_schema.fields: - if f.doc is None: - self.fail("Failed to preserve 'doc' in fields: " + example.schema_string) - self.assertEqual(correct,len(DOC_EXAMPLES)) - - def test_other_attributes(self): - print_test_name('TEST OTHER ATTRIBUTES') - correct = 0 - props = {} - for example in OTHER_PROP_EXAMPLES: - original_schema = schema.parse(example.schema_string) - round_trip_schema = schema.parse(str(original_schema)) - self.assertEqual(original_schema.other_props,round_trip_schema.other_props) - if original_schema.type == "record": - field_props = 0 - for f in original_schema.fields: - if f.other_props: - props.update(f.other_props) - field_props += 1 - self.assertEqual(field_props,len(original_schema.fields)) - if original_schema.other_props: - props.update(original_schema.other_props) - correct += 1 - for k in props: - v = props[k] - if k == "cp_boolean": - self.assertEqual(type(v), bool) - elif k == "cp_int": - self.assertEqual(type(v), int) - elif k == "cp_object": - self.assertEqual(type(v), dict) - elif k == "cp_float": - self.assertEqual(type(v), float) - elif k == "cp_array": - self.assertEqual(type(v), list) - self.assertEqual(correct,len(OTHER_PROP_EXAMPLES)) - def test_exception_is_not_swallowed_on_parse_error(self): - print_test_name('TEST EXCEPTION NOT SWALLOWED ON PARSE ERROR') - + """A specific exception message should appear on a json parse error.""" try: schema.parse('/not/a/real/file') caught_exception = False - except schema.SchemaParseException, e: + except schema.SchemaParseException as e: expected_message = 'Error parsing JSON: /not/a/real/file, error = ' \ 'No JSON object could be decoded' self.assertEqual(expected_message, e.args[0]) @@ -575,78 +327,145 @@ class TestSchema(unittest.TestCase): self.assertTrue(caught_exception, 'Exception was not caught') - def test_decimal_invalid_schema(self): - invalid_schemas = [ - ExampleSchema("""{ - "type": "bytes", - "logicalType": "decimal", - "precision": 2, - "scale": -2}""", True), - - ExampleSchema("""{ - "type": "bytes", - "logicalType": "decimal", - "precision": -2, - "scale": 2}""", True), - - ExampleSchema("""{ - "type": "bytes", - "logicalType": "decimal", - "precision": 2, - "scale": 3}""", True), - - ExampleSchema("""{ + def test_decimal_valid_type(self): + fixed_decimal_schema = ValidTestSchema({ "type": "fixed", "logicalType": "decimal", "name": "TestDecimal", - "precision": -10, + "precision": 4, "scale": 2, - "size": 5}""", True), - + "size": 2}) - ExampleSchema("""{ - "type": "fixed", + bytes_decimal_schema = ValidTestSchema({ + "type": "bytes", "logicalType": "decimal", - "name": "TestDecimal", - "precision": 2, - "scale": 3, - "size": 2}""", True) - ] - - for invalid_schema in invalid_schemas: - self.assertRaises(SchemaParseException, schema.parse, invalid_schema.schema_string) - - fixed_invalid_schema_size = ExampleSchema("""{ - "type": "fixed", - "logicalType": "decimal", - "name": "TestDecimal", - "precision": 2, - "scale": 2, - "size": -2}""", True) - self.assertRaises(AvroException, schema.parse, fixed_invalid_schema_size.schema_string) + "precision": 4}) - def test_decimal_valid_type(self): - fixed_decimal_schema = ExampleSchema("""{ - "type": "fixed", - "logicalType": "decimal", - "name": "TestDecimal", - "precision": 4, - "scale": 2, - "size": 2}""", True) - - bytes_decimal_schema = ExampleSchema("""{ - "type": "bytes", - "logicalType": "decimal", - "precision": 4}""", True) - - fixed_decimal = schema.parse(fixed_decimal_schema.schema_string) + fixed_decimal = fixed_decimal_schema.parse() self.assertEqual(4, fixed_decimal.get_prop('precision')) self.assertEqual(2, fixed_decimal.get_prop('scale')) self.assertEqual(2, fixed_decimal.get_prop('size')) - bytes_decimal = schema.parse(bytes_decimal_schema.schema_string) + bytes_decimal = bytes_decimal_schema.parse() self.assertEqual(4, bytes_decimal.get_prop('precision')) self.assertEqual(0, bytes_decimal.get_prop('scale')) +class SchemaParseTestCase(unittest.TestCase): + """Enable generating parse test cases over all the valid and invalid example schema.""" + + def __init__(self, test_schema): + """Ignore the normal signature for unittest.TestCase because we are generating + many test cases from this one class. This is safe as long as the autoloader + ignores this class. The autoloader will ignore this class as long as it has + no methods starting with `test_`. + """ + super(SchemaParseTestCase, self).__init__( + 'parse_valid' if test_schema.valid else 'parse_invalid') + self.test_schema = test_schema + + def parse_valid(self): + """Parsing a valid schema should not error.""" + try: + self.test_schema.parse() + except (schema.AvroException, schema.SchemaParseException): + self.fail("Valid schema failed to parse: {!s}".format(self.test_schema)) + + def parse_invalid(self): + """Parsing an invalid schema should error.""" + try: + self.test_schema.parse() + except (schema.AvroException, schema.SchemaParseException): + pass + else: + self.fail("Invalid schema should not have parsed: {!s}".format(self.test_schema)) + +class RoundTripParseTestCase(unittest.TestCase): + """Enable generating round-trip parse test cases over all the valid test schema.""" + + def __init__(self, test_schema): + """Ignore the normal signature for unittest.TestCase because we are generating + many test cases from this one class. This is safe as long as the autoloader + ignores this class. The autoloader will ignore this class as long as it has + no methods starting with `test_`. + """ + super(RoundTripParseTestCase, self).__init__('parse_round_trip') + self.test_schema = test_schema + + def parse_round_trip(self): + """The string of a Schema should be parseable to the same Schema.""" + parsed = self.test_schema.parse() + round_trip = schema.parse(str(parsed)) + self.assertEqual(parsed, round_trip) + +class DocAttributesTestCase(unittest.TestCase): + """Enable generating document attribute test cases over all the document test schema.""" + + def __init__(self, test_schema): + """Ignore the normal signature for unittest.TestCase because we are generating + many test cases from this one class. This is safe as long as the autoloader + ignores this class. The autoloader will ignore this class as long as it has + no methods starting with `test_`. + """ + super(DocAttributesTestCase, self).__init__('check_doc_attributes') + self.test_schema = test_schema + + def check_doc_attributes(self): + """Documentation attributes should be preserved.""" + sch = self.test_schema.parse() + self.assertIsNotNone(sch.doc, "Failed to preserve 'doc' in schema: {!s}".format(self.test_schema)) + if sch.type == 'record': + for f in sch.fields: + self.assertIsNotNone(f.doc, "Failed to preserve 'doc' in fields: {!s}".format(self.test_schema)) + + +class OtherAttributesTestCase(unittest.TestCase): + """Enable generating attribute test cases over all the other-prop test schema.""" + _type_map = { + "cp_array": list, + "cp_boolean": bool, + "cp_float": float, + "cp_int": int, + "cp_null": type(None), + "cp_object": dict, + "cp_string": basestring, + } + + def __init__(self, test_schema): + """Ignore the normal signature for unittest.TestCase because we are generating + many test cases from this one class. This is safe as long as the autoloader + ignores this class. The autoloader will ignore this class as long as it has + no methods starting with `test_`. + """ + super(OtherAttributesTestCase, self).__init__('check_attributes') + self.test_schema = test_schema + + def _check_props(self, props): + for k, v in props.items(): + self.assertIsInstance(v, self._type_map[k]) + + def check_attributes(self): + """Other attributes and their types on a schema should be preserved.""" + sch = self.test_schema.parse() + round_trip = schema.parse(str(sch)) + self.assertEqual(sch.other_props, round_trip.other_props, + "Properties were not preserved in a round-trip parse.") + self._check_props(sch.other_props) + if sch.type == "record": + field_props = [f.other_props for f in sch.fields if f.other_props] + self.assertEqual(len(field_props), len(sch.fields)) + for p in field_props: + self._check_props(p) + + +def load_tests(loader, default_tests, pattern): + """Generate test cases across many test schema.""" + suite = unittest.TestSuite() + suite.addTests(loader.loadTestsFromTestCase(TestSchema)) + suite.addTests(SchemaParseTestCase(ex) for ex in EXAMPLES) + suite.addTests(RoundTripParseTestCase(ex) for ex in VALID_EXAMPLES) + suite.addTests(DocAttributesTestCase(ex) for ex in DOC_EXAMPLES) + suite.addTests(OtherAttributesTestCase(ex) for ex in OTHER_PROP_EXAMPLES) + return suite + if __name__ == '__main__': unittest.main()
