This is an automated email from the ASF dual-hosted git repository.
kojiromike pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new c769c43 AVRO-1938: Generate Parsing Canonical Forms of Schema (#1167)
c769c43 is described below
commit c769c4343e24d2473417afe46b37a1b60d7ca84f
Author: Subhash Bhushan <[email protected]>
AuthorDate: Mon Apr 5 06:41:47 2021 -0700
AVRO-1938: Generate Parsing Canonical Forms of Schema (#1167)
* AVRO-1938: Generate Parsing Canonical Forms of Schema
This PR adds support for generating Parsing Canonical Forms of Avro Schemas
to the main `avro` package.
The bulk of the work was done by @kojiromike and @forsberg. This PR cleans up
code where necessary, adds more test cases, and clarifies which transformations
are not applicable in Python (e.g., transformation of integers with leading
zeros).
Closes: https://issues.apache.org/jira/browse/AVRO-1938
* AVRO-1938 Fix typo that repeated a test method name
* AVRO-1938: Makes `names` argument to `to_canonical_json` optional
This commit ensures that `names` is an optional parameter in all subclass
implementations of `to_canonical_json`. It also addresses minor formatting
issues and review comments to previous commits.
* AVRO-1938 Use `dict.get` to return value in dict or default
---
lang/py/avro/protocol.py | 6 +-
lang/py/avro/schema.py | 197 ++++++++++++++----
lang/py/avro/test/test_schema.py | 419 +++++++++++++++++++++++++++++++++++++++
3 files changed, 580 insertions(+), 42 deletions(-)
diff --git a/lang/py/avro/protocol.py b/lang/py/avro/protocol.py
index 483f63e..afe6d63 100644
--- a/lang/py/avro/protocol.py
+++ b/lang/py/avro/protocol.py
@@ -209,13 +209,14 @@ class Message:
return json.dumps(self.to_json())
def to_json(self, names=None):
- if names is None:
- names = avro.schema.Names()
+ names = names or avro.schema.Names()
+
to_dump = {}
to_dump['request'] = self.request.to_json(names)
to_dump['response'] = self.response.to_json(names)
if self.errors:
to_dump['errors'] = self.errors.to_json(names)
+
return to_dump
def __eq__(self, that):
@@ -231,6 +232,7 @@ def make_avpr_object(json_data):
messages = json_data.get('messages')
except AttributeError:
raise avro.errors.ProtocolParseException('Not a JSON object: %s' %
json_data)
+
return Protocol(name, namespace, types, messages)
diff --git a/lang/py/avro/schema.py b/lang/py/avro/schema.py
index 2795268..e4021fa 100644
--- a/lang/py/avro/schema.py
+++ b/lang/py/avro/schema.py
@@ -41,6 +41,7 @@ A schema may be one of:
"""
import abc
+import collections
import datetime
import decimal
import json
@@ -113,6 +114,16 @@ VALID_FIELD_SORT_ORDERS = (
'ignore',
)
+CANONICAL_FIELD_ORDER = (
+ 'name',
+ 'type',
+ 'fields',
+ 'symbols',
+ 'items',
+ 'values',
+ 'size',
+)
+
INT_MIN_VALUE = -(1 << 31)
INT_MAX_VALUE = (1 << 31) - 1
LONG_MIN_VALUE = -(1 << 63)
@@ -136,7 +147,18 @@ def _is_timezone_aware_datetime(dt):
# Base Classes
#
-class Schema(abc.ABC):
+class CanonicalPropertiesMixin(object):
+ """A Mixin that provides canonical properties to Schema and Field types."""
+ @property
+ def canonical_properties(self):
+ props = self.props
+ return collections.OrderedDict(
+ (key, props[key])
+ for key in CANONICAL_FIELD_ORDER
+ if key in props)
+
+
+class Schema(abc.ABC, CanonicalPropertiesMixin):
"""Base class for all Schema classes."""
_props = None
@@ -202,9 +224,12 @@ class Schema(abc.ABC):
in the parameter names.
"""
+ @abc.abstractmethod
def validate(self, datum):
"""Returns the appropriate schema object if datum is valid for that
schema, else None.
+ To be implemented in subclasses.
+
Validation concerns only shape and type of data in the top level of
the current schema.
In most cases, the returned schema object will be self. However, for
UnionSchema objects,
the returned Schema will be the first branch schema for which
validation passes.
@@ -212,7 +237,20 @@ class Schema(abc.ABC):
@arg datum: The data to be checked for validity according to this
schema
@return Optional[Schema]
"""
- raise Exception("Must be implemented by subclasses.")
+
+ @abc.abstractmethod
+ def to_canonical_json(self, names=None):
+ """
+ Converts the schema object into its Canonical Form
+
http://avro.apache.org/docs/current/spec.html#Parsing+Canonical+Form+for+Schemas
+
+ To be implemented in subclasses.
+ """
+
+ @property
+ def canonical_form(self):
+ # The separators eliminate whitespace around commas and colons.
+ return json.dumps(self.to_canonical_json(), separators=(",", ":"))
class Name:
@@ -223,22 +261,24 @@ class Name:
def __init__(self, name_attr, space_attr, default_space):
"""The fullname is determined in one of the following ways:
- - A name and namespace are both specified. For example, one might use
"name": "X", "namespace": "org.foo" to indicate the fullname org.foo.X.
+ - A name and namespace are both specified. For example, one might use
"name": "X",
+ "namespace": "org.foo" to indicate the fullname org.foo.X.
- A fullname is specified. If the name specified contains a dot,
- then it is assumed to be a fullname, and any namespace also
specified is ignored.
- For example, use "name": "org.foo.X" to indicate the fullname
org.foo.X.
+ then it is assumed to be a fullname, and any namespace also
specified is ignored.
+ For example, use "name": "org.foo.X" to indicate the fullname
org.foo.X.
- A name only is specified, i.e., a name that contains no dots.
- In this case the namespace is taken from the most tightly enclosing
schema or protocol.
- For example, if "name": "X" is specified, and this occurs within a
field of
- the record definition of org.foo.Y, then the fullname is org.foo.X.
- If there is no enclosing namespace then the null namespace is used.
+ In this case the namespace is taken from the most tightly
enclosing schema or protocol.
+ For example, if "name": "X" is specified, and this occurs within a
field of
+ the record definition of org.foo.Y, then the fullname is org.foo.X.
+ If there is no enclosing namespace then the null namespace is used.
References to previously defined names are as in the latter two cases
above:
if they contain a dot they are a fullname,
if they do not contain a dot, the namespace is the namespace of the
enclosing definition.
@arg name_attr: name value read in schema or None.
- @arg space_attr: namespace value read in schema or None. The empty
string may be used as a namespace to indicate the null namespace.
+ @arg space_attr: namespace value read in schema or None. The empty
string may be used as a namespace
+ to indicate the null namespace.
@arg default_space: the current default space or None.
"""
if name_attr is None:
@@ -291,9 +331,7 @@ class Names:
def get_name(self, name_attr, space_attr):
test = Name(name_attr, space_attr, self.default_namespace).fullname
- if test not in self.names:
- return None
- return self.names[test]
+ return self.names.get(test)
def prune_namespace(self, properties):
"""given a properties, return properties with namespace removed if
@@ -301,12 +339,15 @@ class Names:
if self.default_namespace is None:
# I have no default -- no change
return properties
+
if 'namespace' not in properties:
# he has no namespace - no change
return properties
+
if properties['namespace'] != self.default_namespace:
# we're different - leave his stuff alone
return properties
+
# we each have a namespace and it's redundant. delete his.
prunable = properties.copy()
del(prunable['namespace'])
@@ -316,10 +357,10 @@ class Names:
"""
Add a new schema object to the name set.
- @arg name_attr: name value read in schema
- @arg space_attr: namespace value read in schema.
+ @arg name_attr: name value read in schema
+ @arg space_attr: namespace value read in schema.
- @return: the Name that was just added.
+ @return: the Name that was just added.
"""
to_add = Name(name_attr, space_attr, self.default_namespace)
@@ -407,7 +448,7 @@ class DecimalLogicalSchema(LogicalSchema):
super(DecimalLogicalSchema, self).__init__('decimal')
-class Field:
+class Field(CanonicalPropertiesMixin):
def __init__(self, type, name, has_default, default=None,
order=None, names=None, doc=None, other_props=None):
# Ensure valid ctor args
@@ -469,10 +510,19 @@ class Field:
return json.dumps(self.to_json())
def to_json(self, names=None):
- if names is None:
- names = Names()
+ names = names or Names()
+
to_dump = self.props.copy()
to_dump['type'] = self.type.to_json(names)
+
+ return to_dump
+
+ def to_canonical_json(self, names=None):
+ names = names or Names()
+
+ to_dump = self.canonical_properties
+ to_dump["type"] = self.type.to_canonical_json(names)
+
return to_dump
def __eq__(self, that):
@@ -526,6 +576,9 @@ class PrimitiveSchema(Schema):
else:
return self.props
+ def to_canonical_json(self, names=None):
+ return self.fullname if len(self.props) == 1 else
self.canonical_properties
+
def validate(self, datum):
"""Return self if datum is a valid representation of this type of
primitive schema, else None
@@ -593,13 +646,19 @@ class FixedSchema(NamedSchema):
return self.type == writer.type and self.check_props(writer,
['fullname', 'size'])
def to_json(self, names=None):
- if names is None:
- names = Names()
+ names = names or Names()
+
if self.fullname in names.names:
return self.name_ref(names)
- else:
- names.names[self.fullname] = self
- return names.prune_namespace(self.props)
+
+ names.names[self.fullname] = self
+ return names.prune_namespace(self.props)
+
+ def to_canonical_json(self, names=None):
+ to_dump = self.canonical_properties
+ to_dump["name"] = self.fullname
+
+ return to_dump
def validate(self, datum):
"""Return self if datum is a valid representation of this schema, else
None."""
@@ -673,13 +732,24 @@ class EnumSchema(NamedSchema):
return self.type == writer.type and self.check_props(writer,
['fullname'])
def to_json(self, names=None):
- if names is None:
- names = Names()
+ names = names or Names()
+
if self.fullname in names.names:
return self.name_ref(names)
+
+ names.names[self.fullname] = self
+ return names.prune_namespace(self.props)
+
+ def to_canonical_json(self, names=None):
+ names_as_json = self.to_json(names)
+
+ if isinstance(names_as_json, str):
+ to_dump = self.fullname
else:
- names.names[self.fullname] = self
- return names.prune_namespace(self.props)
+ to_dump = self.canonical_properties
+ to_dump["name"] = self.fullname
+
+ return to_dump
def validate(self, datum):
"""Return self if datum is a valid member of this Enum, else None."""
@@ -722,11 +792,21 @@ class ArraySchema(Schema):
return self.type == writer.type and
self.items.check_props(writer.items, ['type'])
def to_json(self, names=None):
- if names is None:
- names = Names()
+ names = names or Names()
+
to_dump = self.props.copy()
item_schema = self.get_prop('items')
to_dump['items'] = item_schema.to_json(names)
+
+ return to_dump
+
+ def to_canonical_json(self, names=None):
+ names = names or Names()
+
+ to_dump = self.canonical_properties
+ item_schema = self.get_prop("items")
+ to_dump["items"] = item_schema.to_canonical_json(names)
+
return to_dump
def validate(self, datum):
@@ -768,10 +848,19 @@ class MapSchema(Schema):
return writer.type == self.type and
self.values.check_props(writer.values, ['type'])
def to_json(self, names=None):
- if names is None:
- names = Names()
+ names = names or Names()
+
to_dump = self.props.copy()
to_dump['values'] = self.get_prop('values').to_json(names)
+
+ return to_dump
+
+ def to_canonical_json(self, names=None):
+ names = names or Names()
+
+ to_dump = self.canonical_properties
+ to_dump["values"] = self.get_prop("values").to_canonical_json(names)
+
return to_dump
def validate(self, datum):
@@ -829,13 +918,19 @@ class UnionSchema(Schema):
return writer.type in {'union', 'error_union'} or any(s.match(writer)
for s in self.schemas)
def to_json(self, names=None):
- if names is None:
- names = Names()
+ names = names or Names()
+
to_dump = []
for schema in self.schemas:
to_dump.append(schema.to_json(names))
+
return to_dump
+ def to_canonical_json(self, names=None):
+ names = names or Names()
+
+ return [schema.to_canonical_json(names) for schema in self.schemas]
+
def validate(self, datum):
"""Return the first branch schema of which datum is a valid example,
else None."""
for branch in self.schemas:
@@ -853,14 +948,15 @@ class ErrorUnionSchema(UnionSchema):
UnionSchema.__init__(self, ['string'] + schemas, names)
def to_json(self, names=None):
- if names is None:
- names = Names()
+ names = names or Names()
+
to_dump = []
for schema in self.schemas:
# Don't print the system error schema
if schema.type == 'string':
continue
to_dump.append(schema.to_json(names))
+
return to_dump
@@ -948,8 +1044,8 @@ class RecordSchema(NamedSchema):
return fields_dict
def to_json(self, names=None):
- if names is None:
- names = Names()
+ names = names or Names()
+
# Request records don't have names
if self.type == 'request':
return [f.to_json(names) for f in self.fields]
@@ -961,6 +1057,24 @@ class RecordSchema(NamedSchema):
to_dump = names.prune_namespace(self.props.copy())
to_dump['fields'] = [f.to_json(names) for f in self.fields]
+
+ return to_dump
+
+ def to_canonical_json(self, names=None):
+ names = names or Names()
+
+ if self.type == 'request':
+ raise NotImplementedError("Canonical form (probably) does not make
sense on type request")
+
+ to_dump = self.canonical_properties
+ to_dump["name"] = self.fullname
+
+ if self.fullname in names.names:
+ return self.name_ref(names)
+
+ names.names[self.fullname] = self
+ to_dump["fields"] = [f.to_canonical_json(names) for f in self.fields]
+
return to_dump
def validate(self, datum):
@@ -1124,18 +1238,19 @@ def make_avsc_object(json_data, names=None,
validate_enum_symbols=True):
@arg names: A Names object (tracks seen names and default space)
@arg validate_enum_symbols: If False, will allow enum symbols that are not
valid Avro names.
"""
- if names is None:
- names = Names()
+ names = names or Names()
# JSON object (non-union)
if callable(getattr(json_data, 'get', None)):
type = json_data.get('type')
other_props = get_other_props(json_data, SCHEMA_RESERVED_PROPS)
logical_type = json_data.get('logicalType')
+
if logical_type:
logical_schema = make_logical_schema(logical_type, type,
other_props or {})
if logical_schema is not None:
return logical_schema
+
if type in NAMED_TYPES:
name = json_data.get('name')
namespace = json_data.get('namespace', names.default_namespace)
@@ -1159,8 +1274,10 @@ def make_avsc_object(json_data, names=None,
validate_enum_symbols=True):
return RecordSchema(name, namespace, fields, names, type, doc,
other_props)
else:
raise avro.errors.SchemaParseException('Unknown Named Type:
%s' % type)
+
if type in PRIMITIVE_TYPES:
return PrimitiveSchema(type, other_props)
+
if type in VALID_TYPES:
if type == 'array':
items = json_data.get('items')
diff --git a/lang/py/avro/test/test_schema.py b/lang/py/avro/test/test_schema.py
index 7ce5a1c..fceb973 100644
--- a/lang/py/avro/test/test_schema.py
+++ b/lang/py/avro/test/test_schema.py
@@ -105,6 +105,28 @@ UNION_EXAMPLES = [
{"type": "array", "items": "string"}]),
]
+NAMED_IN_UNION_EXAMPLES = [
+ ValidTestSchema({
+ "namespace": "org.apache.avro.test",
+ "type": "record",
+ "name": "Test",
+ "fields": [
+ {
+ "type": {
+ "symbols": ["one", "two"],
+ "type": "enum",
+ "name": "NamedEnum"
+ },
+ "name": "thenamedenum"
+ },
+ {
+ "type": ["null", "NamedEnum"],
+ "name": "unionwithreftoenum"
+ }
+ ]
+ })
+]
+
RECORD_EXAMPLES = [
ValidTestSchema({"type": "record", "name": "Test", "fields": [{"name":
"f", "type": "long"}]}),
ValidTestSchema({"type": "error", "name": "Test", "fields": [{"name": "f",
"type": "long"}]}),
@@ -295,6 +317,7 @@ EXAMPLES += ENUM_EXAMPLES
EXAMPLES += ARRAY_EXAMPLES
EXAMPLES += MAP_EXAMPLES
EXAMPLES += UNION_EXAMPLES
+EXAMPLES += NAMED_IN_UNION_EXAMPLES
EXAMPLES += RECORD_EXAMPLES
EXAMPLES += DOC_EXAMPLES
EXAMPLES += DECIMAL_LOGICAL_TYPE
@@ -583,6 +606,401 @@ class OtherAttributesTestCase(unittest.TestCase):
self._check_props(p)
+class CanonicalFormTestCase(unittest.TestCase):
+ r"""Enable generating canonical-form test cases over the valid schema.
+ Transforming into Parsing Canonical Form
+ Assuming an input schema (in JSON form) that's already UTF-8 text for
a valid Avro schema (including all
+ quotes as required by JSON), the following transformations will
produce its Parsing Canonical Form:
+ - [PRIMITIVES] Convert primitive schemas to their simple form
(e.g., int instead of {"type":"int"}).
+ - [FULLNAMES] Replace short names with fullnames, using applicable
namespaces to do so. Then eliminate
+ namespace attributes, which are now redundant.
+ - [STRIP] Keep only attributes that are relevant to parsing data,
which are: type, name, fields, symbols,
+ items, values, size. Strip all others (e.g., doc and aliases).
+ - [ORDER] Order the appearance of fields of JSON objects as
follows: name, type, fields, symbols, items,
+ values, size. For example, if an object has type, name, and
size fields, then the name field should
+ appear first, followed by the type and then the size fields.
+ - [STRINGS] For all JSON string literals in the schema text,
replace any escaped characters
+ (e.g., \uXXXX escapes) with their UTF-8 equivalents.
+ - [INTEGERS] Eliminate quotes around and any leading zeros in
front of JSON integer literals
+ (which appear in the size attributes of fixed schemas).
+ - [WHITESPACE] Eliminate all whitespace in JSON outside of string
literals.
+ We depend on the Python json parser to properly handle the STRINGS and
INTEGERS rules, so
+ we don't test them here.
+ """
+
+ def compact_json_string(self, json_doc):
+ """Returns compact-encoded JSON string representation for supplied
document.
+
+ Args:
+ json_doc (json): JSON Document
+
+ Returns:
+ str: Compact-encoded, stringified JSON document
+ """
+ return json.dumps(json_doc, separators=(',', ':'))
+
+ def test_primitive_int(self):
+ """
+ Convert primitive schemas to their simple form (e.g., int instead of
{"type":"int"}).
+ """
+ s = avro.schema.parse(json.dumps('int'))
+ self.assertEqual(s.canonical_form, '"int"')
+
+ s = avro.schema.parse(json.dumps({"type": "int"}))
+ self.assertEqual(s.canonical_form, '"int"')
+
+ def test_primitive_float(self):
+ s = avro.schema.parse(json.dumps('float'))
+ self.assertEqual(s.canonical_form, '"float"')
+
+ s = avro.schema.parse(json.dumps({"type": "float"}))
+ self.assertEqual(s.canonical_form, '"float"')
+
+ def test_primitive_double(self):
+ s = avro.schema.parse(json.dumps('double'))
+ self.assertEqual(s.canonical_form, '"double"')
+
+ s = avro.schema.parse(json.dumps({"type": "double"}))
+ self.assertEqual(s.canonical_form, '"double"')
+
+ def test_primitive_null(self):
+ s = avro.schema.parse(json.dumps('null'))
+ self.assertEqual(s.canonical_form, '"null"')
+
+ s = avro.schema.parse(json.dumps({"type": "null"}))
+ self.assertEqual(s.canonical_form, '"null"')
+
+ def test_primitive_bytes(self):
+ s = avro.schema.parse(json.dumps('bytes'))
+ self.assertEqual(s.canonical_form, '"bytes"')
+
+ s = avro.schema.parse(json.dumps({"type": "bytes"}))
+ self.assertEqual(s.canonical_form, '"bytes"')
+
+ def test_primitive_long(self):
+ s = avro.schema.parse(json.dumps('long'))
+ self.assertEqual(s.canonical_form, '"long"')
+
+ s = avro.schema.parse(json.dumps({"type": "long"}))
+ self.assertEqual(s.canonical_form, '"long"')
+
+ def test_primitive_boolean(self):
+ s = avro.schema.parse(json.dumps('boolean'))
+ self.assertEqual(s.canonical_form, '"boolean"')
+
+ s = avro.schema.parse(json.dumps({"type": "boolean"}))
+ self.assertEqual(s.canonical_form, '"boolean"')
+
+ def test_primitive_string(self):
+ s = avro.schema.parse(json.dumps('string'))
+ self.assertEqual(s.canonical_form, '"string"')
+
+ s = avro.schema.parse(json.dumps({"type": "string"}))
+ self.assertEqual(s.canonical_form, '"string"')
+
+ def test_integer_canonical_form(self):
+ """
+ Integer literals starting with 0 are illegal in python, because of
ambiguity. This is a placeholder test
+ for INTEGERS canonical form, which should generally succeed provided a
valid integer has been supplied.
+ """
+ s = avro.schema.parse('{"name":"md5","type":"fixed","size":16}')
+ self.assertEqual(
+ s.canonical_form,
+ self.compact_json_string({
+ "name": "md5",
+ "type": "fixed",
+ "size": 16}))
+
+ def test_string_with_escaped_characters(self):
+ """
+ Replace any escaped characters (e.g., \u0031 escapes) with their UTF-8
equivalents.
+ """
+ s = avro.schema.parse('{"name":"\u0041","type":"fixed","size":16}')
+ self.assertEqual(
+ s.canonical_form,
+ self.compact_json_string({
+ "name": "A",
+ "type": "fixed",
+ "size": 16}))
+
+ def test_fullname(self):
+ """
+ Replace short names with fullnames, using applicable namespaces to do
so. Then eliminate namespace attributes, which are now redundant.
+ """
+ s = avro.schema.parse(json.dumps({
+ "namespace": "avro",
+ "name": "example",
+ "type": "enum",
+ "symbols": ["a", "b"]}))
+ self.assertEqual(
+ s.canonical_form,
+ self.compact_json_string({
+ "name": "avro.example",
+ "type": "enum",
+ "symbols": ["a", "b"]}))
+
+ def test_strip(self):
+ """
+ Keep only attributes that are relevant to parsing data, which are:
type, name, fields, symbols, items, values,
+ size. Strip all others (e.g., doc and aliases).
+ """
+ s = avro.schema.parse(json.dumps({
+ "name": "foo",
+ "type": "enum",
+ "doc": "test",
+ "aliases": ["bar"],
+ "symbols": ["a", "b"]}))
+ self.assertEqual(
+ s.canonical_form,
+ self.compact_json_string({
+ "name": "foo",
+ "type": "enum",
+ "symbols": ["a", "b"]}))
+
+ def test_order(self):
+ """
+ Order the appearance of fields of JSON objects as follows: name, type,
fields, symbols, items, values, size.
+ For example, if an object has type, name, and size fields, then the
name field should appear first, followed
+ by the type and then the size fields.
+ """
+ s = avro.schema.parse(json.dumps({
+ "symbols": ["a", "b"],
+ "type": "enum",
+ "name": "example"}))
+ self.assertEqual(
+ s.canonical_form,
+ self.compact_json_string({
+ "name": "example",
+ "type": "enum",
+ "symbols": ["a", "b"]}))
+
+ def test_whitespace(self):
+ """
+ Eliminate all whitespace in JSON outside of string literals.
+ """
+ s = avro.schema.parse(
+ '''{"type": "fixed",
+ "size": 16,
+ "name": "md5"}
+ ''')
+ self.assertEqual(
+ s.canonical_form,
+ self.compact_json_string({
+ "name": "md5",
+ "type": "fixed",
+ "size": 16}))
+
+ def test_record_field(self):
+ """
+ Ensure that record fields produce the correct parsing canonical form.
+ """
+ s = avro.schema.parse(json.dumps({
+ "type": "record",
+ "name": "Test",
+ "doc": "This is a test schema",
+ "aliases": ["also", "known", "as"],
+ "fields": [
+ {
+ "type": {
+ "symbols": ["one", "two"],
+ "type": "enum",
+ "name": "NamedEnum"},
+ "name": "thenamedenum",
+ "doc": "This is a named enum"
+ },
+ {
+ "type": ["null", "NamedEnum"],
+ "name": "unionwithreftoenum"
+ }
+ ]
+ }))
+ expected = self.compact_json_string({
+ "name": "Test",
+ "type": "record",
+ "fields": [
+ {
+ "name": "thenamedenum",
+ "type": {
+ "name": "NamedEnum",
+ "type": "enum",
+ "symbols": ["one", "two"]
+ }
+ },
+ {
+ "name": "unionwithreftoenum",
+ "type": ["null", "NamedEnum"]
+ }
+ ]
+ })
+ self.assertEqual(s.canonical_form, expected)
+
+ def test_array(self):
+ """
+ Ensure that array schema produce the correct parsing canonical form.
+ """
+ s = avro.schema.parse(json.dumps({
+ "items": "long",
+ "type": "array"}))
+ self.assertEqual(
+ s.canonical_form,
+ self.compact_json_string({
+ "type": "array",
+ "items": "long"}))
+
+ def test_map(self):
+ """
+ Ensure that map schema produce the correct parsing canonical form.
+ """
+ s = avro.schema.parse(json.dumps({
+ "values": "long",
+ "type": "map"}))
+ self.assertEqual(
+ s.canonical_form,
+ self.compact_json_string({
+ "type": "map",
+ "values": "long"}))
+
+ def test_union(self):
+ """
+ Ensure that a union schema produces the correct parsing canonical form.
+ """
+ s = avro.schema.parse(json.dumps(["string", "null", "long"]))
+ self.assertEqual(
+ s.canonical_form,
+ '["string","null","long"]')
+
+ def test_large_record_handshake_request(self):
+ s = avro.schema.parse("""
+ {
+ "type": "record",
+ "name": "HandshakeRequest",
+ "namespace": "org.apache.avro.ipc",
+ "fields": [
+ {
+ "name": "clientHash",
+ "type": {"type": "fixed", "name": "MD5", "size": 16}
+ },
+ {"name": "clientProtocol", "type": ["null", "string"]},
+ {"name": "serverHash", "type": "MD5"},
+ {
+ "name": "meta",
+ "type": ["null", {"type": "map", "values": "bytes"}]
+ }
+ ]
+ }
+ """)
+ self.assertEqual(
+ s.canonical_form,
+ ('{"name":"org.apache.avro.ipc.HandshakeRequest","type":"record",'
+
'"fields":[{"name":"clientHash","type":{"name":"org.apache.avro.ipc.MD5",'
+
'"type":"fixed","size":16}},{"name":"clientProtocol","type":["null","string"]},'
+
'{"name":"serverHash","type":{"name":"org.apache.avro.ipc.MD5","type":"fixed","size":16}},'
+
'{"name":"meta","type":["null",{"type":"map","values":"bytes"}]}]}'))
+
+ def test_large_record_handshake_response(self):
+ s = avro.schema.parse("""
+ {
+ "type": "record",
+ "name": "HandshakeResponse",
+ "namespace": "org.apache.avro.ipc",
+ "fields": [
+ {
+ "name": "match",
+ "type": {
+ "type": "enum",
+ "name": "HandshakeMatch",
+ "symbols": ["BOTH", "CLIENT", "NONE"]
+ }
+ },
+ {"name": "serverProtocol", "type": ["null", "string"]},
+ {
+ "name": "serverHash",
+ "type": ["null", {"name": "MD5", "size": 16, "type": "fixed"}]
+ },
+ {
+ "name": "meta",
+ "type": ["null", {"type": "map", "values": "bytes"}]}]
+ }
+ """)
+ self.assertEqual(
+ s.canonical_form,
+ ('{"name":"org.apache.avro.ipc.HandshakeResponse","type":"rec'
+ 'ord","fields":[{"name":"match","type":{"name":"org.apache.a'
+ 'vro.ipc.HandshakeMatch","type":"enum","symbols":["BOTH","CL'
+ 'IENT","NONE"]}},{"name":"serverProtocol","type":["null","st'
+ 'ring"]},{"name":"serverHash","type":["null",{"name":"org.ap'
+ 'ache.avro.ipc.MD5","type":"fixed","size":16}]},{"name":"met'
+ 'a","type":["null",{"type":"map","values":"bytes"}]}]}'))
+
+ def test_large_record_interop(self):
+ s = avro.schema.parse("""
+ {
+ "type": "record",
+ "name": "Interop",
+ "namespace": "org.apache.avro",
+ "fields": [
+ {"name": "intField", "type": "int"},
+ {"name": "longField", "type": "long"},
+ {"name": "stringField", "type": "string"},
+ {"name": "boolField", "type": "boolean"},
+ {"name": "floatField", "type": "float"},
+ {"name": "doubleField", "type": "double"},
+ {"name": "bytesField", "type": "bytes"},
+ {"name": "nullField", "type": "null"},
+ {"name": "arrayField", "type": {"type": "array", "items":
"double"}},
+ {
+ "name": "mapField",
+ "type": {
+ "type": "map",
+ "values": {"name": "Foo",
+ "type": "record",
+ "fields": [{"name": "label", "type": "string"}]}
+ }
+ },
+ {
+ "name": "unionField",
+ "type": ["boolean", "double", {"type": "array", "items":
"bytes"}]
+ },
+ {
+ "name": "enumField",
+ "type": {"type": "enum", "name": "Kind", "symbols": ["A", "B",
"C"]}
+ },
+ {
+ "name": "fixedField",
+ "type": {"type": "fixed", "name": "MD5", "size": 16}
+ },
+ {
+ "name": "recordField",
+ "type": {"type": "record",
+ "name": "Node",
+ "fields": [{"name": "label", "type": "string"},
+ {"name": "children",
+ "type": {"type": "array",
+ "items": "Node"}}]}
+ }
+ ]
+ }
+ """)
+ self.assertEqual(
+ s.canonical_form,
+ ('{"name":"org.apache.avro.Interop","type":"record","fields":[{"na'
+ 'me":"intField","type":"int"},{"name":"longField","type":"long"},'
+ '{"name":"stringField","type":"string"},{"name":"boolField","type'
+ '":"boolean"},{"name":"floatField","type":"float"},{"name":"doubl'
+ 'eField","type":"double"},{"name":"bytesField","type":"bytes"},{"'
+ 'name":"nullField","type":"null"},{"name":"arrayField","type":{"t'
+ 'ype":"array","items":"double"}},{"name":"mapField","type":{"type'
+ '":"map","values":{"name":"org.apache.avro.Foo","type":"record","'
+ 'fields":[{"name":"label","type":"string"}]}}},{"name":"unionFiel'
+ 'd","type":["boolean","double",{"type":"array","items":"bytes"}]}'
+ ',{"name":"enumField","type":{"name":"org.apache.avro.Kind","type'
+ '":"enum","symbols":["A","B","C"]}},{"name":"fixedField","type":{'
+ '"name":"org.apache.avro.MD5","type":"fixed","size":16}},{"name":'
+ '"recordField","type":{"name":"org.apache.avro.Node","type":"reco'
+ 'rd","fields":[{"name":"label","type":"string"},{"name":"children'
+ '","type":{"type":"array","items":"org.apache.avro.Node"}}]}}]}'))
+
+
def load_tests(loader, default_tests, pattern):
"""Generate test cases across many test schema."""
suite = unittest.TestSuite()
@@ -591,6 +1009,7 @@ def load_tests(loader, default_tests, pattern):
suite.addTests(RoundTripParseTestCase(ex) for ex in VALID_EXAMPLES)
suite.addTests(DocAttributesTestCase(ex) for ex in DOC_EXAMPLES)
suite.addTests(OtherAttributesTestCase(ex) for ex in OTHER_PROP_EXAMPLES)
+ suite.addTests(loader.loadTestsFromTestCase(CanonicalFormTestCase))
return suite