This is an automated email from the ASF dual-hosted git repository. penghui pushed a commit to branch branch-2.8 in repository https://gitbox.apache.org/repos/asf/pulsar.git
commit 9ec4f909625036ea7bb67eac9fcda620418cd027 Author: Matteo Merli <[email protected]> AuthorDate: Wed Sep 29 04:16:08 2021 -0600 [Python] Do not sort schema fields by default (#12232) ### Motivation In Avro schema, the order of fields is used in the validation process, so if we are sorting the fields, that will generate an unexpected schema for a python producer/consumer and it will make it not interoperable with Java and other clients. (cherry picked from commit 2f3ad4d369e8a2ae558c6f9ee85f0b407e5e78b2) --- .../python/pulsar/schema/definition.py | 19 ++++++++--- pulsar-client-cpp/python/schema_test.py | 38 ++++++++++++++++++++++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/pulsar-client-cpp/python/pulsar/schema/definition.py b/pulsar-client-cpp/python/pulsar/schema/definition.py index 9335176..fd778f3 100644 --- a/pulsar-client-cpp/python/pulsar/schema/definition.py +++ b/pulsar-client-cpp/python/pulsar/schema/definition.py @@ -60,6 +60,9 @@ class Record(with_metaclass(RecordMeta, object)): # This field is used to set namespace for Avro Record schema. _avro_namespace = None + # Generate a schema where fields are sorted alphabetically + _sorted_fields = False + def __init__(self, default=None, required_default=False, required=False, *args, **kwargs): self._required_default = required_default self._default = default @@ -114,20 +117,26 @@ class Record(with_metaclass(RecordMeta, object)): defined_names.add(namespace_name) - schema = {'name': str(cls.__name__)} + schema = { + 'type': 'record', + 'name': str(cls.__name__) + } if cls._avro_namespace is not None: schema['namespace'] = cls._avro_namespace - schema['type'] = 'record' schema['fields'] = [] - for name in sorted(cls._fields.keys()): + if cls._sorted_fields: + fields = sorted(cls._fields.keys()) + else: + fields = cls._fields.keys() + for name in fields: field = cls._fields[name] field_type = field.schema_info(defined_names) \ if field._required else ['null', field.schema_info(defined_names)] schema['fields'].append({ 'name': name, - 'type': field_type, - 'default': field.default() + 'default': field.default(), + 'type': field_type }) if field.required_default() else schema['fields'].append({ 'name': name, 'type': field_type, diff --git a/pulsar-client-cpp/python/schema_test.py b/pulsar-client-cpp/python/schema_test.py index 40497ad..7adbcbe 100755 --- a/pulsar-client-cpp/python/schema_test.py +++ b/pulsar-client-cpp/python/schema_test.py @@ -38,6 +38,7 @@ class SchemaTest(TestCase): blue = 3 class Example(Record): + _sorted_fields = True a = String() b = Integer() c = Array(String()) @@ -78,11 +79,13 @@ class SchemaTest(TestCase): def test_complex(self): class MySubRecord(Record): + _sorted_fields = True x = Integer() y = Long() z = String() class Example(Record): + _sorted_fields = True a = String() sub = MySubRecord # Test with class sub2 = MySubRecord() # Test with instance @@ -348,6 +351,34 @@ class SchemaTest(TestCase): self.assertEqual(r2.__class__.__name__, 'Example') self.assertEqual(r2, r) + def test_non_sorted_fields(self): + class T1(Record): + a = Integer() + b = Integer() + c = Double() + d = String() + + class T2(Record): + b = Integer() + a = Integer() + d = String() + c = Double() + + self.assertNotEqual(T1.schema()['fields'], T2.schema()['fields']) + + def test_sorted_fields(self): + class T1(Record): + _sorted_fields = True + a = Integer() + b = Integer() + + class T2(Record): + _sorted_fields = True + b = Integer() + a = Integer() + + self.assertEqual(T1.schema()['fields'], T2.schema()['fields']) + def test_schema_version(self): class Example(Record): a = Integer() @@ -691,6 +722,7 @@ class SchemaTest(TestCase): def test_avro_required_default(self): class MySubRecord(Record): + _sorted_fields = True x = Integer() y = Long() z = String() @@ -707,7 +739,9 @@ class SchemaTest(TestCase): i = Map(String()) j = MySubRecord() + class ExampleRequiredDefault(Record): + _sorted_fields = True a = Integer(required_default=True) b = Boolean(required=True, required_default=True) c = Long(required_default=True) @@ -879,10 +913,12 @@ class SchemaTest(TestCase): def test_serialize_schema_complex(self): class NestedObj1(Record): + _sorted_fields = True na1 = String() nb1 = Double() class NestedObj2(Record): + _sorted_fields = True na2 = Integer() nb2 = Boolean() nc2 = NestedObj1() @@ -892,6 +928,7 @@ class SchemaTest(TestCase): class NestedObj4(Record): _avro_namespace = 'xxx4' + _sorted_fields = True na4 = String() nb4 = Integer() @@ -902,6 +939,7 @@ class SchemaTest(TestCase): class ComplexRecord(Record): _avro_namespace = 'xxx.xxx' + _sorted_fields = True a = Integer() b = Integer() color = Color
