[
https://issues.apache.org/jira/browse/AVRO-2226?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16703745#comment-16703745
]
ASF GitHub Bot commented on AVRO-2226:
--------------------------------------
dkulp closed pull request #331: [AVRO-2226] Fixes UnionSchema specificity
URL: https://github.com/apache/avro/pull/331
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/lang/py3/avro/io.py b/lang/py3/avro/io.py
index b944904ee..7b56dff0f 100644
--- a/lang/py3/avro/io.py
+++ b/lang/py3/avro/io.py
@@ -135,9 +135,17 @@ def Validate(expected_schema, datum):
return any(Validate(union_branch, datum)
for union_branch in expected_schema.schemas)
elif schema_type in ['record', 'error', 'request']:
- return (isinstance(datum, dict)
- and all(Validate(field.type, datum.get(field.name))
- for field in expected_schema.fields))
+ if not isinstance(datum, dict):
+ return False
+ expected_schema_field_names = set()
+ for field in expected_schema.fields:
+ expected_schema_field_names.add(field.name)
+ if not Validate(field.type, datum.get(field.name)):
+ return False
+ for datum_field in datum.keys():
+ if datum_field not in expected_schema_field_names:
+ return False
+ return True
else:
raise AvroTypeException('Unknown Avro schema type: %r' % schema_type)
diff --git a/lang/py3/avro/tests/test_io.py b/lang/py3/avro/tests/test_io.py
index 8349ce5c7..3d5868384 100644
--- a/lang/py3/avro/tests/test_io.py
+++ b/lang/py3/avro/tests/test_io.py
@@ -346,6 +346,30 @@ def testTypeException(self):
self.assertRaises(
avro_io.AvroTypeException, write_datum, datum_to_write, writer_schema)
+ def testUnionSchemaSpecificity(self):
+ union_schema = schema.Parse("""
+ [{
+ "type" : "record",
+ "name" : "A",
+ "fields" : [{"name" : "foo", "type" : ["string", "null"]}]
+ },
+ {
+ "type" : "record",
+ "name" : "B",
+ "fields" : [{"name" : "bar", "type" : ["string", "null"]}]
+ },
+ {
+ "type" : "record",
+ "name" : "AOrB",
+ "fields" : [{"name" : "entity", "type" : ["A", "B"]}]
+ }]
+ """)
+ sch = {s.name: s for s in union_schema.schemas}.get('AOrB')
+ datum_to_read = {'entity': {'foo': 'this is an instance of schema A'}}
+ writer, encoder, datum_writer = write_datum(datum_to_read, sch)
+ datum_read = read_datum(writer, sch, sch)
+ self.assertEqual(datum_to_read, datum_read)
+
if __name__ == '__main__':
raise Exception('Use run_tests.py')
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
> UnionSchema deduction is too permissive
> ---------------------------------------
>
> Key: AVRO-2226
> URL: https://issues.apache.org/jira/browse/AVRO-2226
> Project: Apache Avro
> Issue Type: Bug
> Components: python
> Affects Versions: 1.8.2
> Reporter: Andrew Kelleher
> Priority: Major
> Attachments: AVRO-2226.patch
>
> Original Estimate: 24h
> Remaining Estimate: 24h
>
> When given a schema of the form
> {code:java}
> {
> "type" : "record",
> "name" : "A",
> "namespace" : "com.example",
> "fields" : [
> {
> "name" : "foo",
> "type" : ["string", "null"]
> }
> ]
> }
> {
> "type" : "record",
> "name" : "B",
> "namespace" : "com.example",
> "fields" : [
> {
> "name" : "bar",
> "type" : ["string", "null"]
> }
> ]
> }
> {
> "type" : "record",
> "name" : "AOrB",
> "namespace" : "com.example",
> "fields" : [
> {
> "name" : "entity",
> "type" : [
> "com.example.A",
> "com.example.B"
> ]
> }
> ]
> }
> {code}
> And a datum of the form
> {code}
> {'entity': {'foo': 'this is an instance of schema A'}}{code}
> Converting the datum to a message and then reading it back from that message
> chooses the incorrect schema for `entity`:
> {code}
> {'entity': {'bar': None}}{code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)