[
https://issues.apache.org/jira/browse/AVRO-2468?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16911171#comment-16911171
]
Hudson commented on AVRO-2468:
------------------------------
SUCCESS: Integrated in Jenkins build AvroJava #725 (See
[https://builds.apache.org/job/AvroJava/725/])
AVRO-2468: Fix broken data interoperability on the Perl bindings (#582) (fokko:
[https://github.com/apache/avro/commit/02dbe25e35d5ffc644f049cf602f88c1b137ea29])
* (edit) lang/perl/build.sh
* (add) lang/perl/xt/interop.t
* (edit) build.sh
* (add) lang/perl/xt/schema.t
* (add) lang/perl/share/interop-data-generate
* (edit) lang/perl/lib/Avro/Schema.pm
> Fix broken data interoperability on the Perl bindings
> -----------------------------------------------------
>
> Key: AVRO-2468
> URL: https://issues.apache.org/jira/browse/AVRO-2468
> Project: Apache Avro
> Issue Type: Bug
> Components: interop, perl
> Reporter: Kengo Seki
> Assignee: Kengo Seki
> Priority: Critical
> Fix For: 1.10.0
>
>
> I found some data interop problems on the Perl bindings.
> 1. They fail to parse a schema if there's an array/map/union which contains
> named types with a simple (not fully-qualified) name in it. For example, they
> can't parse {{share/test/schemas/interop.avsc}} or
> {{share/schemas/org/apache/avro/data/Json.avsc}}, because they have a named
> type called "Node" or "Json" respectively in arrays/maps. This seems to be because
> the parser doesn't take the namespace into consideration when parsing
> array/map/union.
> {code}
> $ cd lang/perl
> $ perl -Ilib -de 1
> (snip)
> DB<1> open FH, '../../share/test/schemas/interop.avsc'; local $/ = undef;
> $s = <FH>; close FH; print $s
> {"type": "record", "name":"Interop", "namespace": "org.apache.avro",
> "fields": [
> {"name": "intField", "type": "int"},
> {"name": "longField", "type": "long"},
> {"name": "stringField", "type": "string"},
> {"name": "boolField", "type": "boolean"},
> {"name": "floatField", "type": "float"},
> {"name": "doubleField", "type": "double"},
> {"name": "bytesField", "type": "bytes"},
> {"name": "nullField", "type": "null"},
> {"name": "arrayField", "type": {"type": "array", "items": "double"}},
> {"name": "mapField", "type":
> {"type": "map", "values":
> {"type": "record", "name": "Foo",
> "fields": [{"name": "label", "type": "string"}]}}},
> {"name": "unionField", "type":
> ["boolean", "double", {"type": "array", "items": "bytes"}]},
> {"name": "enumField", "type":
> {"type": "enum", "name": "Kind", "symbols": ["A","B","C"]}},
> {"name": "fixedField", "type":
> {"type": "fixed", "name": "MD5", "size": 16}},
> {"name": "recordField", "type":
> {"type": "record", "name": "Node",
> "fields": [
> {"name": "label", "type": "string"},
> {"name": "children", "type": {"type": "array", "items":
> "Node"}}]}}
> ]
> }
> DB<2> use Avro::Schema; Avro::Schema->parse($s)
> Not a primitive type Node at lib/Avro/Schema.pm line 257.
> {code}
> 2. They encode the size for a fixed type as a string rather than a number, so
> other language bindings fail to parse it.
> {code}
> $ cd lang/perl
> $ perl -Ilib -de 1
> (snip)
> DB<1> use Avro::Schema; $s = Avro::Schema->parse('{"type": "fixed", "size":
> 16, "name": "md5"}')
> DB<2> open($fh, '>/tmp/output')
> DB<3> use Avro::DataFileWriter; $w = Avro::DataFileWriter->new(fh => $fh,
> writer_schema => $s)
> DB<4> $w->print('0123456789abcdef')
> DB<5> $w->close
> {code}
> {code}
> $ ipython
> (snip)
> In [1]: from avro.datafile import DataFileReader
> In [2]: from avro.io import DatumReader
> In [3]: DataFileReader(datum_reader=DatumReader(), reader=open("/tmp/output"))
> ---------------------------------------------------------------------------
> AvroException Traceback (most recent call last)
> <ipython-input-3-13da25c7d572> in <module>()
> ----> 1 DataFileReader(datum_reader=DatumReader(), reader=open("/tmp/output"))
> /home/sekikn/repo/avro/lang/py/src/avro/datafile.pyc in __init__(self,
> reader, datum_reader)
> 255 # get ready to read
> 256 self._block_count = 0
> --> 257 self.datum_reader.writers_schema =
> schema.parse(self.get_meta(SCHEMA_KEY))
> 258
> 259 def __enter__(self):
> /home/sekikn/repo/avro/lang/py/src/avro/schema.pyc in parse(json_string)
> 984
> 985 # construct the Avro Schema object
> --> 986 return make_avsc_object(json_data, names)
> /home/sekikn/repo/avro/lang/py/src/avro/schema.pyc in
> make_avsc_object(json_data, names)
> 931 scale = 0 if json_data.get('scale') is None else
> json_data.get('scale')
> 932 return FixedDecimalSchema(size, name, precision, scale,
> namespace, names, other_props)
> --> 933 return FixedSchema(name, namespace, size, names, other_props)
> 934 elif type == 'enum':
> 935 symbols = json_data.get('symbols')
> /home/sekikn/repo/avro/lang/py/src/avro/schema.pyc in __init__(self, name,
> namespace, size, names, other_props)
> 482 if not isinstance(size, int) or size < 0:
> 483 fail_msg = 'Fixed Schema requires a valid positive integer for
> size property.'
> --> 484 raise AvroException(fail_msg)
> 485
> 486 # Call parent ctor
> AvroException: Fixed Schema requires a valid positive integer for size
> property.
> {code}
> {code}
> $ strings /tmp/output
> avro.schemaR{"size":"16","type":"fixed","name":"md5"}
> (snip)
> {code}
--
This message was sent by Atlassian Jira
(v8.3.2#803003)