Dear all,
I am new to this world, so I apologize if this is a trivial question.
I have created a serialized Avro file using C++. When I try to read the file
using Python I get an error. I am able to read the file in C++ correctly.
The details are:
SCHEMA
[
{
"type" : "record",
"namespace" : "Results",
"name" : "Statistics",
"fields" : [
{ "name" : "pcketsGenerated" , "type" : "int" },
{ "name" : "avdDelay" , "type" : "double" }
]
},
{
"type" : "record",
"namespace" : "Results",
"name" : "PathStatistics",
"fields" : [
{ "name" : "src" , "type" : "int" },
{ "name" : "dst" , "type" : "int" },
{ "name" : "statistics" , "type" : "Results.Statistics"},
{ "name" : "flowStatVec" , "type" : {"type":"array", "items":
"Results.Statistics"}}
]
},
{
"type" : "record",
"namespace" : "Results",
"name" : "SimResults",
"fields" : [
{ "name" : "NetSize" , "type" : "int" },
{ "name" : "ItResVec" , "type" : {
"type": "array",
"items" : {
"type" : "record",
"namespace":"Results",
"name" : "itResults",
"fields" : [
{"name" : "PathStatVec", "type" : {
"type": "array",
"items": "Results.PathStatistics"}}
]
}
}}
]
}
]
ERROR
{'NetSize': 2, 'ItResVec': [{'PathStatVec': []}, {'PathStatVec': []},
{'PathStatVec': []}, {'PathStatVec': []}]}
-----------------------
{'pcketsGenerated': 0, 'avdDelay': 1.67723666e-316}
-----------------------
{'pcketsGenerated': 0, 'avdDelay': 1.37603e-318}
-----------------------
{'pcketsGenerated': 0, 'avdDelay': 7.905e-321}
-----------------------
Traceback (most recent call last):
File "./process.py", line 12, in <module>
for steps in reader:
File "/usr/lib/python3/dist-packages/avro/datafile.py", line 526, in
__next__
datum = self.datum_reader.read(self.datum_decoder)
File "/usr/lib/python3/dist-packages/avro/io.py", line 481, in read
return self.read_data(self.writer_schema, self.reader_schema, decoder)
File "/usr/lib/python3/dist-packages/avro/io.py", line 524, in read_data
return self.read_union(writer_schema, reader_schema, decoder)
File "/usr/lib/python3/dist-packages/avro/io.py", line 686, in read_union
raise SchemaResolutionException(fail_msg, writer_schema, reader_schema)
avro.io.SchemaResolutionException: Can't access branch index 4 for union
with 3 branches
Writer's Schema: [
{
"type": "record",
"name": "Statistics",
"namespace": "Results",
"fields": [
{
"type": "int",
"name": "pcketsGenerated"
},
{
"type": "double",
"name": "avdDelay"
}
]
},
{
"type": "record",
"name": "PathStatistics",
"namespace": "Results",
"fields": [
{
"type": "int",
"name": "src"
},
{
"type": "int",
"name": "dst"
},
{
"type": "Results.Statistics",
"name": "statistics"
},
{
"type": {
"type": "array",
"items": "Results.Statistics"
},
"name": "flowStatVec"
}
]
},
{
"type": "record",
"name": "SimResults",
"namespace": "Results",
"fields": [
{
"type": "int",
"name": "NetSize"
},
{
"type": {
"type": "array",
"items": {
"type": "record",
"name": "itResults",
"namespace": "Results",
"fields": [
{
"type": {
"type": "array",
"items": "Results.PathStatistics"
},
"name": "PathStatVec"
}
]
}
},
"name": "ItResVec"
}
]
}
]
Reader's Schema: [
{
"type": "record",
"name": "Statistics",
"namespace": "Results",
"fields": [
{
"type": "int",
"name": "pcketsGenerated"
},
{
"type": "double",
"name": "avdDelay"
}
]
},
{
"type": "record",
"name": "PathStatistics",
"namespace": "Results",
"fields": [
{
"type": "int",
"name": "src"
},
{
"type": "int",
"name": "dst"
},
{
"type": "Results.Statistics",
"name": "statistics"
},
{
"type": {
"type": "array",
"items": "Results.Statistics"
},
"name": "flowStatVec"
}
]
},
{
"type": "record",
"name": "SimResults",
"namespace": "Results",
"fields": [
{
"type": "int",
"name": "NetSize"
},
{
"type": {
"type": "array",
"items": {
"type": "record",
"name": "itResults",
"namespace": "Results",
"fields": [
{
"type": {
"type": "array",
"items": "Results.PathStatistics"
},
"name": "PathStatVec"
}
]
}
},
"name": "ItResVec"
}
]
}
]
C++ code : Write and read
#include "schema.hh"
#include "avro/Encoder.hh"
#include "avro/Decoder.hh"
#include "avro/ValidSchema.hh"
#include "avro/Compiler.hh"
#include "avro/DataFile.hh"
#include <cstdio>
#include <fstream>
#include <iostream>
#include <vector>
/**
 * Compiles the JSON Avro schema stored in a file.
 *
 * @param filename Path of the schema file to open.
 * @return The schema produced by avro::compileJsonSchema from the file's
 *         contents.
 *
 * The stream is handed to the compiler as-is; this function does not check
 * whether the file could be opened.
 */
avro::ValidSchema loadSchema(const char* filename)
{
    avro::ValidSchema compiled;
    std::ifstream schemaFile(filename);
    avro::compileJsonSchema(schemaFile, compiled);
    return compiled;
}
int
main()
{
avro::ValidSchema resultsSchema = loadSchema("schema.avdl");
avro::DataFileWriter<SimResults> dfw("test.bin", resultsSchema);
SimResults net;
net.NetSize = 2;
int i = 0;
for (int it = 0; it < 2; it ++){
itResults ir;
for (int src = 0; src < 2 ; src ++){
for (int dst = 0; dst < 2 ; dst ++){
Statistics gstat;
gstat.pcketsGenerated = i;
gstat.avdDelay = i;
PathStatistics pst;
pst.src = src;
pst.dst = dst;
pst.statistics = gstat;
i++;
for (int flow = 0; flow < 3 ; flow++){
Statistics stFlow;
stFlow.pcketsGenerated = i;
stFlow.avdDelay = i;
pst.flowStatVec.push_back(stFlow);
i++;
}
ir.PathStatVec.push_back(pst);
}
}
net.ItResVec.push_back(ir);
}
dfw.write(net);
dfw.close();
avro::DataFileReader<SimResults> dfr("test.bin", resultsSchema);
SimResults net2;
dfr.read(net2);
std::cout << net2.NetSize << std::endl;
for (int it = 0; it < net2.ItResVec.size(); it ++){
itResults itr1 = net2.ItResVec[it];
for (int src = 0; src < 2 ; src ++){
for (int dst = 0; dst < 2 ; dst ++){
PathStatistics pvst1 = itr1.PathStatVec[src*2 + dst];
for (int flow = 0; flow < 3 ; flow++){
printf("it: %d, src: %d, dst: %d, flow: %d, stat: %d\n",
it,src,dst,flow,pvst1.flowStatVec[flow].pcketsGenerated);
}
}
}
}
PYTHON code: Only read
#!/usr/bin/python3
"""Print every datum stored in the Avro data file test.bin.

The schema in schema.avdl is passed to the DatumReader as the reader schema;
each datum is printed followed by a separator line.
"""
from avro import schema, datafile, io

# Keep the parsed schema under its own name so the imported `schema` module
# is not shadowed.
with open("schema.avdl", "r") as schema_file:
    reader_schema = schema.Parse(schema_file.read())

# The with-block guarantees the data file is closed even if decoding raises.
with open("test.bin", "rb") as data_file:
    reader = datafile.DataFileReader(
        data_file, io.DatumReader(reader_schema=reader_schema))
    for steps in reader:
        print(steps)
        print("-----------------------")
Thanks for your comments
--
Sent from: http://apache-avro.679487.n3.nabble.com/Avro-Users-f679479.html