File created with c++ can not be read with Python

alopez Mon, 07 Oct 2019 00:40:50 -0700

Dear all,

I am new to this world, so I apologize if this is a trivial question.


I have created a serialized Avro file using C++. When I try to read the file
using Python, I get an error. I am able to read the file correctly in C++.
The details are:

 SCHEMA 


[
{
   "type" : "record",
   "namespace" : "Results",
   "name" : "Statistics",
   "fields" : [
      { "name" : "pcketsGenerated" , "type" : "int" },
      { "name" : "avdDelay" , "type" : "double" }
   ]
},
{
   "type" : "record",
   "namespace" : "Results",
   "name" : "PathStatistics",
   "fields" : [
      { "name" : "src" , "type" : "int" },
      { "name" : "dst" , "type" : "int" },
      { "name" : "statistics" , "type" : "Results.Statistics"},
      { "name" : "flowStatVec" , "type" : {"type":"array", "items":
"Results.Statistics"}}
   ]
},
{
    "type" : "record",
    "namespace" : "Results",
    "name" : "SimResults",
    "fields" : [
        { "name" : "NetSize" , "type" : "int" },
        { "name" : "ItResVec" , "type" : {
            "type": "array",
            "items" : {
                "type" : "record",
                "namespace":"Results",
                "name" : "itResults",
                "fields" : [
                    {"name" : "PathStatVec", "type" : {
                          "type": "array",
                          "items": "Results.PathStatistics"}}
                ]
            }
        }}
   ]
}
]

 ERROR


{'NetSize': 2, 'ItResVec': [{'PathStatVec': []}, {'PathStatVec': []},
{'PathStatVec': []}, {'PathStatVec': []}]}
-----------------------
{'pcketsGenerated': 0, 'avdDelay': 1.67723666e-316}
-----------------------
{'pcketsGenerated': 0, 'avdDelay': 1.37603e-318}
-----------------------
{'pcketsGenerated': 0, 'avdDelay': 7.905e-321}
-----------------------
Traceback (most recent call last):
  File "./process.py", line 12, in <module>
    for steps in reader:
  File "/usr/lib/python3/dist-packages/avro/datafile.py", line 526, in
__next__
    datum = self.datum_reader.read(self.datum_decoder)
  File "/usr/lib/python3/dist-packages/avro/io.py", line 481, in read
    return self.read_data(self.writer_schema, self.reader_schema, decoder)
  File "/usr/lib/python3/dist-packages/avro/io.py", line 524, in read_data
    return self.read_union(writer_schema, reader_schema, decoder)
  File "/usr/lib/python3/dist-packages/avro/io.py", line 686, in read_union
    raise SchemaResolutionException(fail_msg, writer_schema, reader_schema)
avro.io.SchemaResolutionException: Can't access branch index 4 for union
with 3 branches
Writer's Schema: [
  {
    "type": "record",
    "name": "Statistics",
    "namespace": "Results",
    "fields": [
      {
        "type": "int",
        "name": "pcketsGenerated"
      },
      {
        "type": "double",
        "name": "avdDelay"
      }
    ]
  },
  {
    "type": "record",
    "name": "PathStatistics",
    "namespace": "Results",
    "fields": [
      {
        "type": "int",
        "name": "src"
      },
      {
        "type": "int",
        "name": "dst"
      },
      {
        "type": "Results.Statistics",
        "name": "statistics"
      },
      {
        "type": {
          "type": "array",
          "items": "Results.Statistics"
        },
        "name": "flowStatVec"
      }
    ]
  },
  {
    "type": "record",
    "name": "SimResults",
    "namespace": "Results",
    "fields": [
      {
        "type": "int",
        "name": "NetSize"
      },
      {
        "type": {
          "type": "array",
          "items": {
            "type": "record",
            "name": "itResults",
            "namespace": "Results",
            "fields": [
              {
                "type": {
                  "type": "array",
                  "items": "Results.PathStatistics"
                },
                "name": "PathStatVec"
              }
            ]
          }
        },
        "name": "ItResVec"
      }
    ]
  }
]
Reader's Schema: [
  {
    "type": "record",
    "name": "Statistics",
    "namespace": "Results",
    "fields": [
      {
        "type": "int",
        "name": "pcketsGenerated"
      },
      {
        "type": "double",
        "name": "avdDelay"
      }
    ]
  },
  {
    "type": "record",
    "name": "PathStatistics",
    "namespace": "Results",
    "fields": [
      {
        "type": "int",
        "name": "src"
      },
      {
        "type": "int",
        "name": "dst"
      },
      {
        "type": "Results.Statistics",
        "name": "statistics"
      },
      {
        "type": {
          "type": "array",
          "items": "Results.Statistics"
        },
        "name": "flowStatVec"
      }
    ]
  },
  {
    "type": "record",
    "name": "SimResults",
    "namespace": "Results",
    "fields": [
      {
        "type": "int",
        "name": "NetSize"
      },
      {
        "type": {
          "type": "array",
          "items": {
            "type": "record",
            "name": "itResults",
            "namespace": "Results",
            "fields": [
              {
                "type": {
                  "type": "array",
                  "items": "Results.PathStatistics"
                },
                "name": "PathStatVec"
              }
            ]
          }
        },
        "name": "ItResVec"
      }
    ]
  }
]

 C++ code : Write and read


#include "schema.hh"
#include "avro/Compiler.hh"
#include "avro/DataFile.hh"
#include "avro/Decoder.hh"
#include "avro/Encoder.hh"
#include "avro/ValidSchema.hh"
#include <cstdio>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>


/**
 * Loads the Avro JSON schema stored in `filename` and compiles it.
 *
 * @param filename Path of the JSON schema file to read.
 * @return The schema filled in by avro::compileJsonSchema.
 * @throws std::runtime_error if the file cannot be opened.
 */
avro::ValidSchema loadSchema(const char* filename)
{
    std::ifstream ifs(filename);
    if (!ifs) {
        // Report the offending path instead of handing a dead stream to the
        // schema compiler.
        throw std::runtime_error(std::string("cannot open schema file: ") + filename);
    }

    avro::ValidSchema result;
    avro::compileJsonSchema(ifs, result);
    return result;
}

int
main()
{
    avro::ValidSchema resultsSchema = loadSchema("schema.avdl");
    avro::DataFileWriter<SimResults> dfw("test.bin", resultsSchema);

    SimResults net;

    net.NetSize = 2;
    int i = 0;

    for (int it = 0; it < 2; it ++){
        itResults ir;
        for (int src = 0; src < 2 ; src ++){
            for (int dst = 0; dst < 2 ; dst ++){
                Statistics gstat;
                gstat.pcketsGenerated = i;
                gstat.avdDelay = i;
                PathStatistics pst;
                pst.src = src;
                pst.dst = dst;
                pst.statistics = gstat;
                i++;
                for (int flow = 0; flow < 3 ; flow++){
                    Statistics stFlow;
                    stFlow.pcketsGenerated = i;
                    stFlow.avdDelay = i;
                    pst.flowStatVec.push_back(stFlow);
                    i++;
                }
                ir.PathStatVec.push_back(pst);
            }
        }
        net.ItResVec.push_back(ir);
    }

    dfw.write(net);
    dfw.close();


    avro::DataFileReader<SimResults> dfr("test.bin", resultsSchema);
    SimResults net2;
    dfr.read(net2);

    std::cout << net2.NetSize << std::endl;
    for (int it = 0; it < net2.ItResVec.size(); it ++){
        itResults itr1 = net2.ItResVec[it];
        for (int src = 0; src < 2 ; src ++){
            for (int dst = 0; dst < 2 ; dst ++){
                PathStatistics pvst1 = itr1.PathStatVec[src*2 + dst];
                for (int flow = 0; flow < 3 ; flow++){
                    printf("it: %d, src: %d, dst: %d, flow: %d, stat: %d\n",
                           
it,src,dst,flow,pvst1.flowStatVec[flow].pcketsGenerated);
                }
            }
        }
    }

PYTHON code: Only read 


#!/usr/bin/python3


from avro import schema, datafile, io

# Parse the reader schema from the same file the C++ writer used. The result
# gets its own name so the imported `schema` module is not shadowed.
with open("schema.avdl", "r") as schema_file:
    reader_schema = schema.Parse(schema_file.read())

# Iterate over every datum stored in the container file; the `with` block
# closes the data file even if decoding raises part-way through.
with open("test.bin", "rb") as data_file:
    reader = datafile.DataFileReader(
        data_file, io.DatumReader(reader_schema=reader_schema))

    for steps in reader:
        print(steps)
        print("-----------------------")


Thanks for your comments



--
Sent from: http://apache-avro.679487.n3.nabble.com/Avro-Users-f679479.html

File created with c++ can not be read with Python

Reply via email to