Hi,
I'm writing a batch of records to a stream and want to read them back later. I
notice that if I use the RecordBatchStreamWriter class to write them and then
the ReadRecordBatch function to read them, I get a segmentation fault. On the
other hand, if I use the RecordBatchFileWriter class to write them, reading
works fine.
So, is the arrow::ipc::ReadRecordBatch function intended to work only if the
records were written by RecordBatchFileWriter?
Below is a complete example showing the two cases. I tried this on Ubuntu
Trusty with Arrow 0.9.0-1.
Thanks!
Rares
// g++-4.9 -ggdb -std=c++11 foo.cpp -larrow
#include <arrow/builder.h>
#include <arrow/io/memory.h>
#include <arrow/ipc/reader.h>
#include <arrow/ipc/writer.h>
#include <arrow/memory_pool.h>
#include <arrow/record_batch.h>
#include <arrow/type.h>
int main()
{
  arrow::MemoryPool* pool = arrow::default_memory_pool();
  std::shared_ptr<arrow::PoolBuffer> buffer(new arrow::PoolBuffer(pool));

  // Build a one-row int64 column
  arrow::Int64Builder builder(pool);
  builder.Append(1);
  std::shared_ptr<arrow::Array> array;
  builder.Finish(&array);

  std::vector<std::shared_ptr<arrow::Field>> schema_vector =
      {arrow::field("id", arrow::int64())};
  auto schema = std::make_shared<arrow::Schema>(schema_vector);

  // Write
  std::shared_ptr<arrow::RecordBatch> batchOut =
      arrow::RecordBatch::Make(schema, 1, {array});  // one row, matching the array length
  std::unique_ptr<arrow::io::BufferOutputStream> stream;
  stream.reset(new arrow::io::BufferOutputStream(buffer));
  std::shared_ptr<arrow::ipc::RecordBatchWriter> writer;

  // #1 - Segmentation fault (core dumped)
  arrow::ipc::RecordBatchStreamWriter::Open(stream.get(), schema, &writer);

  // #2 - OK
  // arrow::ipc::RecordBatchFileWriter::Open(stream.get(), schema, &writer);

  writer->WriteRecordBatch(*batchOut);
  writer->Close();
  stream->Close();
  // Read
  arrow::io::BufferReader reader(buffer);
  std::shared_ptr<arrow::RecordBatch> batchIn;
  arrow::ipc::ReadRecordBatch(schema, &reader, &batchIn);
}
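For reference, the read path I would have expected to pair with the stream
writer looks roughly like the snippet below (an untested sketch on my part,
assuming RecordBatchStreamReader is the intended counterpart), i.e. replacing
the // Read block above:

  // Untested sketch: read the stream back with RecordBatchStreamReader
  // instead of calling ReadRecordBatch directly on the buffer.
  arrow::io::BufferReader reader(buffer);
  std::shared_ptr<arrow::RecordBatchReader> streamReader;
  arrow::ipc::RecordBatchStreamReader::Open(&reader, &streamReader);
  std::shared_ptr<arrow::RecordBatch> batchIn;
  streamReader->ReadNext(&batchIn);  // batchIn becomes null once the stream is exhausted

If ReadRecordBatch is only meant for the file format, is something like this
the recommended way to read back what RecordBatchStreamWriter produces?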