Sure, here is briefly what I'm doing:

    bool append = false;
    std::shared_ptr<arrow::io::OutputStream> arrowStream;
    auto arrowResult = arrow::io::FileOutputStream::Open(fileName, append);
    arrowStream = arrowResult.ValueOrDie();
    std::shared_ptr<arrow::ipc::RecordBatchWriter> arrowWriter;
    std::shared_ptr<arrow::RecordBatch> arrowBatch;
    std::shared_ptr<arrow::RecordBatchReader> arrowReader;

    std::shared_ptr<arrow::Schema> arrowSchema = attributes2ArrowSchema(
        inputSchema, settings.isAttsOnly());
    ARROW_RETURN_NOT_OK(
        arrow::ipc::RecordBatchStreamWriter::Open(
            arrowStream.get(), arrowSchema, &arrowWriter));

    // Setup "arrowReader" using BufferReader and RecordBatchStreamReader
    ARROW_RETURN_NOT_OK(arrowReader->ReadNext(&arrowBatch));

    ARROW_RETURN_NOT_OK(
        arrowWriter->WriteRecordBatch(*arrowBatch));
    ARROW_RETURN_NOT_OK(arrowWriter->Close());
    ARROW_RETURN_NOT_OK(arrowStream->Close());

On Mon, Jun 15, 2020 at 6:26 AM Wes McKinney <wesmck...@gmail.com> wrote:

> Can you show the code you are writing? The first thing the stream writer
> does before writing any record batch is write the schema. It sounds like
> you are using arrow::ipc::WriteRecordBatch somewhere.
>
> On Sun, Jun 14, 2020, 11:44 PM Rares Vernica <rvern...@gmail.com> wrote:
>
> > Hello,
> >
> > I have a RecordBatch that I would like to write to a file. I'm using
> > FileOutputStream::Open to open the file and RecordBatchStreamWriter::Open
> > to open the stream. I write a record batch with WriteRecordBatch. Finally,
> > I close the RecordBatchWriter and OutputStream.
> >
> > The resulting file size is exactly the size of the Buffer used to store the
> > RecordBatch. It looks like it is missing the schema. When I try to open the
> > resulting file from PyArrow I get:
> >
> > >>> pa.ipc.open_file('/tmp/1')
> > pyarrow.lib.ArrowInvalid: File is too small: 6
> >
> > $ ll /tmp/1
> > -rw-r--r--. 1 root root 720 Jun 15 03:54 /tmp/1
> >
> > How can I write the schema as well?
> >
> > I was browsing the documentation at
> > https://arrow.apache.org/docs/cpp/index.html but I can't locate any C++
> > documentation about RecordBatchStreamWriter or RecordBatchWriter. Is this
> > intentional?
> >
> > Thank you!
> > Rares
> >
>