[ 
https://issues.apache.org/jira/browse/ARROW-7522?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Neal Richardson updated ARROW-7522:
-----------------------------------
    Summary: [C++][Plasma] Broken Record Batch returned from a function call  
(was: Broken Record Batch returned from a function call)

> [C++][Plasma] Broken Record Batch returned from a function call
> ---------------------------------------------------------------
>
>                 Key: ARROW-7522
>                 URL: https://issues.apache.org/jira/browse/ARROW-7522
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: C++, C++ - Plasma
>    Affects Versions: 0.15.1
>         Environment: macOS
>            Reporter: Chengxin Ma
>            Priority: Minor
>
> Scenario: retrieving a Record Batch from Plasma with a known Object ID.
> The following code snippet works well:
> {code:java}
> int main(int argc, char **argv)
> {
>     plasma::ObjectID object_id = 
> plasma::ObjectID::from_binary("0FF1CE00C0FFEE00BEEF");
>     // Start up and connect a Plasma client.
>     plasma::PlasmaClient client;
>     ARROW_CHECK_OK(client.Connect("/tmp/store"));
>     plasma::ObjectBuffer object_buffer;
>     ARROW_CHECK_OK(client.Get(&object_id, 1, -1, &object_buffer));
>     // Retrieve object data.
>     auto buffer = object_buffer.data;
>     arrow::io::BufferReader buffer_reader(buffer); 
>     std::shared_ptr<arrow::ipc::RecordBatchReader> record_batch_stream_reader;
>     ARROW_CHECK_OK(arrow::ipc::RecordBatchStreamReader::Open(&buffer_reader, 
> &record_batch_stream_reader));
>     std::shared_ptr<arrow::RecordBatch> record_batch;
>     arrow::Status status = 
> record_batch_stream_reader->ReadNext(&record_batch);
>     std::cout << "record_batch->column_name(0): " << 
> record_batch->column_name(0) << std::endl;
>     std::cout << "record_batch->num_columns(): " << 
> record_batch->num_columns() << std::endl;
>     std::cout << "record_batch->num_rows(): " << record_batch->num_rows() << 
> std::endl;
>     std::cout << "record_batch->column(0)->length(): "
>               << record_batch->column(0)->length() << std::endl;
>     std::cout << "record_batch->column(0)->ToString(): "
>               << record_batch->column(0)->ToString() << std::endl;
> }
> {code}
> {{record_batch->column(0)->ToString()}} causes a segmentation fault if 
> the code that retrieves the Record Batch is wrapped in a function:
> {code:java}
> std::shared_ptr<arrow::RecordBatch> GetRecordBatchFromPlasma(plasma::ObjectID 
> object_id)
> {
>     // Start up and connect a Plasma client.
>     plasma::PlasmaClient client;
>     ARROW_CHECK_OK(client.Connect("/tmp/store"));
>     plasma::ObjectBuffer object_buffer;
>     ARROW_CHECK_OK(client.Get(&object_id, 1, -1, &object_buffer));
>     // Retrieve object data.
>     auto buffer = object_buffer.data;
>     arrow::io::BufferReader buffer_reader(buffer);
>     std::shared_ptr<arrow::ipc::RecordBatchReader> record_batch_stream_reader;
>     ARROW_CHECK_OK(arrow::ipc::RecordBatchStreamReader::Open(&buffer_reader, 
> &record_batch_stream_reader));
>     std::shared_ptr<arrow::RecordBatch> record_batch;
>     arrow::Status status = 
> record_batch_stream_reader->ReadNext(&record_batch);
>     // Disconnect the client.
>     ARROW_CHECK_OK(client.Disconnect());
>     return record_batch;
> }
> int main(int argc, char **argv)
> {
>     plasma::ObjectID object_id = 
> plasma::ObjectID::from_binary("0FF1CE00C0FFEE00BEEF");
>     std::shared_ptr<arrow::RecordBatch> record_batch = 
> GetRecordBatchFromPlasma(object_id);
>     std::cout << "record_batch->column_name(0): " << 
> record_batch->column_name(0) << std::endl;
>     std::cout << "record_batch->num_columns(): " << 
> record_batch->num_columns() << std::endl;
>     std::cout << "record_batch->num_rows(): " << record_batch->num_rows() << 
> std::endl;
>     std::cout << "record_batch->column(0)->length(): "
>               << record_batch->column(0)->length() << std::endl;
>     std::cout << "record_batch->column(0)->ToString(): "
>               << record_batch->column(0)->ToString() << std::endl;
> }
> {code}
> The meta info of the Record Batch, such as the number of columns and rows, 
> is still available, but I can't see the content of the columns.
> {{lldb}} says that the stop reason is {{EXC_BAD_ACCESS}}, so I think the 
> Record Batch is destroyed after {{GetRecordBatchFromPlasma}} finishes. But 
> why can I still see the meta info of this Record Batch?
>  What is the proper way to get the Record Batch if we insist on using a function?



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to