hu6360567 opened a new issue #11846:
URL: https://github.com/apache/arrow/issues/11846
The following code works fine when it is compiled in CMake debug mode,
but crashes when it is compiled in CMake release mode.
As far as I can tell, the underlying array data is released after
`import_array` has consumed it.
environment:
MacOS 10.15, Apple clang version 12.0.0, rustc 1.56.0
```cpp
#include <arrow/api.h>
#include <arrow/c/abi.h>
#include <arrow/c/bridge.h>
#include <arrow/c/helpers.h>
#include <iostream>
// Rust FFI entry point called from test2(); it is implemented in a Rust
// library and only declared here. The Rust side is reproduced below for
// reference.
// NOTE(review): as pasted, the closing brace of import_array came after
// test_import, which nested the test inside the function; this is assumed to
// be a paste error and the brace is placed after the println! here.
/*
#[no_mangle]
pub extern "C" fn import_array(content: *const FFI_ArrowArray, schema: *const FFI_ArrowSchema) {
    let array = match unsafe { ArrowArray::try_from_raw(content, schema) } {
        Ok(arr) => { arr },
        Err(e) => {
            eprintln!("{}", e);
            return;
        }
    };
    let array = StructArray::from(array.to_data().unwrap());
    let array = RecordBatch::from(&array);
    println!("{:?}", array);
}

#[test]
fn test_import() {
    let col1 = StringArray::from(vec!["a", "b", "c"]);
    let col2 = Int8Array::from(vec![1, 2, 3]);
    let rb_schema = Schema::new(
        vec![
            Field::new("col1", col1.data_type().clone(), false),
            Field::new("col2", col2.data_type().clone(), false),
        ]
    );
    let rb = match RecordBatch::try_new(Arc::new(rb_schema),
                                        vec![Arc::new(col1), Arc::new(col2)]) {
        Ok(rb) => rb,
        Err(e) => {
            eprintln!("{}", e);
            return;
        }
    };
    let (content, schema) = StructArray::from(rb).to_raw().unwrap();
    import_array(content, schema);
}
*/
// Takes pointers to an exported C data interface array and its schema.
// NOTE(review): whether the callee takes ownership of the pointed-to structs
// (and how they must have been allocated) is not visible from this
// declaration -- confirm against the Rust ArrowArray::try_from_raw contract.
extern "C" void import_array(const ArrowArray *content, const ArrowSchema
*schema);
// Prints one line of the form
//   "<msg>: ArrayIsRelease[TRUE|FALSE], SchemaIsRelease[TRUE|FALSE]"
// reporting whether the given ArrowArray / ArrowSchema are currently marked
// as released (via ArrowArrayIsReleased / ArrowSchemaIsReleased).
//
// msg must be a string literal: it is pasted in front of the format string by
// adjacent-literal concatenation. The do { ... } while(0) wrapper makes the
// macro usable as a single statement.
#define DEBUG_C_INTERFACE(msg, content_ptr, schema_ptr) \
do {\
printf(msg ": ArrayIsRelease[%s], SchemaIsRelease[%s]\n", \
ArrowArrayIsReleased(content_ptr) ? "TRUE" : "FALSE", \
ArrowSchemaIsReleased(schema_ptr) ? "TRUE" : "FALSE"); \
} while(0)
std::shared_ptr<arrow::RecordBatch> generateRB() {
auto key_builder = arrow::StringBuilder();
auto value_builder = arrow::StringBuilder();
key_builder.Append("key0");
value_builder.Append("value0");
auto key_array = *key_builder.Finish();
auto value_array = *value_builder.Finish();
auto schema = arrow::schema({
arrow::field("key",
key_array->type(), false),
arrow::field("value",
value_array->type(), false)
});
return arrow::RecordBatch::Make(schema, 1, {key_array, value_array});
}
void test1() {
auto property = generateRB();
ArrowArray array{};
ArrowSchema schema{};
DEBUG_C_INTERFACE("TEST1 Before Export", &array, &schema);
std::cout << property->column_data(0).use_count() << "," <<
property->column_data(1).use_count() << std::endl;
arrow::ExportRecordBatch(*property, &array, &schema);
DEBUG_C_INTERFACE("TEST1 After Export", &array, &schema);
std::cout << property->column_data(0).use_count() << "," <<
property->column_data(1).use_count() << std::endl;
DEBUG_C_INTERFACE("TEST1 Before Import", &array, &schema);
std::cout << property->column_data(0).use_count() << "," <<
property->column_data(1).use_count() << std::endl;
auto arrow_array = *arrow::ImportArray(&array, &schema);
DEBUG_C_INTERFACE("TEST1 After Import", &array, &schema);
std::cout << arrow_array->ToString() << std::endl;
std::cout << property->column_data(0).use_count() << "," <<
property->column_data(1).use_count() << std::endl;
}
void test2() {
auto property = generateRB();
ArrowArray array{};
ArrowSchema schema{};
DEBUG_C_INTERFACE("TEST2 Before Export", &array, &schema);
std::cout << property->column_data(0).use_count() << "," <<
property->column_data(1).use_count() << std::endl;
arrow::ExportRecordBatch(*property, &array, &schema);
DEBUG_C_INTERFACE("TEST2 After Export", &array, &schema);
std::cout << property->column_data(0).use_count() << "," <<
property->column_data(1).use_count() << std::endl;
DEBUG_C_INTERFACE("TEST2 Before Import", &array, &schema);
std::cout << property->column_data(0).use_count() << "," <<
property->column_data(1).use_count() << std::endl;
import_array(&array, &schema);
DEBUG_C_INTERFACE("TEST2 After Import", &array, &schema);
std::cout << property->column_data(0).use_count() << "," <<
property->column_data(1).use_count() << std::endl;
}
// Runs the two scenarios in order, printing a banner line before each one.
int main() {
  struct Scenario {
    const char *banner;
    void (*run)();
  };
  const Scenario scenarios[] = {
      {"TEST1", test1},
      {"TEST2", test2},
  };

  for (const Scenario &scenario : scenarios) {
    // std::endl flushes, so the banner is emitted before the scenario runs.
    std::cout << scenario.banner << std::endl;
    scenario.run();
  }
  return 0;
}
```
OUTPUT
```
TEST1
TEST1 Before Export: ArrayIsRelease[TRUE], SchemaIsRelease[TRUE]
3,3
TEST1 After Export: ArrayIsRelease[FALSE], SchemaIsRelease[FALSE]
5,5
TEST1 Before Import: ArrayIsRelease[FALSE], SchemaIsRelease[FALSE]
5,5
TEST1 After Import: ArrayIsRelease[TRUE], SchemaIsRelease[TRUE]
-- is_valid: all not null
-- child 0 type: string
[
"key0"
]
-- child 1 type: string
[
"value0"
]
5,5
TEST2
TEST2 Before Export: ArrayIsRelease[TRUE], SchemaIsRelease[TRUE]
3,3
TEST2 After Export: ArrayIsRelease[FALSE], SchemaIsRelease[FALSE]
5,5
TEST2 Before Import: ArrayIsRelease[FALSE], SchemaIsRelease[FALSE]
5,5
RecordBatch { schema: Schema { fields: [Field { name: "key", data_type:
Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: None },
Field { name: "value", data_type: Utf8, nullable: false, dict_id: 0,
dict_is_ordered: false, metadata: None }], metadata: {} }, columns: [StringArray
[
"key0",
], StringArray
[
"value0",
]] }
TEST2 After Import: ArrayIsRelease[FALSE], SchemaIsRelease[FALSE]
Process finished with exit code 139 (interrupted by signal 11: SIGSEGV)
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]