bkietz commented on a change in pull request #9615:
URL: https://github.com/apache/arrow/pull/9615#discussion_r585768620
##########
File path: r/src/r_to_arrow.cpp
##########
@@ -1023,12 +1060,87 @@ std::shared_ptr<arrow::Array> vec_to_arrow(SEXP x,
options.type, options, gc_memory_pool()));
StopIfNotOk(converter->Extend(x, options.size));
+
return ValueOrStop(converter->ToArray());
}
} // namespace r
} // namespace arrow
+// [[arrow::export]]
+std::shared_ptr<arrow::Table> Table__from_dots(SEXP lst, SEXP schema_sxp) {
+ bool infer_schema = !Rf_inherits(schema_sxp, "Schema");
+
+ int num_fields;
+ StopIfNotOk(arrow::r::count_fields(lst, &num_fields));
+
+ // schema + metadata
+ std::shared_ptr<arrow::Schema> schema;
+ StopIfNotOk(arrow::r::InferSchemaFromDots(lst, schema_sxp, num_fields,
schema));
+ StopIfNotOk(arrow::r::AddMetadataFromDots(lst, num_fields, schema));
+
+ // table
+ std::vector<std::shared_ptr<arrow::ChunkedArray>> columns(num_fields);
+
+ // for now the parallel task does not work,
+ // presumably because some ->Extend() can't actually run in parallel
+ //
+ // auto parallel_tasks =
+ //
arrow::internal::TaskGroup::MakeThreaded(arrow::internal::GetCpuThreadPool());
+ auto parallel_tasks =
+ arrow::internal::TaskGroup::MakeSerial();
+
+ std::vector<std::function<arrow::Status()>> delayed_serial_tasks;
+
+ auto extract_one_column = [&](int j, SEXP x, cpp11::r_string) {
+ if (Rf_inherits(x, "ChunkedArray")) {
+ columns[j] = cpp11::as_cpp<std::shared_ptr<arrow::ChunkedArray>>(x);
+ } else if (Rf_inherits(x, "Array")) {
+ columns[j] = std::make_shared<arrow::ChunkedArray>(
+ cpp11::as_cpp<std::shared_ptr<arrow::Array>>(x));
+ } else {
+ arrow::r::RConversionOptions options;
+ options.strict = !infer_schema;
+ options.type = schema->field(j)->type();
+ options.size = vctrs::short_vec_size(x);
+
+ // maybe short circuit when zero-copy is possible
+ if (arrow::r::can_reuse_memory(x, options.type)) {
+ columns[j] = std::make_shared<arrow::ChunkedArray>(
+ arrow::r::vec_to_arrow__reuse_memory(x));
+ } else {
+ auto converter = ValueOrStop(
+ arrow::MakeConverter<arrow::r::RConverter,
arrow::r::RConverterTrait>(
+ options.type, options, gc_memory_pool()));
+
+ auto task = [&]() {
Review comment:
Capturing `converter` by reference here is not safe: `task` outlives the
scope of `converter`, so the reference would dangle by the time `task` runs.
Capture by value instead:
```suggestion
auto task = [=]() {
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]