Hi,
I have implemented a function that copies host data (by wrapping it
in an arrow::Array object) to a GPU device using
arrow::gpu::SerializeRecordBatch:
...
// Builds a one-field arrow schema whose single field is named "data" and has
// type DTYPE. Kept on one line: a #define ends at the first newline unless it
// is continued with a trailing backslash.
#define MY_COLUMN_SCHEMA(DTYPE) ::arrow::schema({arrow::field("data", DTYPE)})
// Wraps `column` in a single-column arrow::RecordBatch and stores it in `*out`.
//
// The batch uses the schema produced by MY_COLUMN_SCHEMA (one field named
// "data") and `column->size` as its row count. Always returns Status::OK().
// NOTE(review): the value returned by ToArray is discarded here -- if ToArray
// can report failure, confirm whether it should be propagated.
arrow::Status ToRecordBatch(const my_column* column,
                            std::shared_ptr<arrow::RecordBatch>* out) {
  // Zero-copy, per the original author's note.
  std::shared_ptr<arrow::Array> array;
  std::shared_ptr<arrow::DataType> type = GetDataType(column);
  ToArray(column, &array);

  auto schema = MY_COLUMN_SCHEMA(type);
  *out = arrow::RecordBatch::Make(schema, column->size, {array});
  return arrow::Status::OK();
}
// Use it on host
// Serializes `column` as a record batch into GPU memory.
//
// Looks up the CUDA context for device kGpuNumber (0), wraps `column` in a
// RecordBatch via ToRecordBatch, and hands it to
// arrow::gpu::SerializeRecordBatch, which fills `*buffer`.
//
// Returns the first non-OK status from any step; otherwise the status of
// SerializeRecordBatch.
arrow::Status ToDevice(const my_column* column,
                       std::shared_ptr<arrow::gpu::CudaBuffer>* buffer) {
  constexpr int kGpuNumber = 0;

  // Start from nullptr so the pointer is never indeterminate, and bail out
  // before dereferencing it if the device manager could not be obtained.
  arrow::gpu::CudaDeviceManager* manager = nullptr;
  arrow::Status status = arrow::gpu::CudaDeviceManager::GetInstance(&manager);
  if (!status.ok()) return status;

  // Without a context there is nothing to serialize into, so propagate the
  // failure instead of passing a null context downstream.
  std::shared_ptr<arrow::gpu::CudaContext> context;
  status = manager->GetContext(kGpuNumber, &context);
  if (!status.ok()) return status;

  std::shared_ptr<arrow::RecordBatch> batch;
  status = ToRecordBatch(column, &batch);
  if (!status.ok()) return status;

  return arrow::gpu::SerializeRecordBatch(*batch, context.get(), buffer);
}
To implement the reverse of ToDevice, a schema is needed by
arrow::gpu::ReadRecordBatch.
Is the schema included in the CudaBuffer object?
If yes, what would be the easiest way to get it?
If not, what is the recommended strategy for passing schema+data to the
GPU device and back?
Best regards,
Pearu