AlenkaF commented on code in PR #40064:
URL: https://github.com/apache/arrow/pull/40064#discussion_r1508491813


##########
cpp/src/arrow/record_batch.cc:
##########
@@ -247,6 +248,97 @@ Result<std::shared_ptr<StructArray>> 
RecordBatch::ToStructArray() const {
                                        /*offset=*/0);
 }
 
+template <typename DataType>
+inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out) {
+  using CType = typename arrow::TypeTraits<DataType>::CType;
+  auto* out_values = reinterpret_cast<CType*>(out);
+
+  // Loop through all of the columns
+  for (int i = 0; i < batch.num_columns(); ++i) {
+    const auto* in_values = batch.column(i)->data()->GetValues<CType>(1);
+
+    // Copy data of each column
+    memcpy(out_values, in_values, sizeof(CType) * batch.num_rows());
+    out_values += batch.num_rows();
+  }  // End loop through columns
+}
+
+Result<std::shared_ptr<Tensor>> RecordBatch::ToTensor(MemoryPool* pool) const {
+  if (num_columns() == 0) {
+    return Status::TypeError(
+        "Conversion to Tensor for RecordBatches without columns/schema is not "
+        "supported.");
+  }
+  const auto& type = column(0)->type();
+  // Check for supported data types
+  if (!is_integer(type->id()) && !is_floating(type->id())) {
+    return Status::TypeError("DataType is not supported: ", type->ToString());
+  }
+  // Check for uniform data type
+  // Check for no validity bitmap of each field
+  for (int i = 0; i < num_columns(); ++i) {
+    if (column(i)->null_count() > 0) {
+      return Status::TypeError("Can only convert a RecordBatch with no 
nulls.");
+    }
+    if (column(i)->type() != type) {

Review Comment:
   I will work on this in a separate issue; see 
https://github.com/apache/arrow/issues/40060.
   There is already a working branch, with a diff (compared to this PR's 
branch) available here: 
https://github.com/apache/arrow/commit/9090b5ca5e7ead83ba0d4145c48189b53b34c70b
   
   There are two separate code paths in the proposed change I linked here: one 
for uniform data types and a separate one for mixed data types, which includes casting.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to