bkietz commented on code in PR #40064:
URL: https://github.com/apache/arrow/pull/40064#discussion_r1488703881


##########
cpp/src/arrow/record_batch.cc:
##########
@@ -247,6 +248,111 @@ Result<std::shared_ptr<StructArray>> RecordBatch::ToStructArray() const {
                                        /*offset=*/0);
 }
 
+template <typename DataType>
+inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out) {
+  using CType = typename arrow::TypeTraits<DataType>::CType;
+  auto* out_values = reinterpret_cast<CType*>(out);
+
+  // Loop through all of the columns
+  for (int i = 0; i < batch.num_columns(); ++i) {
+    const auto& arr = *batch.column(i);
+    auto data = arr.data();
+    const auto data_offset = data->offset * sizeof(CType);
+    const auto& in_values =
+        reinterpret_cast<const CType*>(data->buffers[1]->data() + data_offset);
+
+    // Copy data of each column
+    memcpy(out_values, in_values, sizeof(CType) * batch.num_rows());
+    out_values += batch.num_rows();
+  }  // End loop through columns
+}
+
+Result<std::shared_ptr<Tensor>> RecordBatch::ToTensor() const {
+  if (num_columns() == 0) {
+    return Status::TypeError(
+        "Conversion to Tensor for RecordBatches without columns/schema is not "
+        "supported.");
+  } else {
+    const auto type = column(0)->type();

Review Comment:
   ```suggestion
       const auto& type = column(0)->type();
   ```



##########
cpp/src/arrow/record_batch.cc:
##########
@@ -247,6 +248,111 @@ Result<std::shared_ptr<StructArray>> RecordBatch::ToStructArray() const {
                                        /*offset=*/0);
 }
 
+template <typename DataType>
+inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out) {
+  using CType = typename arrow::TypeTraits<DataType>::CType;
+  auto* out_values = reinterpret_cast<CType*>(out);
+
+  // Loop through all of the columns
+  for (int i = 0; i < batch.num_columns(); ++i) {
+    const auto& arr = *batch.column(i);
+    auto data = arr.data();
+    const auto data_offset = data->offset * sizeof(CType);
+    const auto& in_values =
+        reinterpret_cast<const CType*>(data->buffers[1]->data() + data_offset);
+
+    // Copy data of each column
+    memcpy(out_values, in_values, sizeof(CType) * batch.num_rows());
+    out_values += batch.num_rows();
+  }  // End loop through columns
+}
+
+Result<std::shared_ptr<Tensor>> RecordBatch::ToTensor() const {
+  if (num_columns() == 0) {
+    return Status::TypeError(
+        "Conversion to Tensor for RecordBatches without columns/schema is not "
+        "supported.");
+  } else {

Review Comment:
   ```suggestion
     }
   ```



##########
cpp/src/arrow/record_batch.cc:
##########
@@ -247,6 +248,111 @@ Result<std::shared_ptr<StructArray>> RecordBatch::ToStructArray() const {
                                        /*offset=*/0);
 }
 
+template <typename DataType>
+inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out) {
+  using CType = typename arrow::TypeTraits<DataType>::CType;
+  auto* out_values = reinterpret_cast<CType*>(out);
+
+  // Loop through all of the columns
+  for (int i = 0; i < batch.num_columns(); ++i) {
+    const auto& arr = *batch.column(i);
+    auto data = arr.data();
+    const auto data_offset = data->offset * sizeof(CType);
+    const auto& in_values =
+        reinterpret_cast<const CType*>(data->buffers[1]->data() + data_offset);
+
+    // Copy data of each column
+    memcpy(out_values, in_values, sizeof(CType) * batch.num_rows());
+    out_values += batch.num_rows();
+  }  // End loop through columns
+}
+
+Result<std::shared_ptr<Tensor>> RecordBatch::ToTensor() const {
+  if (num_columns() == 0) {
+    return Status::TypeError(
+        "Conversion to Tensor for RecordBatches without columns/schema is not "
+        "supported.");
+  } else {
+    const auto type = column(0)->type();
+    // Check for supported data types
+    if (!is_integer(type->id()) && !is_floating(type->id())) {
+      return Status::TypeError("DataType is not supported: ", type->ToString());
+    }
+    // Check for uniform data type
+    // Check for no validity bitmap of each field
+    for (int i = 0; i < num_columns(); ++i) {
+      if (column(i)->null_count() > 0) {
+        return Status::TypeError("Can only convert a RecordBatch with no nulls.");
+      }
+      if (column(i)->type() != type) {
+        return Status::TypeError(
+            "Can only convert a RecordBatch with uniform data type.");
+      }
+    }
+
+    // Empty tensors
+    if (num_rows() == 0) {
+      // Construct empty Tensor object
+      ARROW_ASSIGN_OR_RAISE(auto empty_buffer, AllocateBuffer(0));
+      ARROW_ASSIGN_OR_RAISE(auto empty_tensor, Tensor::Make(type, std::move(empty_buffer),
+                                                            {0, num_columns()}, {0, 0}));
+      return empty_tensor;
+    }
+
+    // Allocate memory
+    ARROW_ASSIGN_OR_RAISE(const std::shared_ptr<Buffer> result,

Review Comment:
   ```suggestion
       ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> result,
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to