rok commented on code in PR #41870:
URL: https://github.com/apache/arrow/pull/41870#discussion_r3502502521


##########
cpp/src/arrow/tensor.cc:
##########
@@ -284,75 +301,99 @@ struct ConvertColumnsToTensorRowMajorVisitor {
   }
 };
 
-template <typename DataType>
-inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out,
+template <typename DataType, typename Container>
+inline void ConvertColumnsToTensor(const Container& container, uint8_t* out,
                                    bool row_major) {
   using CType = typename arrow::TypeTraits<DataType>::CType;
   auto* out_values = reinterpret_cast<CType*>(out);
 
-  int i = 0;
-  for (const auto& column : batch.columns()) {
-    if (row_major) {
-      ConvertColumnsToTensorRowMajorVisitor<CType> visitor{out_values, 
*column->data(),
-                                                           
batch.num_columns(), i++};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
-    } else {
-      ConvertColumnsToTensorVisitor<CType> visitor{out_values, 
*column->data()};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
+  for (int col_idx = 0; col_idx < container.num_columns(); ++col_idx) {

Review Comment:
   ```suggestion
     const int num_columns = container.num_columns();
   
     for (int col_idx = 0; col_idx < num_columns; ++col_idx) {
   ```



##########
cpp/src/arrow/tensor.cc:
##########
@@ -284,75 +301,99 @@ struct ConvertColumnsToTensorRowMajorVisitor {
   }
 };
 
-template <typename DataType>
-inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out,
+template <typename DataType, typename Container>
+inline void ConvertColumnsToTensor(const Container& container, uint8_t* out,
                                    bool row_major) {
   using CType = typename arrow::TypeTraits<DataType>::CType;
   auto* out_values = reinterpret_cast<CType*>(out);
 
-  int i = 0;
-  for (const auto& column : batch.columns()) {
-    if (row_major) {
-      ConvertColumnsToTensorRowMajorVisitor<CType> visitor{out_values, 
*column->data(),
-                                                           
batch.num_columns(), i++};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
-    } else {
-      ConvertColumnsToTensorVisitor<CType> visitor{out_values, 
*column->data()};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
+  for (int col_idx = 0; col_idx < container.num_columns(); ++col_idx) {
+    if constexpr (std::is_same_v<Container, Table>) {
+      int64_t chunk_idx = 0;
+
+      for (const auto& chunk : container.column(col_idx)->chunks()) {

Review Comment:
   ```suggestion
         for (const auto& chunk : container.columns()[col_idx]->chunks()) {
   ```



##########
cpp/src/arrow/tensor.cc:
##########
@@ -284,75 +301,99 @@ struct ConvertColumnsToTensorRowMajorVisitor {
   }
 };
 
-template <typename DataType>
-inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out,
+template <typename DataType, typename Container>
+inline void ConvertColumnsToTensor(const Container& container, uint8_t* out,
                                    bool row_major) {
   using CType = typename arrow::TypeTraits<DataType>::CType;
   auto* out_values = reinterpret_cast<CType*>(out);
 
-  int i = 0;
-  for (const auto& column : batch.columns()) {
-    if (row_major) {
-      ConvertColumnsToTensorRowMajorVisitor<CType> visitor{out_values, 
*column->data(),
-                                                           
batch.num_columns(), i++};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
-    } else {
-      ConvertColumnsToTensorVisitor<CType> visitor{out_values, 
*column->data()};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
+  for (int col_idx = 0; col_idx < container.num_columns(); ++col_idx) {
+    if constexpr (std::is_same_v<Container, Table>) {
+      int64_t chunk_idx = 0;
+
+      for (const auto& chunk : container.column(col_idx)->chunks()) {
+        if (row_major) {
+          ConvertArrayToTensorRowMajorVisitor<CType> visitor{
+              out_values, *chunk->data(), container.num_columns(), col_idx, 
chunk_idx};

Review Comment:
   ```suggestion
                 out_values, *chunk->data(), num_columns, col_idx, chunk_idx};
   ```



##########
cpp/src/arrow/tensor.cc:
##########
@@ -284,75 +301,99 @@ struct ConvertColumnsToTensorRowMajorVisitor {
   }
 };
 
-template <typename DataType>
-inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out,
+template <typename DataType, typename Container>
+inline void ConvertColumnsToTensor(const Container& container, uint8_t* out,
                                    bool row_major) {
   using CType = typename arrow::TypeTraits<DataType>::CType;
   auto* out_values = reinterpret_cast<CType*>(out);
 
-  int i = 0;
-  for (const auto& column : batch.columns()) {
-    if (row_major) {
-      ConvertColumnsToTensorRowMajorVisitor<CType> visitor{out_values, 
*column->data(),
-                                                           
batch.num_columns(), i++};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
-    } else {
-      ConvertColumnsToTensorVisitor<CType> visitor{out_values, 
*column->data()};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
+  for (int col_idx = 0; col_idx < container.num_columns(); ++col_idx) {
+    if constexpr (std::is_same_v<Container, Table>) {
+      int64_t chunk_idx = 0;
+
+      for (const auto& chunk : container.column(col_idx)->chunks()) {
+        if (row_major) {
+          ConvertArrayToTensorRowMajorVisitor<CType> visitor{
+              out_values, *chunk->data(), container.num_columns(), col_idx, 
chunk_idx};
+          DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+          chunk_idx += chunk->length();
+        } else {
+          ConvertArrayToTensorVisitor<CType> visitor{out_values, 
*chunk->data()};
+          DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+        }
+      }
+    } else if constexpr (std::is_same_v<Container, RecordBatch>) {
+      const auto& array_data = container.column_data(col_idx);

Review Comment:
   ```suggestion
         const auto& array_data = container.column_data()[col_idx];
   ```



##########
cpp/src/arrow/tensor.cc:
##########
@@ -284,75 +301,99 @@ struct ConvertColumnsToTensorRowMajorVisitor {
   }
 };
 
-template <typename DataType>
-inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out,
+template <typename DataType, typename Container>
+inline void ConvertColumnsToTensor(const Container& container, uint8_t* out,
                                    bool row_major) {
   using CType = typename arrow::TypeTraits<DataType>::CType;
   auto* out_values = reinterpret_cast<CType*>(out);
 
-  int i = 0;
-  for (const auto& column : batch.columns()) {
-    if (row_major) {
-      ConvertColumnsToTensorRowMajorVisitor<CType> visitor{out_values, 
*column->data(),
-                                                           
batch.num_columns(), i++};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
-    } else {
-      ConvertColumnsToTensorVisitor<CType> visitor{out_values, 
*column->data()};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
+  for (int col_idx = 0; col_idx < container.num_columns(); ++col_idx) {
+    if constexpr (std::is_same_v<Container, Table>) {
+      int64_t chunk_idx = 0;
+
+      for (const auto& chunk : container.column(col_idx)->chunks()) {
+        if (row_major) {
+          ConvertArrayToTensorRowMajorVisitor<CType> visitor{
+              out_values, *chunk->data(), container.num_columns(), col_idx, 
chunk_idx};
+          DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+          chunk_idx += chunk->length();
+        } else {
+          ConvertArrayToTensorVisitor<CType> visitor{out_values, 
*chunk->data()};
+          DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+        }
+      }
+    } else if constexpr (std::is_same_v<Container, RecordBatch>) {
+      const auto& array_data = container.column_data(col_idx);
+
+      if (row_major) {
+        ConvertArrayToTensorRowMajorVisitor<CType> visitor{
+            out_values, *array_data, container.num_columns(), col_idx, 0};
+        DCHECK_OK(VisitTypeInline(*array_data->type, &visitor));
+      } else {
+        ConvertArrayToTensorVisitor<CType> visitor{out_values, *array_data};
+        DCHECK_OK(VisitTypeInline(*array_data->type, &visitor));
+      }
     }
   }
 }
 
-Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool 
row_major,
-                           MemoryPool* pool, std::shared_ptr<Tensor>* tensor) {
-  if (batch.num_columns() == 0) {
+template <typename Container>
+Status ToTensorImpl(const Container& container, bool null_to_nan, bool 
row_major,
+                    MemoryPool* pool, std::shared_ptr<Tensor>* tensor) {
+  if (container.num_columns() == 0) {
     return Status::TypeError(
-        "Conversion to Tensor for RecordBatches without columns/schema is not "
+        "Conversion to Tensor for Tables or RecordBatches without 
columns/schema is not "
         "supported.");
   }
   // Check for no validity bitmap of each field
   // if null_to_nan conversion is set to false
-  for (int i = 0; i < batch.num_columns(); ++i) {
-    if (batch.column(i)->null_count() > 0 && !null_to_nan) {
+  for (int i = 0; i < container.num_columns(); ++i) {
+    int64_t null_count;
+    if constexpr (std::is_same_v<Container, Table>) {
+      null_count = container.column(i)->null_count();
+    } else if constexpr (std::is_same_v<Container, RecordBatch>) {
+      null_count = container.column_data(i)->GetNullCount();
+    }
+    if (null_count > 0 && !null_to_nan) {
       return Status::TypeError(
-          "Can only convert a RecordBatch with no nulls. Set null_to_nan to 
true to "
-          "convert nulls to NaN");
+          "Can only convert a Table or RecordBatch with no nulls. Set 
null_to_nan to "
+          "true to convert nulls to NaN");
     }
   }
 
   // Check for supported data types and merge fields
   // to get the resulting uniform data type
-  if (!is_integer(batch.column(0)->type()->id()) &&
-      !is_floating(batch.column(0)->type()->id())) {
-    return Status::TypeError("DataType is not supported: ",
-                             batch.column(0)->type()->ToString());
+  const auto& col_0_type = container.schema()->field(0)->type();
+  if (!is_integer(col_0_type->id()) && !is_floating(col_0_type->id())) {
+    return Status::TypeError("DataType is not supported: ", 
col_0_type->ToString());
   }
-  std::shared_ptr<Field> result_field = batch.schema()->field(0);
+  std::shared_ptr<Field> result_field = container.schema()->field(0);
   std::shared_ptr<DataType> result_type = result_field->type();
 
   Field::MergeOptions options;
   options.promote_integer_to_float = true;
   options.promote_integer_sign = true;
   options.promote_numeric_width = true;
 
-  if (batch.num_columns() > 1) {
-    for (int i = 1; i < batch.num_columns(); ++i) {
-      if (!is_numeric(batch.column(i)->type()->id())) {
-        return Status::TypeError("DataType is not supported: ",
-                                 batch.column(i)->type()->ToString());
+  if (container.num_columns() > 1) {
+    for (int i = 1; i < container.num_columns(); ++i) {
+      const auto& col_type = container.schema()->field(i)->type();
+
+      if (!is_numeric(col_type->id())) {
+        return Status::TypeError("DataType is not supported: ", 
col_type->ToString());
       }
 
       // Casting of float16 is not supported, throw an error in this case
-      if ((batch.column(i)->type()->id() == Type::HALF_FLOAT ||
+      if ((col_type->id() == Type::HALF_FLOAT ||
            result_field->type()->id() == Type::HALF_FLOAT) &&
-          batch.column(i)->type()->id() != result_field->type()->id()) {
+          col_type->id() != result_field->type()->id()) {
         return Status::NotImplemented("Casting from or to halffloat is not 
supported.");
       }
 
       ARROW_ASSIGN_OR_RAISE(
           result_field,
           result_field->MergeWith(

Review Comment:
   ```suggestion
        if (!col_type->Equals(result_field->type())) {
          ARROW_ASSIGN_OR_RAISE(
              result_field,
              result_field->MergeWith(
   ```



##########
cpp/src/arrow/tensor.cc:
##########
@@ -284,75 +301,99 @@ struct ConvertColumnsToTensorRowMajorVisitor {
   }
 };
 
-template <typename DataType>
-inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out,
+template <typename DataType, typename Container>
+inline void ConvertColumnsToTensor(const Container& container, uint8_t* out,
                                    bool row_major) {
   using CType = typename arrow::TypeTraits<DataType>::CType;
   auto* out_values = reinterpret_cast<CType*>(out);
 
-  int i = 0;
-  for (const auto& column : batch.columns()) {
-    if (row_major) {
-      ConvertColumnsToTensorRowMajorVisitor<CType> visitor{out_values, 
*column->data(),
-                                                           
batch.num_columns(), i++};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
-    } else {
-      ConvertColumnsToTensorVisitor<CType> visitor{out_values, 
*column->data()};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
+  for (int col_idx = 0; col_idx < container.num_columns(); ++col_idx) {
+    if constexpr (std::is_same_v<Container, Table>) {
+      int64_t chunk_idx = 0;
+
+      for (const auto& chunk : container.column(col_idx)->chunks()) {
+        if (row_major) {
+          ConvertArrayToTensorRowMajorVisitor<CType> visitor{
+              out_values, *chunk->data(), container.num_columns(), col_idx, 
chunk_idx};
+          DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+          chunk_idx += chunk->length();
+        } else {
+          ConvertArrayToTensorVisitor<CType> visitor{out_values, 
*chunk->data()};
+          DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+        }
+      }
+    } else if constexpr (std::is_same_v<Container, RecordBatch>) {
+      const auto& array_data = container.column_data(col_idx);
+
+      if (row_major) {
+        ConvertArrayToTensorRowMajorVisitor<CType> visitor{
+            out_values, *array_data, container.num_columns(), col_idx, 0};

Review Comment:
   ```suggestion
               out_values, *array_data, num_columns, col_idx, 0};
   ```



##########
cpp/src/arrow/tensor.cc:
##########
@@ -284,75 +301,99 @@ struct ConvertColumnsToTensorRowMajorVisitor {
   }
 };
 
-template <typename DataType>
-inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out,
+template <typename DataType, typename Container>
+inline void ConvertColumnsToTensor(const Container& container, uint8_t* out,
                                    bool row_major) {
   using CType = typename arrow::TypeTraits<DataType>::CType;
   auto* out_values = reinterpret_cast<CType*>(out);
 
-  int i = 0;
-  for (const auto& column : batch.columns()) {
-    if (row_major) {
-      ConvertColumnsToTensorRowMajorVisitor<CType> visitor{out_values, 
*column->data(),
-                                                           
batch.num_columns(), i++};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
-    } else {
-      ConvertColumnsToTensorVisitor<CType> visitor{out_values, 
*column->data()};
-      DCHECK_OK(VisitTypeInline(*column->type(), &visitor));
+  for (int col_idx = 0; col_idx < container.num_columns(); ++col_idx) {
+    if constexpr (std::is_same_v<Container, Table>) {
+      int64_t chunk_idx = 0;
+
+      for (const auto& chunk : container.column(col_idx)->chunks()) {
+        if (row_major) {
+          ConvertArrayToTensorRowMajorVisitor<CType> visitor{
+              out_values, *chunk->data(), container.num_columns(), col_idx, 
chunk_idx};
+          DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+          chunk_idx += chunk->length();
+        } else {
+          ConvertArrayToTensorVisitor<CType> visitor{out_values, 
*chunk->data()};
+          DCHECK_OK(VisitTypeInline(*chunk->type(), &visitor));
+        }
+      }
+    } else if constexpr (std::is_same_v<Container, RecordBatch>) {
+      const auto& array_data = container.column_data(col_idx);
+
+      if (row_major) {
+        ConvertArrayToTensorRowMajorVisitor<CType> visitor{
+            out_values, *array_data, container.num_columns(), col_idx, 0};
+        DCHECK_OK(VisitTypeInline(*array_data->type, &visitor));
+      } else {
+        ConvertArrayToTensorVisitor<CType> visitor{out_values, *array_data};
+        DCHECK_OK(VisitTypeInline(*array_data->type, &visitor));
+      }
     }
   }
 }
 
-Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool 
row_major,
-                           MemoryPool* pool, std::shared_ptr<Tensor>* tensor) {
-  if (batch.num_columns() == 0) {
+template <typename Container>
+Status ToTensorImpl(const Container& container, bool null_to_nan, bool 
row_major,
+                    MemoryPool* pool, std::shared_ptr<Tensor>* tensor) {
+  if (container.num_columns() == 0) {
     return Status::TypeError(
-        "Conversion to Tensor for RecordBatches without columns/schema is not "
+        "Conversion to Tensor for Tables or RecordBatches without 
columns/schema is not "
         "supported.");
   }
   // Check for no validity bitmap of each field
   // if null_to_nan conversion is set to false
-  for (int i = 0; i < batch.num_columns(); ++i) {
-    if (batch.column(i)->null_count() > 0 && !null_to_nan) {
+  for (int i = 0; i < container.num_columns(); ++i) {
+    int64_t null_count;
+    if constexpr (std::is_same_v<Container, Table>) {
+      null_count = container.column(i)->null_count();
+    } else if constexpr (std::is_same_v<Container, RecordBatch>) {
+      null_count = container.column_data(i)->GetNullCount();
+    }
+    if (null_count > 0 && !null_to_nan) {
       return Status::TypeError(
-          "Can only convert a RecordBatch with no nulls. Set null_to_nan to 
true to "
-          "convert nulls to NaN");
+          "Can only convert a Table or RecordBatch with no nulls. Set 
null_to_nan to "
+          "true to convert nulls to NaN");
     }
   }
 
   // Check for supported data types and merge fields
   // to get the resulting uniform data type
-  if (!is_integer(batch.column(0)->type()->id()) &&
-      !is_floating(batch.column(0)->type()->id())) {
-    return Status::TypeError("DataType is not supported: ",
-                             batch.column(0)->type()->ToString());
+  const auto& col_0_type = container.schema()->field(0)->type();
+  if (!is_integer(col_0_type->id()) && !is_floating(col_0_type->id())) {
+    return Status::TypeError("DataType is not supported: ", 
col_0_type->ToString());
   }
-  std::shared_ptr<Field> result_field = batch.schema()->field(0);
+  std::shared_ptr<Field> result_field = container.schema()->field(0);
   std::shared_ptr<DataType> result_type = result_field->type();
 
   Field::MergeOptions options;
   options.promote_integer_to_float = true;
   options.promote_integer_sign = true;
   options.promote_numeric_width = true;
 
-  if (batch.num_columns() > 1) {
-    for (int i = 1; i < batch.num_columns(); ++i) {
-      if (!is_numeric(batch.column(i)->type()->id())) {
-        return Status::TypeError("DataType is not supported: ",
-                                 batch.column(i)->type()->ToString());
+  if (container.num_columns() > 1) {
+    for (int i = 1; i < container.num_columns(); ++i) {
+      const auto& col_type = container.schema()->field(i)->type();
+
+      if (!is_numeric(col_type->id())) {
+        return Status::TypeError("DataType is not supported: ", 
col_type->ToString());
       }
 
       // Casting of float16 is not supported, throw an error in this case
-      if ((batch.column(i)->type()->id() == Type::HALF_FLOAT ||
+      if ((col_type->id() == Type::HALF_FLOAT ||
            result_field->type()->id() == Type::HALF_FLOAT) &&
-          batch.column(i)->type()->id() != result_field->type()->id()) {
+          col_type->id() != result_field->type()->id()) {
         return Status::NotImplemented("Casting from or to halffloat is not 
supported.");
       }
 
       ARROW_ASSIGN_OR_RAISE(
           result_field,
           result_field->MergeWith(
-              batch.schema()->field(i)->WithName(result_field->name()), 
options));
+              container.schema()->field(i)->WithName(result_field->name()), 
options));

Review Comment:
   ```suggestion
                   
container.schema()->field(i)->WithName(result_field->name()), options));
        }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to