mapleFU commented on code in PR #37896:
URL: https://github.com/apache/arrow/pull/37896#discussion_r1358276774


##########
cpp/src/arrow/record_batch.cc:
##########
@@ -432,4 +433,34 @@ RecordBatchReader::~RecordBatchReader() {
   ARROW_WARN_NOT_OK(this->Close(), "Implicitly called RecordBatchReader::Close 
failed");
 }
 
+Result<std::shared_ptr<RecordBatch>> ConcatenateRecordBatches(
+    const RecordBatchVector& batches, MemoryPool* pool) {
+  int64_t length = 0;
+  size_t n = batches.size();
+  if (n == 0) {
+    return Status::Invalid("Must pass at least one recordbatch");
+  }
+  int cols = batches[0]->num_columns();
+  auto schema = batches[0]->schema();
+  for (size_t i = 0; i < batches.size(); ++i) {
+    length += batches[i]->num_rows();
+    if (!schema->Equals(batches[i]->schema())) {
+      return Status::Invalid(
+          "Schema of RecordBatch index ", i, " is ", 
batches[i]->schema()->ToString(),
+          ", which does not match index 0 recordbatch schema: ", 
schema->ToString());
+    }
+  }
+
+  std::vector<std::shared_ptr<Array>> concatenated_columns;

Review Comment:
   Should we call `reserve` on `concatenated_columns` here, since the number of columns (`cols`) is already known?



##########
cpp/src/arrow/record_batch_test.cc:
##########
@@ -555,4 +555,38 @@ TEST_F(TestRecordBatch, ReplaceSchema) {
   ASSERT_RAISES(Invalid, b1->ReplaceSchema(schema));
 }
 
+TEST_F(TestRecordBatch, ConcatenateRecordBatches) {
+  int length = 10;
+
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", uint8());
+
+  auto schema = ::arrow::schema({f0, f1});
+
+  random::RandomArrayGenerator gen(42);
+
+  auto b1 = gen.BatchOf(schema->fields(), length);
+
+  length = 5;
+
+  auto b2 = gen.BatchOf(schema->fields(), length);
+
+  ASSERT_OK_AND_ASSIGN(auto batch, ConcatenateRecordBatches({b1, b2}));
+  ASSERT_EQ(batch->num_rows(), b1->num_rows() + b2->num_rows());

Review Comment:
   Would you mind also testing that the content equals `{b1 concat b2}` (using `Slice` or 
`Concatenate`)?



##########
cpp/src/arrow/record_batch.cc:
##########
@@ -432,4 +433,34 @@ RecordBatchReader::~RecordBatchReader() {
   ARROW_WARN_NOT_OK(this->Close(), "Implicitly called RecordBatchReader::Close 
failed");
 }
 
+Result<std::shared_ptr<RecordBatch>> ConcatenateRecordBatches(
+    const RecordBatchVector& batches, MemoryPool* pool) {
+  int64_t length = 0;
+  size_t n = batches.size();
+  if (n == 0) {
+    return Status::Invalid("Must pass at least one recordbatch");
+  }
+  int cols = batches[0]->num_columns();
+  auto schema = batches[0]->schema();
+  for (size_t i = 0; i < batches.size(); ++i) {
+    length += batches[i]->num_rows();
+    if (!schema->Equals(batches[i]->schema())) {
+      return Status::Invalid(
+          "Schema of RecordBatch index ", i, " is ", 
batches[i]->schema()->ToString(),
+          ", which does not match index 0 recordbatch schema: ", 
schema->ToString());
+    }
+  }
+
+  std::vector<std::shared_ptr<Array>> concatenated_columns;
+  for (int col = 0; col < cols; ++col) {
+    ArrayVector column_arrays;

Review Comment:
   Ditto: should we call `reserve` on `column_arrays` here as well?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to