zhixingheyi-tian commented on a change in pull request #11763:
URL: https://github.com/apache/arrow/pull/11763#discussion_r807997252



##########
File path: cpp/src/arrow/adapters/orc/adapter_test.cc
##########
@@ -381,6 +381,81 @@ TEST(TestAdapterRead, ReadIntAndStringFileMultipleStripes) {
   }
 }
 
+TEST(TestAdapterRead, ReadIntAndStringFileMultipleBatches) {
+  MemoryOutputStream mem_stream(kDefaultMemStreamSize);
+  ORC_UNIQUE_PTR<liborc::Type> type(
+      liborc::Type::buildTypeFromString("struct<col1:int,col2:string>"));
+
+  constexpr uint64_t stripe_size = 1024;  // 1K
+  constexpr uint64_t stripe_count = 10;
+  constexpr uint64_t stripe_row_count = 16384;
+  constexpr uint64_t reader_batch_size = 1024;
+
+  auto writer = CreateWriter(stripe_size, *type, &mem_stream);
+  auto batch = writer->createRowBatch(stripe_row_count);
+  auto struct_batch = 
internal::checked_cast<liborc::StructVectorBatch*>(batch.get());
+  auto long_batch =
+      
internal::checked_cast<liborc::LongVectorBatch*>(struct_batch->fields[0]);
+  auto str_batch =
+      
internal::checked_cast<liborc::StringVectorBatch*>(struct_batch->fields[1]);
+  int64_t accumulated = 0;
+
+  for (uint64_t j = 0; j < stripe_count; ++j) {
+    std::string data_buffer(stripe_row_count * 5, '\0');
+    uint64_t offset = 0;
+    for (uint64_t i = 0; i < stripe_row_count; ++i) {
+      std::string str_data = std::to_string(accumulated % stripe_row_count);
+      long_batch->data[i] = static_cast<int64_t>(accumulated % 
stripe_row_count);
+      str_batch->data[i] = &data_buffer[offset];
+      str_batch->length[i] = static_cast<int64_t>(str_data.size());
+      memcpy(&data_buffer[offset], str_data.c_str(), str_data.size());
+      accumulated++;
+      offset += str_data.size();
+    }
+    struct_batch->numElements = stripe_row_count;
+    long_batch->numElements = stripe_row_count;
+    str_batch->numElements = stripe_row_count;
+
+    writer->add(*batch);
+  }
+
+  writer->close();

Review comment:
       Done, and I have refined the code as well.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to