wjones127 commented on code in PR #14219:
URL: https://github.com/apache/arrow/pull/14219#discussion_r982793017


##########
cpp/src/arrow/adapters/orc/adapter_test.cc:
##########
@@ -730,4 +735,55 @@ TEST_F(TestORCWriterSingleArray, WriteListOfMap) {
   AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10);
 }
 
+class TestORCWriterMultipleWrite : public ::testing::Test {
+ public:
+  TestORCWriterMultipleWrite() : rand(kRandomSeed) {}
+
+ protected:
+  random::RandomArrayGenerator rand;
+};
+
+TEST_F(TestORCWriterMultipleWrite, MultipleWritesIntField) {
+  const int64_t num_rows = 1234;
+  const int nb_writes = 5;
+  auto array_int = rand.ArrayOf(int32(), num_rows, 0);
+  std::shared_ptr<Schema> input_schema = schema({field("col0", 
array_int->type())});
+  ArrayVector vect;
+  for (int i = 0; i < nb_writes; i++) {
+    vect.push_back(array_int);
+  }
+  auto input_chunked_array = std::make_shared<ChunkedArray>(array_int),
+       expected_output_chunked_array = std::make_shared<ChunkedArray>(vect);
+  std::shared_ptr<Table> input_table = Table::Make(input_schema, 
{input_chunked_array}),
+                         expected_output_table =
+                             Table::Make(input_schema, 
{expected_output_chunked_array});
+  AssertTableWriteReadEqual(input_table, expected_output_table,
+                            kDefaultSmallMemStreamSize * 100, nb_writes);
+}
+
+TEST_F(TestORCWriterMultipleWrite, MultipleWritesIncoherentSchema) {
+  const int64_t num_rows = 1234;
+  auto array_int = rand.ArrayOf(int32(), num_rows, 0);
+  std::shared_ptr<Schema> input_schema = schema({field("col0", 
array_int->type())});
+  auto array_int2 = rand.ArrayOf(int64(), num_rows, 0);
+  std::shared_ptr<Schema> input_schema2 = schema({field("col0", 
array_int2->type())});
+
+  auto input_chunked_array = std::make_shared<ChunkedArray>(array_int),
+       input_chunked_array2 = std::make_shared<ChunkedArray>(array_int2);
+  std::shared_ptr<Table> input_table = Table::Make(input_schema, 
{input_chunked_array}),
+                         input_table2 =
+                             Table::Make(input_schema2, 
{input_chunked_array2});

Review Comment:
   There is an overload of `Table::Make()` that takes Arrays, so we can just 
use that. Also, we generally repeat the type if we are declaring and assigning.
   
   ```suggestion
     std::shared_ptr<Table> input_table = Table::Make(input_schema, 
{array_int});
     std::shared_ptr<Table> input_table2 = Table::Make(input_schema2, 
{array_int2});
   ```



##########
cpp/src/arrow/adapters/orc/adapter_test.cc:
##########
@@ -730,4 +735,55 @@ TEST_F(TestORCWriterSingleArray, WriteListOfMap) {
   AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10);
 }
 
+class TestORCWriterMultipleWrite : public ::testing::Test {
+ public:
+  TestORCWriterMultipleWrite() : rand(kRandomSeed) {}
+
+ protected:
+  random::RandomArrayGenerator rand;
+};
+
+TEST_F(TestORCWriterMultipleWrite, MultipleWritesIntField) {
+  const int64_t num_rows = 1234;
+  const int nb_writes = 5;

Review Comment:
   We generally use `num_` instead of `nb_` in Arrow, so consider changing this.



##########
cpp/src/arrow/adapters/orc/adapter_test.cc:
##########
@@ -730,4 +735,55 @@ TEST_F(TestORCWriterSingleArray, WriteListOfMap) {
   AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10);
 }
 
+class TestORCWriterMultipleWrite : public ::testing::Test {
+ public:
+  TestORCWriterMultipleWrite() : rand(kRandomSeed) {}
+
+ protected:
+  random::RandomArrayGenerator rand;
+};
+
+TEST_F(TestORCWriterMultipleWrite, MultipleWritesIntField) {
+  const int64_t num_rows = 1234;
+  const int nb_writes = 5;
+  auto array_int = rand.ArrayOf(int32(), num_rows, 0);
+  std::shared_ptr<Schema> input_schema = schema({field("col0", 
array_int->type())});
+  ArrayVector vect;
+  for (int i = 0; i < nb_writes; i++) {
+    vect.push_back(array_int);
+  }
+  auto input_chunked_array = std::make_shared<ChunkedArray>(array_int),
+       expected_output_chunked_array = std::make_shared<ChunkedArray>(vect);
+  std::shared_ptr<Table> input_table = Table::Make(input_schema, 
{input_chunked_array}),
+                         expected_output_table =
+                             Table::Make(input_schema, 
{expected_output_chunked_array});
+  AssertTableWriteReadEqual(input_table, expected_output_table,
+                            kDefaultSmallMemStreamSize * 100, nb_writes);
+}
+
+TEST_F(TestORCWriterMultipleWrite, MultipleWritesIncoherentSchema) {
+  const int64_t num_rows = 1234;
+  auto array_int = rand.ArrayOf(int32(), num_rows, 0);
+  std::shared_ptr<Schema> input_schema = schema({field("col0", 
array_int->type())});
+  auto array_int2 = rand.ArrayOf(int64(), num_rows, 0);
+  std::shared_ptr<Schema> input_schema2 = schema({field("col0", 
array_int2->type())});
+
+  auto input_chunked_array = std::make_shared<ChunkedArray>(array_int),
+       input_chunked_array2 = std::make_shared<ChunkedArray>(array_int2);
+  std::shared_ptr<Table> input_table = Table::Make(input_schema, 
{input_chunked_array}),
+                         input_table2 =
+                             Table::Make(input_schema2, 
{input_chunked_array2});
+  EXPECT_OK_AND_ASSIGN(auto buffer_output_stream,
+                       
io::BufferOutputStream::Create(kDefaultSmallMemStreamSize));
+  auto write_options = adapters::orc::WriteOptions();
+  EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open(
+                                        buffer_output_stream.get(), 
write_options));
+  ARROW_EXPECT_OK(writer->Write(*input_table));
+
+  // This should not pass
+  ASSERT_NOT_OK(writer->Write(*input_table2));

Review Comment:
   How about this?
   ```suggestion
     ASSERT_RAISES(Status::Invalid, writer->Write(*input_table2));
   ```



##########
cpp/src/arrow/adapters/orc/adapter_test.cc:
##########
@@ -239,7 +241,9 @@ void AssertTableWriteReadEqual(const 
std::shared_ptr<Table>& input_table,
   write_options.row_index_stride = 5000;
   EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open(
                                         buffer_output_stream.get(), 
write_options));
-  ARROW_EXPECT_OK(writer->Write(*input_table));
+  for (int iWrite = 0; iWrite < nb_writes; iWrite++) {

Review Comment:
   We take the convention from the Google Style Guide that variable names 
should be lower case.
   
   ```suggestion
     for (int i_write = 0; i_write < nb_writes; i_write++) {
   ```



##########
cpp/src/arrow/adapters/orc/adapter_test.cc:
##########
@@ -730,4 +735,55 @@ TEST_F(TestORCWriterSingleArray, WriteListOfMap) {
   AssertArrayWriteReadEqual(array, array, kDefaultSmallMemStreamSize * 10);
 }
 
+class TestORCWriterMultipleWrite : public ::testing::Test {
+ public:
+  TestORCWriterMultipleWrite() : rand(kRandomSeed) {}
+
+ protected:
+  random::RandomArrayGenerator rand;
+};
+
+TEST_F(TestORCWriterMultipleWrite, MultipleWritesIntField) {
+  const int64_t num_rows = 1234;
+  const int nb_writes = 5;
+  auto array_int = rand.ArrayOf(int32(), num_rows, 0);
+  std::shared_ptr<Schema> input_schema = schema({field("col0", 
array_int->type())});
+  ArrayVector vect;
+  for (int i = 0; i < nb_writes; i++) {
+    vect.push_back(array_int);
+  }
+  auto input_chunked_array = std::make_shared<ChunkedArray>(array_int),
+       expected_output_chunked_array = std::make_shared<ChunkedArray>(vect);
+  std::shared_ptr<Table> input_table = Table::Make(input_schema, 
{input_chunked_array}),
+                         expected_output_table =
+                             Table::Make(input_schema, 
{expected_output_chunked_array});
+  AssertTableWriteReadEqual(input_table, expected_output_table,
+                            kDefaultSmallMemStreamSize * 100, nb_writes);
+}
+
+TEST_F(TestORCWriterMultipleWrite, MultipleWritesIncoherentSchema) {
+  const int64_t num_rows = 1234;
+  auto array_int = rand.ArrayOf(int32(), num_rows, 0);
+  std::shared_ptr<Schema> input_schema = schema({field("col0", 
array_int->type())});
+  auto array_int2 = rand.ArrayOf(int64(), num_rows, 0);
+  std::shared_ptr<Schema> input_schema2 = schema({field("col0", 
array_int2->type())});
+
+  auto input_chunked_array = std::make_shared<ChunkedArray>(array_int),
+       input_chunked_array2 = std::make_shared<ChunkedArray>(array_int2);
+  std::shared_ptr<Table> input_table = Table::Make(input_schema, 
{input_chunked_array}),
+                         input_table2 =
+                             Table::Make(input_schema2, 
{input_chunked_array2});
+  EXPECT_OK_AND_ASSIGN(auto buffer_output_stream,
+                       
io::BufferOutputStream::Create(kDefaultSmallMemStreamSize));
+  auto write_options = adapters::orc::WriteOptions();
+  EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open(
+                                        buffer_output_stream.get(), 
write_options));
+  ARROW_EXPECT_OK(writer->Write(*input_table));
+
+  // This should not pass
+  ASSERT_NOT_OK(writer->Write(*input_table2));
+
+  ARROW_EXPECT_OK(writer->Close());
+  EXPECT_OK_AND_ASSIGN(auto buffer, buffer_output_stream->Finish());

Review Comment:
   Were you planning on doing something with this buffer?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to