mathyingzhou commented on a change in pull request #8648:
URL: https://github.com/apache/arrow/pull/8648#discussion_r567596202



##########
File path: cpp/src/arrow/adapters/orc/adapter_test.cc
##########
@@ -157,4 +197,1930 @@ TEST(TestAdapter, readIntAndStringFileMultipleStripes) {
     EXPECT_TRUE(stripe_reader->ReadNext(&record_batch).ok());
   }
 }
+
+// WriteORC tests
+
+// Trivial
+TEST(TestAdapterWriteTrivial, writeZeroRowsNoConversion) {
+  std::shared_ptr<Table> table = TableFromJSON(
+      schema({field("bool", boolean()), field("int8", int8()), field("int16", 
int16()),
+              field("int32", int32()), field("int64", int64()), field("float", 
float32()),
+              field("double", float64()), field("decimal128nz", decimal(25, 
6)),
+              field("decimal128z", decimal(32, 0)), field("date32", date32()),
+              field("ts3", timestamp(TimeUnit::NANO)), field("string", utf8()),
+              field("binary", binary()),
+              field("struct", struct_({field("a", utf8()), field("b", 
int64())})),
+              field("list", list(int32())),
+              field("lsl", list(struct_({field("lsl0", list(int32()))})))}),
+      {R"([])"});
+  AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize / 16);
+}
+TEST(TestAdapterWriteTrivial, writeChunklessNoConversion) {
+  std::shared_ptr<Table> table = TableFromJSON(
+      schema({field("bool", boolean()), field("int8", int8()), field("int16", 
int16()),
+              field("int32", int32()), field("int64", int64()), field("float", 
float32()),
+              field("double", float64()), field("decimal128nz", decimal(25, 
6)),
+              field("decimal128z", decimal(32, 0)), field("date32", date32()),
+              field("ts3", timestamp(TimeUnit::NANO)), field("string", utf8()),
+              field("binary", binary()),
+              field("struct", struct_({field("a", utf8()), field("b", 
int64())})),
+              field("list", list(int32())),
+              field("lsl", list(struct_({field("lsl0", list(int32()))})))}),
+      {});
+  AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize / 16);
+}
+TEST(TestAdapterWriteTrivial, writeZeroRowsWithConversion) {
+  std::shared_ptr<Table>
+      input_table = TableFromJSON(
+          schema({field("date64", date64()), field("ts0", 
timestamp(TimeUnit::SECOND)),
+                  field("ts1", timestamp(TimeUnit::MILLI)),
+                  field("ts2", timestamp(TimeUnit::MICRO)),
+                  field("large_string", large_utf8()),
+                  field("large_binary", large_binary()),
+                  field("fixed_size_binary0", fixed_size_binary(0)),
+                  field("fixed_size_binary", fixed_size_binary(5)),
+                  field("large_list", large_list(int32())),
+                  field("fixed_size_list", fixed_size_list(int32(), 3)),
+                  field("map", map(utf8(), utf8()))}),
+          {R"([])"}),
+      expected_output_table = TableFromJSON(
+          schema({field("date64", timestamp(TimeUnit::NANO)),
+                  field("ts0", timestamp(TimeUnit::NANO)),
+                  field("ts1", timestamp(TimeUnit::NANO)),
+                  field("ts2", timestamp(TimeUnit::NANO)), 
field("large_string", utf8()),
+                  field("large_binary", binary()), field("fixed_size_binary0", 
binary()),
+                  field("fixed_size_binary", binary()),
+                  field("large_list", list(int32())),
+                  field("fixed_size_list", list(int32())),
+                  field("map",
+                        list(struct_({field("key", utf8()), field("value", 
utf8())})))}),
+          {R"([])"});
+  AssertTableWriteReadEqual(input_table, expected_output_table,
+                            kDefaultSmallMemStreamSize / 16);
+}
+TEST(TestAdapterWriteTrivial, writeChunklessWithConversion) {
+  std::shared_ptr<Table>
+      input_table = TableFromJSON(
+          schema({field("date64", date64()), field("ts0", 
timestamp(TimeUnit::SECOND)),
+                  field("ts1", timestamp(TimeUnit::MILLI)),
+                  field("ts2", timestamp(TimeUnit::MICRO)),
+                  field("large_string", large_utf8()),
+                  field("large_binary", large_binary()),
+                  field("fixed_size_binary0", fixed_size_binary(0)),
+                  field("fixed_size_binary", fixed_size_binary(5)),
+                  field("large_list", large_list(int32())),
+                  field("fixed_size_list", fixed_size_list(int32(), 3)),
+                  field("map", map(utf8(), utf8()))}),
+          {}),
+      expected_output_table = TableFromJSON(
+          schema({field("date64", timestamp(TimeUnit::NANO)),
+                  field("ts0", timestamp(TimeUnit::NANO)),
+                  field("ts1", timestamp(TimeUnit::NANO)),
+                  field("ts2", timestamp(TimeUnit::NANO)), 
field("large_string", utf8()),
+                  field("large_binary", binary()), field("fixed_size_binary0", 
binary()),
+                  field("fixed_size_binary", binary()),
+                  field("large_list", list(int32())),
+                  field("fixed_size_list", list(int32())),
+                  field("map",
+                        list(struct_({field("key", utf8()), field("value", 
utf8())})))}),
+          {});
+  AssertTableWriteReadEqual(input_table, expected_output_table,
+                            kDefaultSmallMemStreamSize / 16);
+}
+
+// General
+TEST(TestAdapterWriteGeneral, writeAllNullsNew) {
+  std::vector<std::shared_ptr<Field>> table_fields{
+      field("bool", boolean()),
+      field("int8", int8()),
+      field("int16", int16()),
+      field("int32", int32()),
+      field("int64", int64()),
+      field("decimal128nz", decimal(33, 4)),
+      field("decimal128z", decimal(35, 0)),
+      field("date32", date32()),
+      field("ts3", timestamp(TimeUnit::NANO)),
+      field("string", utf8()),
+      field("binary", binary())};
+  std::shared_ptr<Schema> table_schema = schema(table_fields);
+  arrow::random::RandomArrayGenerator rand(kRandomSeed);
+
+  int64_t num_rows = 10000;
+  int64_t numCols = table_fields.size();
+
+  ArrayMatrix arrays(numCols, ArrayVector(5, NULLPTR));
+  for (int i = 0; i < numCols; i++) {
+    for (int j = 0; j < 5; j++) {
+      int row_count = j % 2 ? 0 : num_rows / 2;
+      arrays[i][j] = rand.ArrayOf(table_fields[i]->type(), row_count, 1);
+    }
+  }
+
+  ChunkedArrayVector cv;
+  cv.reserve(numCols);
+
+  for (int col = 0; col < numCols; col++) {
+    cv.push_back(std::make_shared<ChunkedArray>(arrays[col]));
+  }
+
+  std::shared_ptr<Table> table = Table::Make(table_schema, cv);
+  AssertTableWriteReadEqual(table, table);
+}
+
+TEST(TestAdapterWriteGeneral, writeAllNulls) {
+  std::vector<std::shared_ptr<Field>> table_fields{
+      field("bool", boolean()),
+      field("int8", int8()),
+      field("int16", int16()),
+      field("int32", int32()),
+      field("int64", int64()),
+      field("decimal128nz", decimal(33, 4)),
+      field("decimal128z", decimal(35, 0)),
+      field("date32", date32()),
+      field("ts3", timestamp(TimeUnit::NANO)),
+      field("string", utf8()),
+      field("binary", binary())};
+  std::shared_ptr<Schema> table_schema = 
std::make_shared<Schema>(table_fields);
+
+  int64_t num_rows = 10000;
+  int64_t numCols = table_fields.size();
+
+  ArrayBuilderMatrix builders(numCols, ArrayBuilderVector(5, NULLPTR));
+
+  for (int i = 0; i < 5; i++) {
+    builders[0][i] =
+        
std::static_pointer_cast<ArrayBuilder>(std::make_shared<BooleanBuilder>());
+    builders[1][i] =
+        
std::static_pointer_cast<ArrayBuilder>(std::make_shared<Int8Builder>());
+    builders[2][i] =
+        
std::static_pointer_cast<ArrayBuilder>(std::make_shared<Int16Builder>());
+    builders[3][i] =
+        
std::static_pointer_cast<ArrayBuilder>(std::make_shared<Int32Builder>());
+    builders[4][i] =
+        
std::static_pointer_cast<ArrayBuilder>(std::make_shared<Int64Builder>());
+    builders[5][i] = std::static_pointer_cast<ArrayBuilder>(
+        std::make_shared<Decimal128Builder>(decimal(33, 4)));
+    builders[6][i] = std::static_pointer_cast<ArrayBuilder>(
+        std::make_shared<Decimal128Builder>(decimal(35, 0)));

Review comment:
       Yup. ORC does not support it at the moment, so we can't. I can file an ORC ticket, though.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to