pitrou commented on a change in pull request #11798:
URL: https://github.com/apache/arrow/pull/11798#discussion_r763100510



##########
File path: cpp/src/arrow/array/array_test.cc
##########
@@ -654,6 +654,44 @@ TEST_F(TestArray, TestMakeArrayFromMapScalar) {
   AssertAppendScalar(pool_, std::make_shared<MapScalar>(scalar));
 }
 
+TEST_F(TestArray, TestMakeEmptyArray) {
+  FieldVector union_fields1({field("a", utf8()), field("b", int32())});
+  FieldVector union_fields2({field("a", null()), field("b", 
list(large_utf8()))});
+  std::vector<int8_t> union_type_codes{7, 42};
+
+  std::shared_ptr<DataType> types[] = {null(),
+                                       boolean(),
+                                       int8(),
+                                       uint16(),
+                                       int32(),
+                                       uint64(),
+                                       float64(),
+                                       binary(),
+                                       large_binary(),
+                                       fixed_size_binary(3),
+                                       decimal(16, 4),
+                                       utf8(),
+                                       large_utf8(),
+                                       list(utf8()),
+                                       list(int64()),  // ARROW-9071

Review comment:
       The comment seems irrelevant here, can you remove it?

##########
File path: cpp/src/arrow/array/util.h
##########
@@ -54,6 +54,17 @@ ARROW_EXPORT
 Result<std::shared_ptr<Array>> MakeArrayFromScalar(
     const Scalar& scalar, int64_t length, MemoryPool* pool = 
default_memory_pool());
 
+/// \brief Create an empty Array of a given type
+///
+/// The output Array will be of the given type.
+///
+/// \param[in] type DataType of the empty Array
+/// \param[in] pool MemoryPool pointer

Review comment:
       This is not really informative. Make this for example "the memory pool 
to allocate from", like above?

##########
File path: cpp/src/arrow/record_batch.h
##########
@@ -60,6 +60,17 @@ class ARROW_EXPORT RecordBatch {
       std::shared_ptr<Schema> schema, int64_t num_rows,
       std::vector<std::shared_ptr<ArrayData>> columns);
 
+  /// \brief Create an empty RecordBatch of a given schema
+  ///
+  /// The output RecordBatch will be created with DataTypes from
+  /// the given schema.
+  ///
+  /// \param[in] schema Schema of the empty RecordBatch
+  /// \param[in] pool MemoryPool pointer

Review comment:
       Same comment here as well.

##########
File path: cpp/src/arrow/chunked_array.h
##########
@@ -89,6 +89,17 @@ class ARROW_EXPORT ChunkedArray {
   static Result<std::shared_ptr<ChunkedArray>> Make(
       ArrayVector chunks, std::shared_ptr<DataType> type = NULLPTR);
 
+  /// \brief Create an empty ChunkedArray of a given type
+  ///
+  /// The output ChunkedArray will have one chunk with an empty
+  /// array of the given type.
+  ///
+  /// \param[in] type DataType of the empty ChunkedArray
+  /// \param[in] pool MemoryPool pointer

Review comment:
       Same comment here.

##########
File path: cpp/src/arrow/compute/kernels/util_internal.h
##########
@@ -23,8 +23,11 @@
 
 #include "arrow/array/util.h"
 #include "arrow/buffer.h"
+#include "arrow/chunked_array.h"
 #include "arrow/compute/kernels/codegen_internal.h"
 #include "arrow/compute/type_fwd.h"
+#include "arrow/record_batch.h"
+#include "arrow/table.h"

Review comment:
       @ArianaVillegas Again, there doesn't seem to be a reason for adding 
these includes while the code didn't change.

##########
File path: cpp/src/arrow/table_test.cc
##########
@@ -15,20 +15,21 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <cstdint>
-#include <memory>
-#include <vector>
+#include "arrow/table.h"
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 
+#include <cstdint>
+#include <memory>
+#include <vector>
+

Review comment:
       I'm a bit surprised that you needed to reorder these includes, though 
this is not a problem. Out of curiosity, did an automatic formatter do this?

##########
File path: cpp/src/arrow/compute/kernels/vector_selection.cc
##########
@@ -19,26 +19,21 @@
 #include <cstring>
 #include <limits>
 
-#include "arrow/array/array_base.h"
 #include "arrow/array/array_binary.h"
 #include "arrow/array/array_dict.h"
 #include "arrow/array/array_nested.h"
 #include "arrow/array/builder_primitive.h"
 #include "arrow/array/concatenate.h"
 #include "arrow/buffer_builder.h"
-#include "arrow/chunked_array.h"
 #include "arrow/compute/api_vector.h"
 #include "arrow/compute/kernels/common.h"
 #include "arrow/compute/kernels/util_internal.h"
 #include "arrow/extension_type.h"
-#include "arrow/record_batch.h"
 #include "arrow/result.h"
-#include "arrow/table.h"

Review comment:
       I'm surprised those includes are not needed anymore. 
`vector_selection.cc` certainly processes tables, record batches and chunked 
arrays...

##########
File path: cpp/src/arrow/chunked_array_test.cc
##########
@@ -68,6 +69,13 @@ TEST_F(TestChunkedArray, Make) {
   ASSERT_RAISES(Invalid, ChunkedArray::Make({chunk0}, int16()));
 }
 
+TEST_F(TestChunkedArray, MakeEmpty) {
+  ASSERT_OK_AND_ASSIGN(std::shared_ptr<ChunkedArray> empty,
+                       ChunkedArray::MakeEmpty(int64()));
+  AssertTypeEqual(*int64(), *empty->type());
+  ASSERT_EQ(empty->length(), 0);

Review comment:
       Can you also add a validation test? Should probably be 
`ASSERT_OK(empty->ValidateFull())`.

##########
File path: cpp/src/arrow/table.h
##########
@@ -63,6 +63,17 @@ class ARROW_EXPORT Table {
                                      const 
std::vector<std::shared_ptr<Array>>& arrays,
                                      int64_t num_rows = -1);
 
+  /// \brief Create an empty Table of a given schema
+  ///
+  /// The output Table will be created from a ChunkedArrayVector
+  /// with DataTypes from the schema.
+  ///
+  /// \param[in] schema Schema of the empty Table
+  /// \param[in] pool MemoryPool pointer

Review comment:
       Same comment.

##########
File path: cpp/src/arrow/table.h
##########
@@ -63,6 +63,17 @@ class ARROW_EXPORT Table {
                                      const 
std::vector<std::shared_ptr<Array>>& arrays,
                                      int64_t num_rows = -1);
 
+  /// \brief Create an empty Table of a given schema
+  ///
+  /// The output Table will be created from a ChunkedArrayVector
+  /// with DataTypes from the schema.

Review comment:
       Hmm, the fact that a ChunkedArrayVector is used internally is 
irrelevant. Perhaps shorten this to:
   """The Table will be created with a single empty chunk per column""".

##########
File path: cpp/src/arrow/table_test.cc
##########
@@ -180,6 +181,19 @@ TEST_F(TestTable, Equals) {
   ASSERT_FALSE(table_->Equals(*other, /*check_metadata=*/true));
 }
 
+TEST_F(TestTable, MakeEmpty) {
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", uint8());
+  auto f2 = field("f2", int16());
+
+  std::vector<std::shared_ptr<Field>> fields = {f0, f1, f2};
+  auto schema = ::arrow::schema({f0, f1, f2});
+
+  ASSERT_OK_AND_ASSIGN(std::shared_ptr<Table> empty, Table::MakeEmpty(schema));
+  AssertSchemaEqual(*schema, *empty->schema());
+  ASSERT_EQ(empty->num_rows(), 0);

Review comment:
       Can you add a validation check here as well?

##########
File path: cpp/src/arrow/record_batch_test.cc
##########
@@ -317,4 +317,18 @@ TEST_F(TestRecordBatch, FromStructArrayInvalidNullCount) {
   ASSERT_RAISES(Invalid, RecordBatch::FromStructArray(struct_array));
 }
 
+TEST_F(TestRecordBatch, MakeEmpty) {
+  auto f0 = field("f0", int32());
+  auto f1 = field("f1", uint8());
+  auto f2 = field("f2", int16());
+
+  std::vector<std::shared_ptr<Field>> fields = {f0, f1, f2};
+  auto schema = ::arrow::schema({f0, f1, f2});
+
+  ASSERT_OK_AND_ASSIGN(std::shared_ptr<RecordBatch> empty,
+                       RecordBatch::MakeEmpty(schema));
+  AssertSchemaEqual(*schema, *empty->schema());
+  ASSERT_EQ(empty->num_rows(), 0);

Review comment:
       Can you also validate the RecordBatch here, just like in the array tests?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to