pitrou commented on code in PR #46229:
URL: https://github.com/apache/arrow/pull/46229#discussion_r2176542746


##########
cpp/src/arrow/array/array_binary_test.cc:
##########
@@ -1007,11 +1013,97 @@ class TestBaseBinaryDataVisitor : public 
::testing::Test {
  protected:
   std::shared_ptr<DataType> type_;
 };
-
 TYPED_TEST_SUITE(TestBaseBinaryDataVisitor, 
BaseBinaryOrBinaryViewLikeArrowTypes);
 
 TYPED_TEST(TestBaseBinaryDataVisitor, Basics) { this->TestBasics(); }
 
 TYPED_TEST(TestBaseBinaryDataVisitor, Sliced) { this->TestSliced(); }
 
+template <typename Type>
+class TestCompactArray : public ::testing::Test {
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+ public:
+  void SetUp() override { type_ = TypeTraits<Type>::type_singleton(); }
+  void TestType() {
+    auto array = internal::checked_pointer_cast<ArrayType>(
+        ArrayFromJSON(type_, R"(["a","b","c"])"));
+    ASSERT_OK_AND_ASSIGN(auto compacted_array, array->CompactArray());
+    ASSERT_EQ(compacted_array->type(), type_);
+  }
+
+  void TestBuffers() {
+    // Check Empty state
+    ASSERT_OK_AND_ASSIGN(auto array, MakeBinaryViewArray<Type>({}, {}));
+    ASSERT_OK_AND_ASSIGN(auto compacted_array, array->CompactArray());
+    ASSERT_EQ(compacted_array->data()->buffers.size(), 2);
+    AssertArraysEqual(*array, *compacted_array);
+
+    auto buffer_a = Buffer::FromString(std::string(1 << 5, 'a'));

Review Comment:
   Can this be less cryptic? `1 << 5` can just be written `32`.



##########
cpp/src/arrow/array/array_binary.cc:
##########
@@ -104,7 +105,14 @@ BinaryViewArray::BinaryViewArray(std::shared_ptr<DataType> 
type, int64_t length,
   SetData(
       ArrayData::Make(std::move(type), length, std::move(buffers), null_count, 
offset));
 }
-
+Result<std::shared_ptr<Array>> BinaryViewArray::CompactArray(MemoryPool* pool) 
const {
+  std::unique_ptr<ArrayBuilder> builder;
+  ARROW_RETURN_NOT_OK(MakeBuilder(pool, type(), &builder));
+  ArraySpan span(*data());
+  // ArraySpan handles offset
+  ARROW_RETURN_NOT_OK(builder->AppendArraySlice(span, 0, span.length));

Review Comment:
   Hmm, ok, but this will always copy the data even if it is already compact? 
Is there a way to avoid that by looking at the buffers directly?



##########
cpp/src/arrow/array/array_binary_test.cc:
##########
@@ -1007,11 +1013,97 @@ class TestBaseBinaryDataVisitor : public 
::testing::Test {
  protected:
   std::shared_ptr<DataType> type_;
 };
-
 TYPED_TEST_SUITE(TestBaseBinaryDataVisitor, 
BaseBinaryOrBinaryViewLikeArrowTypes);
 
 TYPED_TEST(TestBaseBinaryDataVisitor, Basics) { this->TestBasics(); }
 
 TYPED_TEST(TestBaseBinaryDataVisitor, Sliced) { this->TestSliced(); }
 
+template <typename Type>
+class TestCompactArray : public ::testing::Test {
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+ public:
+  void SetUp() override { type_ = TypeTraits<Type>::type_singleton(); }
+  void TestType() {

Review Comment:
   Nit: can you leave empty lines between methods?
   ```suggestion
   
     void TestType() {
   ```



##########
cpp/src/arrow/array/array_binary_test.cc:
##########
@@ -1007,11 +1013,97 @@ class TestBaseBinaryDataVisitor : public 
::testing::Test {
  protected:
   std::shared_ptr<DataType> type_;
 };
-
 TYPED_TEST_SUITE(TestBaseBinaryDataVisitor, 
BaseBinaryOrBinaryViewLikeArrowTypes);
 
 TYPED_TEST(TestBaseBinaryDataVisitor, Basics) { this->TestBasics(); }
 
 TYPED_TEST(TestBaseBinaryDataVisitor, Sliced) { this->TestSliced(); }
 
+template <typename Type>
+class TestCompactArray : public ::testing::Test {
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+ public:
+  void SetUp() override { type_ = TypeTraits<Type>::type_singleton(); }
+  void TestType() {
+    auto array = internal::checked_pointer_cast<ArrayType>(
+        ArrayFromJSON(type_, R"(["a","b","c"])"));
+    ASSERT_OK_AND_ASSIGN(auto compacted_array, array->CompactArray());
+    ASSERT_EQ(compacted_array->type(), type_);
+  }
+
+  void TestBuffers() {
+    // Check Empty state
+    ASSERT_OK_AND_ASSIGN(auto array, MakeBinaryViewArray<Type>({}, {}));
+    ASSERT_OK_AND_ASSIGN(auto compacted_array, array->CompactArray());
+    ASSERT_EQ(compacted_array->data()->buffers.size(), 2);
+    AssertArraysEqual(*array, *compacted_array);
+
+    auto buffer_a = Buffer::FromString(std::string(1 << 5, 'a'));
+    auto buffer_b = Buffer::FromString(std::string(1 << 5, 'b'));
+
+    // Check the state of no elements in View Buffer
+    ASSERT_OK_AND_ASSIGN(array, MakeBinaryViewArray<Type>({buffer_a, 
buffer_b}, {}));
+    ASSERT_OK_AND_ASSIGN(compacted_array, array->CompactArray());
+    ASSERT_EQ(compacted_array->data()->buffers.size(), 2);
+    AssertArraysEqual(*array, *compacted_array);
+
+    // Check the state of no reference to data buffers from view elements
+    ASSERT_OK_AND_ASSIGN(
+        array, MakeBinaryViewArray<Type>({buffer_a, buffer_b},
+                                         {
+                                             util::ToInlineBinaryView("inlined 
1"),
+                                             util::ToInlineBinaryView("inlined 
2"),
+                                         }))
+    ASSERT_OK_AND_ASSIGN(compacted_array, array->CompactArray());
+    ASSERT_EQ(compacted_array->data()->buffers.size(), 2);
+    AssertArraysEqual(*array, *compacted_array);
+
+    // Check the state of there is one reference to whole data of one data 
buffer
+    ASSERT_OK_AND_ASSIGN(
+        array,
+        MakeBinaryViewArray<Type>(
+            {buffer_a, buffer_b},
+            {
+                util::ToBinaryView("bbbb", 
static_cast<int32_t>(buffer_b->size()), 1, 0),
+                util::ToInlineBinaryView("inlined 1"),
+            }));
+    ASSERT_OK_AND_ASSIGN(compacted_array, array->CompactArray());
+    ASSERT_EQ(compacted_array->data()->buffers.size(), 3);
+    AssertArraysEqual(*array, *compacted_array);
+
+    // Check the state of there is one reference to partial data of one data 
buffer
+    ASSERT_OK_AND_ASSIGN(
+        array, MakeBinaryViewArray<Type>(
+                   {buffer_a, buffer_b},
+                   {
+                       util::ToBinaryView(
+                           "bbbb", static_cast<int32_t>(buffer_b->size() >> 
1), 1, 16),

Review Comment:
   `static_cast<int32_t>(buffer_b->size() >> 1)` is pointlessly cryptic, how 
about 16 or something?



##########
cpp/src/arrow/array/array_binary_test.cc:
##########
@@ -1007,11 +1013,97 @@ class TestBaseBinaryDataVisitor : public 
::testing::Test {
  protected:
   std::shared_ptr<DataType> type_;
 };
-
 TYPED_TEST_SUITE(TestBaseBinaryDataVisitor, 
BaseBinaryOrBinaryViewLikeArrowTypes);
 
 TYPED_TEST(TestBaseBinaryDataVisitor, Basics) { this->TestBasics(); }
 
 TYPED_TEST(TestBaseBinaryDataVisitor, Sliced) { this->TestSliced(); }
 
+template <typename Type>
+class TestCompactArray : public ::testing::Test {
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+ public:
+  void SetUp() override { type_ = TypeTraits<Type>::type_singleton(); }
+  void TestType() {
+    auto array = internal::checked_pointer_cast<ArrayType>(
+        ArrayFromJSON(type_, R"(["a","b","c"])"));
+    ASSERT_OK_AND_ASSIGN(auto compacted_array, array->CompactArray());
+    ASSERT_EQ(compacted_array->type(), type_);
+  }
+
+  void TestBuffers() {
+    // Check Empty state
+    ASSERT_OK_AND_ASSIGN(auto array, MakeBinaryViewArray<Type>({}, {}));
+    ASSERT_OK_AND_ASSIGN(auto compacted_array, array->CompactArray());
+    ASSERT_EQ(compacted_array->data()->buffers.size(), 2);
+    AssertArraysEqual(*array, *compacted_array);
+
+    auto buffer_a = Buffer::FromString(std::string(1 << 5, 'a'));
+    auto buffer_b = Buffer::FromString(std::string(1 << 5, 'b'));
+
+    // Check the state of no elements in View Buffer
+    ASSERT_OK_AND_ASSIGN(array, MakeBinaryViewArray<Type>({buffer_a, 
buffer_b}, {}));
+    ASSERT_OK_AND_ASSIGN(compacted_array, array->CompactArray());
+    ASSERT_EQ(compacted_array->data()->buffers.size(), 2);
+    AssertArraysEqual(*array, *compacted_array);
+
+    // Check the state of no reference to data buffers from view elements
+    ASSERT_OK_AND_ASSIGN(
+        array, MakeBinaryViewArray<Type>({buffer_a, buffer_b},
+                                         {
+                                             util::ToInlineBinaryView("inlined 
1"),
+                                             util::ToInlineBinaryView("inlined 
2"),
+                                         }))
+    ASSERT_OK_AND_ASSIGN(compacted_array, array->CompactArray());
+    ASSERT_EQ(compacted_array->data()->buffers.size(), 2);
+    AssertArraysEqual(*array, *compacted_array);
+
+    // Check the state of there is one reference to whole data of one data 
buffer
+    ASSERT_OK_AND_ASSIGN(
+        array,
+        MakeBinaryViewArray<Type>(
+            {buffer_a, buffer_b},
+            {
+                util::ToBinaryView("bbbb", 
static_cast<int32_t>(buffer_b->size()), 1, 0),
+                util::ToInlineBinaryView("inlined 1"),
+            }));
+    ASSERT_OK_AND_ASSIGN(compacted_array, array->CompactArray());
+    ASSERT_EQ(compacted_array->data()->buffers.size(), 3);
+    AssertArraysEqual(*array, *compacted_array);
+
+    // Check the state of there is one reference to partial data of one data 
buffer
+    ASSERT_OK_AND_ASSIGN(
+        array, MakeBinaryViewArray<Type>(
+                   {buffer_a, buffer_b},
+                   {
+                       util::ToBinaryView(
+                           "bbbb", static_cast<int32_t>(buffer_b->size() >> 
1), 1, 16),
+                       util::ToInlineBinaryView("inlined 1"),
+                   }));
+    ASSERT_OK_AND_ASSIGN(compacted_array, array->CompactArray());
+    ASSERT_EQ(compacted_array->data()->buffers.size(), 3);
+    ASSERT_EQ(compacted_array->data()->buffers[2]->size(), 16);
+    AssertArraysEqual(*array, *compacted_array);
+  }
+  void TestSliced() {
+    auto array = internal::checked_pointer_cast<ArrayType>(
+        ArrayFromJSON(type_, R"(["a","b","c"])"));
+    auto sliced_array = 
internal::checked_pointer_cast<ArrayType>(array->Slice(1, 2));
+    ASSERT_OK_AND_ASSIGN(auto compacted_array, sliced_array->CompactArray());
+    AssertArraysEqual(*sliced_array, *compacted_array);
+  }
+
+ private:
+  std::shared_ptr<DataType> type_;
+};
+
+// Should We move this to gtest_util.h

Review Comment:
   Please do!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to