pitrou commented on code in PR #35814:
URL: https://github.com/apache/arrow/pull/35814#discussion_r1210419480


##########
cpp/src/arrow/util/hashing_test.cc:
##########
@@ -486,5 +488,170 @@ TEST(BinaryMemoTable, Empty) {
   EXPECT_EQ(offsets[0], 0);
 }
 
+hash_t HashDataBitmap(const ArraySpan& array) {
+  EXPECT_EQ(array.type->id(), Type::BOOL);
+  const auto& bitmap = array.buffers[1];
+  return ComputeBitmapHash(bitmap.data, bitmap.size,
+                           /*seed=*/0,
+                           /*bit_offset=*/array.offset,
+                           /*num_bits=*/array.length);
+}
+
+std::shared_ptr<BooleanArray> BuildBooleanArray(int len, bool start) {
+  // This could be memoized in the future to speed up tests.
+  BooleanBuilder builder;
+  for (int i = 0; i < len; ++i) {
+    EXPECT_TRUE(builder.Append(((i % 2) ^ start) == 1).ok());
+  }
+  std::shared_ptr<BooleanArray> array;
+  EXPECT_TRUE(builder.Finish(&array).ok());
+  return array;
+}
+
+hash_t HashConcatenation(const ArrayVector& arrays, int64_t bits_offset = -1,
+                         int64_t num_bits = -1) {
+  EXPECT_OK_AND_ASSIGN(auto concat, Concatenate(arrays));
+  EXPECT_EQ(concat->type()->id(), Type::BOOL);
+  if (bits_offset == -1 || num_bits == -1) {
+    return HashDataBitmap(*concat->data());
+  }
+  auto slice = concat->Slice(bits_offset, num_bits);
+  return HashDataBitmap(*slice->data());
+}
+
+TEST(SmallBitmapHash, Empty) {
+  for (bool start : {false, true}) {
+    auto block = BuildBooleanArray(64, start);
+    for (int len = 0; len < 64; len++) {
+      auto prefix = BuildBooleanArray(len, start);
+      auto expected_hash = HashDataBitmap(*prefix->data());
+
+      auto slice = block->Slice(0, len);
+      auto slice_hash = HashDataBitmap(*slice->data());
+      ASSERT_EQ(expected_hash, slice_hash);
+
+      for (int j = 1; j < len; j++) {
+        auto fragment = BuildBooleanArray(len - j, start ^ (j % 2));
+        expected_hash = HashDataBitmap(*fragment->data());
+
+        slice = block->Slice(j, len - j);
+        slice_hash = HashDataBitmap(*slice->data());
+        ASSERT_EQ(expected_hash, slice_hash);
+      }
+    }
+  }
+}
+
+TEST(TestBitmapHash, Empty) {
+  BooleanBuilder builder;
+  std::shared_ptr<BooleanArray> block_of_bools;
+  {
+    ASSERT_OK(builder.AppendValues(2, true));
+    ASSERT_OK(builder.AppendValues(3, false));
+    ASSERT_OK(builder.AppendValues(5, true));
+    ASSERT_OK(builder.AppendValues(7, false));
+    ASSERT_OK(builder.AppendValues(11, true));
+    ASSERT_OK(builder.AppendValues(13, false));
+    ASSERT_OK(builder.AppendValues(17, true));
+    ASSERT_OK(builder.AppendValues(5, false));
+    ASSERT_OK(builder.AppendValues(1, true));
+    ASSERT_OK(builder.Finish(&block_of_bools));
+    ASSERT_EQ(block_of_bools->length(), 64);
+  }
+  const auto hash_of_block = HashDataBitmap(*block_of_bools->data());
+
+  std::shared_ptr<BooleanArray> negated_block_of_bools;
+  {
+    ASSERT_OK(builder.AppendValues(2, false));
+    ASSERT_OK(builder.AppendValues(3, true));
+    ASSERT_OK(builder.AppendValues(5, false));
+    ASSERT_OK(builder.AppendValues(7, true));
+    ASSERT_OK(builder.AppendValues(11, false));
+    ASSERT_OK(builder.AppendValues(13, true));
+    ASSERT_OK(builder.AppendValues(17, false));
+    ASSERT_OK(builder.AppendValues(5, true));
+    ASSERT_OK(builder.AppendValues(1, false));
+    ASSERT_OK(builder.Finish(&negated_block_of_bools));
+    ASSERT_EQ(negated_block_of_bools->length(), 64);
+  }

Review Comment:
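   To build the negated block, you can take a look at `InvertBitmap` instead
of appending the inverted runs by hand. More generally, I don't think you
need to generate a full-blown boolean array: these tests only hash the data
bitmap, so you can build the buffer directly with `TypedBufferBuilder<bool>`.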



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to