felipecrv commented on code in PR #35814:
URL: https://github.com/apache/arrow/pull/35814#discussion_r1210717160
##########
cpp/src/arrow/util/hashing_test.cc:
##########
@@ -486,5 +488,170 @@ TEST(BinaryMemoTable, Empty) {
EXPECT_EQ(offsets[0], 0);
}
+// Hashes the bitmap held in buffers[1] of a boolean `array`, restricted to the
+// bit range described by the span's offset and length. The seed is fixed at 0.
+hash_t HashDataBitmap(const ArraySpan& array) {
+  EXPECT_EQ(array.type->id(), Type::BOOL);
+  const auto& values = array.buffers[1];
+  const auto first_bit = array.offset;
+  const auto bit_count = array.length;
+  return ComputeBitmapHash(values.data, values.size, /*seed=*/0,
+                           /*bit_offset=*/first_bit, /*num_bits=*/bit_count);
+}
+
+// Builds a boolean array of `len` alternating values whose first element (if
+// any) equals `start`: start, !start, start, ...
+std::shared_ptr<BooleanArray> BuildBooleanArray(int len, bool start) {
+  // This could be memoized in the future to speed up tests.
+  BooleanBuilder builder;
+  bool value = start;
+  for (int pos = 0; pos < len; ++pos) {
+    EXPECT_TRUE(builder.Append(value).ok());
+    value = !value;  // flip for the next position
+  }
+  std::shared_ptr<BooleanArray> result;
+  EXPECT_TRUE(builder.Finish(&result).ok());
+  return result;
+}
+
+// Concatenates `arrays` (the result is expected to be boolean) and hashes its
+// data bitmap. The whole concatenation is hashed unless both `bits_offset` and
+// `num_bits` are supplied (i.e. neither is -1), in which case only the slice
+// [bits_offset, bits_offset + num_bits) is hashed.
+hash_t HashConcatenation(const ArrayVector& arrays, int64_t bits_offset = -1,
+                         int64_t num_bits = -1) {
+  EXPECT_OK_AND_ASSIGN(auto joined, Concatenate(arrays));
+  EXPECT_EQ(joined->type()->id(), Type::BOOL);
+  const bool slice_requested = bits_offset != -1 && num_bits != -1;
+  if (slice_requested) {
+    auto window = joined->Slice(bits_offset, num_bits);
+    return HashDataBitmap(*window->data());
+  }
+  return HashDataBitmap(*joined->data());
+}
+
+// Checks that hashing a slice of a 64-element alternating boolean block gives
+// the same hash as hashing a freshly built array with the same contents, for
+// every length below 64 and every starting offset within that length.
+TEST(SmallBitmapHash, Empty) {
+  for (bool start : {false, true}) {
+    const auto block = BuildBooleanArray(64, start);
+    for (int len = 0; len < 64; ++len) {
+      {
+        // Offset 0: the prefix of `block` versus a fresh array of `len` values.
+        const auto fresh = BuildBooleanArray(len, start);
+        const auto want = HashDataBitmap(*fresh->data());
+        const auto view = block->Slice(0, len);
+        const auto got = HashDataBitmap(*view->data());
+        ASSERT_EQ(want, got);
+      }
+
+      for (int skip = 1; skip < len; ++skip) {
+        // Skipping `skip` elements flips the first value when `skip` is odd.
+        const int remaining = len - skip;
+        const auto fresh = BuildBooleanArray(remaining, start ^ (skip % 2));
+        const auto want = HashDataBitmap(*fresh->data());
+        const auto view = block->Slice(skip, remaining);
+        const auto got = HashDataBitmap(*view->data());
+        ASSERT_EQ(want, got);
+      }
+    }
+  }
+}
+
+TEST(TestBitmapHash, Empty) {
+ BooleanBuilder builder;
+ std::shared_ptr<BooleanArray> block_of_bools;
+ {
+ ASSERT_OK(builder.AppendValues(2, true));
+ ASSERT_OK(builder.AppendValues(3, false));
+ ASSERT_OK(builder.AppendValues(5, true));
+ ASSERT_OK(builder.AppendValues(7, false));
+ ASSERT_OK(builder.AppendValues(11, true));
+ ASSERT_OK(builder.AppendValues(13, false));
+ ASSERT_OK(builder.AppendValues(17, true));
+ ASSERT_OK(builder.AppendValues(5, false));
+ ASSERT_OK(builder.AppendValues(1, true));
+ ASSERT_OK(builder.Finish(&block_of_bools));
+ ASSERT_EQ(block_of_bools->length(), 64);
+ }
+ const auto hash_of_block = HashDataBitmap(*block_of_bools->data());
+
+ std::shared_ptr<BooleanArray> negated_block_of_bools;
+ {
+ ASSERT_OK(builder.AppendValues(2, false));
+ ASSERT_OK(builder.AppendValues(3, true));
+ ASSERT_OK(builder.AppendValues(5, false));
+ ASSERT_OK(builder.AppendValues(7, true));
+ ASSERT_OK(builder.AppendValues(11, false));
+ ASSERT_OK(builder.AppendValues(13, true));
+ ASSERT_OK(builder.AppendValues(17, false));
+ ASSERT_OK(builder.AppendValues(5, true));
+ ASSERT_OK(builder.AppendValues(1, false));
+ ASSERT_OK(builder.Finish(&negated_block_of_bools));
+ ASSERT_EQ(negated_block_of_bools->length(), 64);
+ }
Review Comment:
I ended up just removing the negated block to simplify the tests.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]