viirya commented on code in PR #3284:
URL: https://github.com/apache/arrow-rs/pull/3284#discussion_r1043078196
##########
parquet/src/arrow/arrow_writer/byte_array.rs:
##########
@@ -555,6 +561,13 @@ where
}
}
+ // encode the values into bloom filter if enabled
+ if let Some(bloom_filter) = &mut encoder.bloom_filter {
+ for idx in 0..values.len() {
+ bloom_filter.insert(&values.value(idx));
Review Comment:
Null slots are now skipped.
##########
parquet/src/arrow/arrow_writer/mod.rs:
##########
@@ -1528,6 +1594,33 @@ mod tests {
values_required::<BinaryArray, _>(many_vecs_iter);
}
+ #[test]
+ fn i32_column_bloom_filter() {
+ let positive_values: Vec<i32> = (0..SMALL_SIZE as i32).collect();
+ let files = values_required::<Int32Array, _>(positive_values);
+ check_bloom_filter(
+ files,
+ "col".to_string(),
+ (0..SMALL_SIZE as i32).collect(),
+ (SMALL_SIZE as i32 + 1..SMALL_SIZE as i32 + 10).collect(),
+ );
+ }
+
+ #[test]
+ fn binary_column_bloom_filter() {
Review Comment:
I added a test for that.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]