geoffreyclaude commented on code in PR #23035:
URL: https://github.com/apache/datafusion/pull/23035#discussion_r3483020046
##########
datafusion/physical-expr/src/expressions/in_list/primitive_filter.rs:
##########
@@ -29,113 +29,108 @@ use std::hash::{Hash, Hasher};
use super::result::build_in_list_result;
use super::static_filter::{StaticFilter, handle_dictionary};
-/// Bitmap filter for O(1) set membership via single bit test.
-///
-/// `UInt8` has only 256 possible values, so the filter stores membership in a
-/// 256-bit bitmap instead of using a hash table.
-pub(super) struct UInt8BitmapFilter {
- null_count: usize,
- bits: [u64; 4],
+pub(super) trait BitmapStorage: Send + Sync {
+ fn new_zeroed() -> Self;
+ fn set_bit(&mut self, index: usize);
+ fn get_bit(&self, index: usize) -> bool;
}
-impl UInt8BitmapFilter {
- pub(super) fn try_new(in_array: &ArrayRef) -> Result<Self> {
- let prim_array = in_array.as_primitive_opt::<UInt8Type>().ok_or_else(|| {
- exec_datafusion_err!("UInt8BitmapFilter: expected UInt8 array")
- })?;
- let mut bits = [0u64; 4];
- let mut set_bit = |v: u8| {
- let index = usize::from(v);
- bits[index / 64] |= 1u64 << (index % 64);
- };
-
- let values = prim_array.values();
- match prim_array.nulls() {
- None => {
- for &v in values {
- set_bit(v);
- }
- }
- Some(nulls) => {
- for i in
- BitIndexIterator::new(nulls.validity(), nulls.offset(), nulls.len())
- {
- set_bit(values[i]);
- }
- }
- }
- Ok(Self {
- null_count: prim_array.null_count(),
- bits,
- })
+impl BitmapStorage for [u64; 4] {
+ #[inline]
+ fn new_zeroed() -> Self {
+ [0u64; 4]
+ }
+ #[inline]
+ fn set_bit(&mut self, index: usize) {
+ self[index / 64] |= 1u64 << (index % 64);
}
+ #[inline(always)]
+ fn get_bit(&self, index: usize) -> bool {
+ (self[index / 64] >> (index % 64)) & 1 != 0
+ }
+}
+impl BitmapStorage for Box<[u64; 1024]> {
+ #[inline]
+ fn new_zeroed() -> Self {
+ Box::new([0u64; 1024])
+ }
+ #[inline]
+ fn set_bit(&mut self, index: usize) {
+ self[index / 64] |= 1u64 << (index % 64);
+ }
#[inline(always)]
- fn check(&self, needle: u8) -> bool {
- let index = needle as usize;
- (self.bits[index / 64] >> (index % 64)) & 1 != 0
+ fn get_bit(&self, index: usize) -> bool {
+ (self[index / 64] >> (index % 64)) & 1 != 0
}
}
-impl StaticFilter for UInt8BitmapFilter {
- fn null_count(&self) -> usize {
- self.null_count
+pub(super) trait BitmapFilterConfig: Send + Sync + 'static {
Review Comment:
Still not perfect, but much better, I'd say! The LOC diff of the PR dropped,
which is usually a good sign... I'm OK with merging as is 😃
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]