alamb commented on code in PR #23035:
URL: https://github.com/apache/datafusion/pull/23035#discussion_r3481847817


##########
datafusion/physical-expr/src/expressions/in_list/primitive_filter.rs:
##########
@@ -29,113 +29,108 @@ use std::hash::{Hash, Hasher};
 use super::result::build_in_list_result;
 use super::static_filter::{StaticFilter, handle_dictionary};
 
-/// Bitmap filter for O(1) set membership via single bit test.
-///
-/// `UInt8` has only 256 possible values, so the filter stores membership in a
-/// 256-bit bitmap instead of using a hash table.
-pub(super) struct UInt8BitmapFilter {
-    null_count: usize,
-    bits: [u64; 4],
+pub(super) trait BitmapStorage: Send + Sync {
+    fn new_zeroed() -> Self;
+    fn set_bit(&mut self, index: usize);
+    fn get_bit(&self, index: usize) -> bool;
 }
 
-impl UInt8BitmapFilter {
-    pub(super) fn try_new(in_array: &ArrayRef) -> Result<Self> {
-        let prim_array = 
in_array.as_primitive_opt::<UInt8Type>().ok_or_else(|| {
-            exec_datafusion_err!("UInt8BitmapFilter: expected UInt8 array")
-        })?;
-        let mut bits = [0u64; 4];
-        let mut set_bit = |v: u8| {
-            let index = usize::from(v);
-            bits[index / 64] |= 1u64 << (index % 64);
-        };
-
-        let values = prim_array.values();
-        match prim_array.nulls() {
-            None => {
-                for &v in values {
-                    set_bit(v);
-                }
-            }
-            Some(nulls) => {
-                for i in
-                    BitIndexIterator::new(nulls.validity(), nulls.offset(), 
nulls.len())
-                {
-                    set_bit(values[i]);
-                }
-            }
-        }
-        Ok(Self {
-            null_count: prim_array.null_count(),
-            bits,
-        })
+impl BitmapStorage for [u64; 4] {
+    #[inline]
+    fn new_zeroed() -> Self {
+        [0u64; 4]
+    }
+    #[inline]
+    fn set_bit(&mut self, index: usize) {
+        self[index / 64] |= 1u64 << (index % 64);
     }
+    #[inline(always)]
+    fn get_bit(&self, index: usize) -> bool {
+        (self[index / 64] >> (index % 64)) & 1 != 0
+    }
+}
 
+impl BitmapStorage for Box<[u64; 1024]> {
+    #[inline]
+    fn new_zeroed() -> Self {
+        Box::new([0u64; 1024])
+    }
+    #[inline]
+    fn set_bit(&mut self, index: usize) {
+        self[index / 64] |= 1u64 << (index % 64);
+    }
     #[inline(always)]
-    fn check(&self, needle: u8) -> bool {
-        let index = needle as usize;
-        (self.bits[index / 64] >> (index % 64)) & 1 != 0
+    fn get_bit(&self, index: usize) -> bool {
+        (self[index / 64] >> (index % 64)) & 1 != 0
     }
 }
 
-impl StaticFilter for UInt8BitmapFilter {
-    fn null_count(&self) -> usize {
-        self.null_count
+pub(super) trait BitmapFilterConfig: Send + Sync + 'static {

Review Comment:
   I think a few comments explaining what this trait is and why it is needed
would help future readers (it looks like a package of other types to make the
generics on bitmap implementations simpler).



##########
datafusion/physical-expr/src/expressions/in_list/primitive_filter.rs:
##########
@@ -29,113 +29,108 @@ use std::hash::{Hash, Hasher};
 use super::result::build_in_list_result;
 use super::static_filter::{StaticFilter, handle_dictionary};
 
-/// Bitmap filter for O(1) set membership via single bit test.
-///
-/// `UInt8` has only 256 possible values, so the filter stores membership in a
-/// 256-bit bitmap instead of using a hash table.
-pub(super) struct UInt8BitmapFilter {
-    null_count: usize,
-    bits: [u64; 4],
+pub(super) trait BitmapStorage: Send + Sync {

Review Comment:
   Recommend a few comments explaining this trait, to help future readers
   



##########
datafusion/physical-expr/src/expressions/in_list/primitive_filter.rs:
##########
@@ -29,113 +29,108 @@ use std::hash::{Hash, Hasher};
 use super::result::build_in_list_result;
 use super::static_filter::{StaticFilter, handle_dictionary};
 
-/// Bitmap filter for O(1) set membership via single bit test.
-///
-/// `UInt8` has only 256 possible values, so the filter stores membership in a
-/// 256-bit bitmap instead of using a hash table.
-pub(super) struct UInt8BitmapFilter {
-    null_count: usize,
-    bits: [u64; 4],
+pub(super) trait BitmapStorage: Send + Sync {
+    fn new_zeroed() -> Self;
+    fn set_bit(&mut self, index: usize);
+    fn get_bit(&self, index: usize) -> bool;
 }
 
-impl UInt8BitmapFilter {
-    pub(super) fn try_new(in_array: &ArrayRef) -> Result<Self> {
-        let prim_array = 
in_array.as_primitive_opt::<UInt8Type>().ok_or_else(|| {
-            exec_datafusion_err!("UInt8BitmapFilter: expected UInt8 array")
-        })?;
-        let mut bits = [0u64; 4];
-        let mut set_bit = |v: u8| {
-            let index = usize::from(v);
-            bits[index / 64] |= 1u64 << (index % 64);
-        };
-
-        let values = prim_array.values();
-        match prim_array.nulls() {
-            None => {
-                for &v in values {
-                    set_bit(v);
-                }
-            }
-            Some(nulls) => {
-                for i in
-                    BitIndexIterator::new(nulls.validity(), nulls.offset(), 
nulls.len())
-                {
-                    set_bit(values[i]);
-                }
-            }
-        }
-        Ok(Self {
-            null_count: prim_array.null_count(),
-            bits,
-        })
+impl BitmapStorage for [u64; 4] {

Review Comment:
   > /// `UInt8` has only 256 possible values, so the filter stores membership in a
   > /// 256-bit bitmap instead of using a hash table.
   
   This was nice rationale information that I think would be worth adding to the
comments



##########
datafusion/physical-expr/src/expressions/in_list/primitive_filter.rs:
##########
@@ -29,113 +29,108 @@ use std::hash::{Hash, Hasher};
 use super::result::build_in_list_result;
 use super::static_filter::{StaticFilter, handle_dictionary};
 
-/// Bitmap filter for O(1) set membership via single bit test.
-///
-/// `UInt8` has only 256 possible values, so the filter stores membership in a
-/// 256-bit bitmap instead of using a hash table.
-pub(super) struct UInt8BitmapFilter {
-    null_count: usize,
-    bits: [u64; 4],
+pub(super) trait BitmapStorage: Send + Sync {
+    fn new_zeroed() -> Self;
+    fn set_bit(&mut self, index: usize);
+    fn get_bit(&self, index: usize) -> bool;
 }
 
-impl UInt8BitmapFilter {
-    pub(super) fn try_new(in_array: &ArrayRef) -> Result<Self> {
-        let prim_array = 
in_array.as_primitive_opt::<UInt8Type>().ok_or_else(|| {
-            exec_datafusion_err!("UInt8BitmapFilter: expected UInt8 array")
-        })?;
-        let mut bits = [0u64; 4];
-        let mut set_bit = |v: u8| {
-            let index = usize::from(v);
-            bits[index / 64] |= 1u64 << (index % 64);
-        };
-
-        let values = prim_array.values();
-        match prim_array.nulls() {
-            None => {
-                for &v in values {
-                    set_bit(v);
-                }
-            }
-            Some(nulls) => {
-                for i in
-                    BitIndexIterator::new(nulls.validity(), nulls.offset(), 
nulls.len())
-                {
-                    set_bit(values[i]);
-                }
-            }
-        }
-        Ok(Self {
-            null_count: prim_array.null_count(),
-            bits,
-        })
+impl BitmapStorage for [u64; 4] {
+    #[inline]
+    fn new_zeroed() -> Self {
+        [0u64; 4]
+    }
+    #[inline]
+    fn set_bit(&mut self, index: usize) {
+        self[index / 64] |= 1u64 << (index % 64);
     }
+    #[inline(always)]
+    fn get_bit(&self, index: usize) -> bool {
+        (self[index / 64] >> (index % 64)) & 1 != 0
+    }
+}
 
+impl BitmapStorage for Box<[u64; 1024]> {

Review Comment:
   ditto here for comments about using an 8k bitmap rather than a hash table



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to