ODukhno commented on code in PR #8762:
URL: https://github.com/apache/arrow-rs/pull/8762#discussion_r2621241936


##########
parquet/src/bloom_filter/mod.rs:
##########
@@ -541,4 +599,49 @@ mod tests {
             assert_eq!(*num_bits, num_of_bits_from_ndv_fpp(*ndv, *fpp) as u64);
         }
     }
+
+    #[test]
+    fn test_sbbf_write_round_trip() {
+        // Create a bloom filter with a 32-byte bitset (minimum size)
+        let bitset_bytes = vec![0u8; 32];
+        let mut original = Sbbf::new(&bitset_bytes);
+
+        // Insert some test values
+        let test_values = ["hello", "world", "rust", "parquet", "bloom", 
"filter"];
+        for value in &test_values {
+            original.insert(value);
+        }
+
+        // Serialize to bytes
+        let mut output = Vec::new();
+        original.write(&mut output).unwrap();
+
+        // Validate header was written correctly
+        let mut protocol = ThriftSliceInputProtocol::new(&output);
+        let header = BloomFilterHeader::read_thrift(&mut protocol).unwrap();
+        assert_eq!(header.num_bytes, bitset_bytes.len() as i32);
+        assert_eq!(header.algorithm, BloomFilterAlgorithm::BLOCK);
+        assert_eq!(header.hash, BloomFilterHash::XXHASH);
+        assert_eq!(header.compression, BloomFilterCompression::UNCOMPRESSED);
+
+        // Deserialize using from_bytes
+        let reconstructed = Sbbf::from_bytes(&output).unwrap();
+
+        // Most importantly: verify the bloom filter WORKS correctly after 
round-trip
+        for value in &test_values {
+            assert!(
+                reconstructed.check(value),
+                "Value '{}' should be present after round-trip",
+                value
+            );
+        }
+
+        // Verify false negative check (values not inserted should not be 
found)
+        let missing_values = ["missing", "absent", "nothere"];
+        for value in &missing_values {
+            // Note: bloom filters can have false positives, but should never 
have false negatives
+            // So we can't assert !check(), but we should verify inserted 
values are found
+            let _ = reconstructed.check(value); // Just exercise the code path

Review Comment:
   I just pushed another change that applies this suggestion along with all 
the other suggestions above.
   
   Thanks!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to