This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch bloom_cross_lang
in repository https://gitbox.apache.org/repos/asf/datasketches-java.git

commit 20b7666f7c5470d8fde35b2a6915c0453fa317ad
Author: jmalkin <[email protected]>
AuthorDate: Wed Aug 14 00:24:30 2024 -0700

    add cross-language bloom filter tests and fix typos in main BloomFilter.java
---
 .../filters/bloomfilter/BloomFilter.java           | 20 +++---
 .../bloomfilter/BloomFilterCrossLanguageTest.java  | 83 ++++++++++++++++++++++
 2 files changed, 93 insertions(+), 10 deletions(-)

diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java
index eafe8834..c03216bf 100644
--- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java
+++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java
@@ -381,7 +381,7 @@ public final class BloomFilter {
 
   /**
    * Updates the filter with the provided long and
-   * returns the result from quering that value prior to the update.
+   * returns the result from querying that value prior to the update.
    * @param item an item with which to update the filter
    * @return The query result prior to applying the update
    */
@@ -393,7 +393,7 @@ public final class BloomFilter {
 
   /**
    * Updates the filter with the provided double and
-   * returns the result from quering that value prior to the update.
+   * returns the result from querying that value prior to the update.
    * The double is canonicalized (NaN and +/- infinity) in the call.
    * @param item an item with which to update the filter
    * @return The query result prior to applying the update
@@ -408,7 +408,7 @@ public final class BloomFilter {
 
   /**
    * Updates the filter with the provided String and
-   * returns the result from quering that value prior to the update.
+   * returns the result from querying that value prior to the update.
    * The string is converted to a byte array using UTF8 encoding.
    *
    * <p>Note: this will not produce the same output hash values as the {@link #queryAndUpdate(char[])}
@@ -428,7 +428,7 @@ public final class BloomFilter {
 
   /**
    * Updates the filter with the provided byte[] and
-   * returns the result from quering that array prior to the update.
+   * returns the result from querying that array prior to the update.
    * @param data an array with which to update the filter
    * @return The query result prior to applying the update, or false if data is null
    */
@@ -440,7 +440,7 @@ public final class BloomFilter {
 
   /**
    * Updates the filter with the provided char[] and
-   * returns the result from quering that array prior to the update.
+   * returns the result from querying that array prior to the update.
    * @param data an array with which to update the filter
    * @return The query result prior to applying the update, or false if data is null
    */
@@ -453,7 +453,7 @@ public final class BloomFilter {
 
   /**
    * Updates the filter with the provided short[] and
-   * returns the result from quering that array prior to the update.
+   * returns the result from querying that array prior to the update.
    * @param data an array with which to update the filter
    * @return The query result prior to applying the update, or false if data is null
    */
@@ -466,7 +466,7 @@ public final class BloomFilter {
 
   /**
    * Updates the filter with the provided int[] and
-   * returns the result from quering that array prior to the update.
+   * returns the result from querying that array prior to the update.
    * @param data an array with which to update the filter
    * @return The query result prior to applying the update, or false if data is null
    */
@@ -479,7 +479,7 @@ public final class BloomFilter {
 
   /**
    * Updates the filter with the provided long[] and
-   * returns the result from quering that array prior to the update.
+   * returns the result from querying that array prior to the update.
    * @param data an array with which to update the filter
    * @return The query result prior to applying the update, or false if data is null
    */
@@ -492,7 +492,7 @@ public final class BloomFilter {
 
   /**
    * Updates the filter with the provided Memory and
-   * returns the result from quering that Memory prior to the update.
+   * returns the result from querying that Memory prior to the update.
    * @param mem a Memory object with which to update the filter
    * @return The query result prior to applying the update, or false if mem is null
    */
@@ -762,7 +762,7 @@ public final class BloomFilter {
  *  3   ||---------------------------------NumBitsSet------------------------------------|
  *  </pre>
  *
- * The raw BitArray bits, if non-empty start at byte 24.
+ * The raw BitArray bits, if non-empty start at byte 32.
  */
 
   /**
diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java
new file mode 100644
index 00000000..22ad7c88
--- /dev/null
+++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java
@@ -0,0 +1,83 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+
+package org.apache.datasketches.filters.bloomfilter;
+
+import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES;
+import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES;
+import static org.apache.datasketches.common.TestUtil.cppPath;
+import static org.apache.datasketches.common.TestUtil.javaPath;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import java.io.IOException;
+import java.nio.file.Files;
+
+import org.apache.datasketches.memory.Memory;
+import org.testng.annotations.Test;
+
+/**
+* Cross-language compatibility tests for the Bloom filter.
+*
+* <p>One test serializes filters built in Java to files so that C++ code can read them;
+* the other deserializes filters serialized by C++ code and verifies their contents.
+*/
+public class BloomFilterCrossLanguageTest {
+
+  /**
+   * Builds one filter per (n, numHashes) combination and writes its serialized bytes
+   * to {@code javaPath} as {@code bf_n<n>_h<numHashes>_java.sk}.
+   *
+   * <p>Each filter is updated with the integers {@code 0 .. n/10 - 1} and, when
+   * {@code n > 0}, with {@code Double.NaN}.
+   *
+   * @throws IOException if a file cannot be written
+   */
+  @Test(groups = {GENERATE_JAVA_FILES})
+  public void generateBloomFilterBinariesForCompatibilityTesting() throws IOException {
+    final int[] nArr = {0, 10000, 2000000, 30000000};
+    final int[] hArr = {3, 5};
+    for (final int n : nArr) {
+      for (final int numHashes : hArr) {
+        final long configBits = Math.max(n, 1000); // so empty still has valid bit size
+        final BloomFilter bf = BloomFilterBuilder.createBySize(configBits, numHashes);
+        for (int i = 0; i < n / 10; ++i) {
+          bf.update(i);
+        }
+        if (n > 0) {
+          bf.update(Double.NaN);
+        }
+        assertEquals(bf.isEmpty(), (n == 0));
+        assertTrue(bf.isEmpty() || (bf.getBitsUsed() > n / 10));
+        // Files.write opens, writes and closes the file in one call, so no
+        // OutputStream is left open (one file is produced per loop iteration).
+        Files.write(javaPath.resolve("bf_n" + n + "_h" + numHashes + "_java.sk"), bf.toByteArray());
+      }
+    }
+  }
+
+  /**
+   * Reads {@code bf_n<n>_h<numHashes>_cpp.sk} from {@code cppPath} for each
+   * (n, numHashes) combination, wraps the bytes as a filter and checks that:
+   * the filter is empty exactly when {@code n == 0}; a non-empty filter has more than
+   * {@code n/10} bits set; every integer in {@code 0 .. n/10 - 1} is reported present;
+   * and {@code Double.NaN} is reported present when {@code n > 0}.
+   *
+   * @throws IOException if a file cannot be read
+   */
+  @Test(groups = {CHECK_CPP_FILES})
+  public void checkBloomFilterCppBinaries() throws IOException {
+    final int[] nArr = {0, 10000, 2000000, 30000000};
+    final int[] hArr = {3, 5};
+    for (final int n : nArr) {
+      for (final int numHashes : hArr) {
+        final byte[] bytes = Files.readAllBytes(cppPath.resolve("bf_n" + n + "_h" + numHashes + "_cpp.sk"));
+        final BloomFilter bf = BloomFilter.wrap(Memory.wrap(bytes));
+
+        assertEquals(bf.isEmpty(), (n == 0));
+        assertTrue(bf.isEmpty() || (bf.getBitsUsed() > n / 10));
+
+        for (int i = 0; i < n / 10; ++i) {
+          assertTrue(bf.query(i));
+        }
+        if (n > 0) {
+          assertTrue(bf.query(Double.NaN));
+        }
+      }
+    }
+  }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to