This is an automated email from the ASF dual-hosted git repository. jmalkin pushed a commit to branch bloom_cross_lang in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 20b7666f7c5470d8fde35b2a6915c0453fa317ad Author: jmalkin <[email protected]> AuthorDate: Wed Aug 14 00:24:30 2024 -0700 add cross-language bloom filter tests and fix typos in main BloomFilter.java --- .../filters/bloomfilter/BloomFilter.java | 20 +++--- .../bloomfilter/BloomFilterCrossLanguageTest.java | 83 ++++++++++++++++++++++ 2 files changed, 93 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java index eafe8834..c03216bf 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java @@ -381,7 +381,7 @@ public final class BloomFilter { /** * Updates the filter with the provided long and - * returns the result from quering that value prior to the update. + * returns the result from querying that value prior to the update. * @param item an item with which to update the filter * @return The query result prior to applying the update */ @@ -393,7 +393,7 @@ public final class BloomFilter { /** * Updates the filter with the provided double and - * returns the result from quering that value prior to the update. + * returns the result from querying that value prior to the update. * The double is canonicalized (NaN and +/- infinity) in the call. * @param item an item with which to update the filter * @return The query result prior to applying the update @@ -408,7 +408,7 @@ public final class BloomFilter { /** * Updates the filter with the provided String and - * returns the result from quering that value prior to the update. + * returns the result from querying that value prior to the update. * The string is converted to a byte array using UTF8 encoding. * * <p>Note: this will not produce the same output hash values as the {@link #queryAndUpdate(char[])} @@ -428,7 +428,7 @@ public final class BloomFilter { /** * Updates the filter with the provided byte[] and - * returns the result from quering that array prior to the update. + * returns the result from querying that array prior to the update. * @param data an array with which to update the filter * @return The query result prior to applying the update, or false if data is null */ @@ -440,7 +440,7 @@ public final class BloomFilter { /** * Updates the filter with the provided char[] and - * returns the result from quering that array prior to the update. + * returns the result from querying that array prior to the update. * @param data an array with which to update the filter * @return The query result prior to applying the update, or false if data is null */ @@ -453,7 +453,7 @@ public final class BloomFilter { /** * Updates the filter with the provided short[] and - * returns the result from quering that array prior to the update. + * returns the result from querying that array prior to the update. * @param data an array with which to update the filter * @return The query result prior to applying the update, or false if data is null */ @@ -466,7 +466,7 @@ public final class BloomFilter { /** * Updates the filter with the provided int[] and - * returns the result from quering that array prior to the update. + * returns the result from querying that array prior to the update. * @param data an array with which to update the filter * @return The query result prior to applying the update, or false if data is null */ @@ -479,7 +479,7 @@ public final class BloomFilter { /** * Updates the filter with the provided long[] and - * returns the result from quering that array prior to the update. + * returns the result from querying that array prior to the update. * @param data an array with which to update the filter * @return The query result prior to applying the update, or false if data is null */ @@ -492,7 +492,7 @@ public final class BloomFilter { /** * Updates the filter with the provided Memory and - * returns the result from quering that Memory prior to the update. + * returns the result from querying that Memory prior to the update. * @param mem an array with which to update the filter * @return The query result prior to applying the update, or false if mem is null */ @@ -762,7 +762,7 @@ public final class BloomFilter { * 3 ||---------------------------------NumBitsSet------------------------------------| * </pre> * - * The raw BitArray bits, if non-empty start at byte 24. + * The raw BitArray bits, if non-empty start at byte 32. */ /** diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java new file mode 100644 index 00000000..22ad7c88 --- /dev/null +++ b/src/test/java/org/apache/datasketches/filters/bloomfilter/BloomFilterCrossLanguageTest.java @@ -0,0 +1,83 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +*/ + +package org.apache.datasketches.filters.bloomfilter; + +import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES; +import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES; +import static org.apache.datasketches.common.TestUtil.cppPath; +import static org.apache.datasketches.common.TestUtil.javaPath; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import java.io.IOException; +import java.nio.file.Files; + +import org.apache.datasketches.memory.Memory; +import org.testng.annotations.Test; + +/** +* Serialize binary filters to be tested by C++ code. +* Test deserialization of binary filters serialized by C++ code. +*/ +public class BloomFilterCrossLanguageTest { + + @Test(groups = {GENERATE_JAVA_FILES}) + public void generateBloomFilterBinariesForCompatibilityTesting() throws IOException { + final int[] nArr = {0, 10000, 2000000, 30000000}; + final int[] hArr = {3, 5}; + for (int n : nArr) { + for (int numHashes : hArr) { + long configBits = Math.max(n, 1000); // so empty still has valid bit size + final BloomFilter bf = BloomFilterBuilder.createBySize(configBits, numHashes); + for (int i = 0; i < n / 10; ++i) { + bf.update(i); + } + if (n > 0) { + bf.update(Double.NaN); + } + assertEquals(bf.isEmpty(), (n == 0)); + assertTrue(bf.isEmpty() || (bf.getBitsUsed() > n / 10)); + Files.newOutputStream(javaPath.resolve("bf_n" + n + "_h" + numHashes + "_java.sk")).write(bf.toByteArray()); + } + } + } + + @Test(groups = {CHECK_CPP_FILES}) + public void checkBloomFilterCppBinaries() throws IOException { + final int[] nArr = {0, 10000, 2000000, 30000000}; + final int[] hArr = {3, 5}; + for (int n : nArr) { + for (int numHashes : hArr) { + final byte[] bytes = Files.readAllBytes(cppPath.resolve("bf_n" + n + "_h" + numHashes + "_cpp.sk")); + final BloomFilter bf = BloomFilter.wrap(Memory.wrap(bytes)); + + assertEquals(bf.isEmpty(), (n == 0)); + assertTrue(bf.isEmpty() || (bf.getBitsUsed() > n / 10)); + + for (int i = 0; i < n / 10; ++i) { + assertTrue(bf.query(i)); + } + if (n > 0) { + assertTrue(bf.query(Double.NaN)); + } + } + } + } +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
