This is an automated email from the ASF dual-hosted git repository.

charlie pushed a commit to branch master
in repository 
https://gitbox.apache.org/repos/asf/datasketches-characterization.git


The following commit(s) were added to refs/heads/master by this push:
     new ff2096f  Added quotient filter update times and renamed bloom profile
ff2096f is described below

commit ff2096fb713200d1c22f5c8565e5118ecf82b69c
Author: Charlie Dickens <[email protected]>
AuthorDate: Wed Jun 5 13:43:49 2024 +0100

    Added quotient filter update times and renamed bloom profile
---
 ...file.java => BaseFilterUpdateSpeedProfile.java} |  6 ++--
 ...ile.java => BloomFilterUpdateSpeedProfile.java} | 14 +-------
 .../filters/QuotientFilterUpdateSpeedProfile.java  | 41 ++++++++++++++++++++++
 ...peedJob.conf => BloomFilterUpdateSpeedJob.conf} | 23 +++++-------
 ...dJob.conf => QuotientFilterUpdateSpeedJob.conf} | 29 +++++++--------
 5 files changed, 66 insertions(+), 47 deletions(-)

diff --git 
a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterSpeedProfile.java
 
b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterUpdateSpeedProfile.java
similarity index 96%
rename from 
src/main/java/org/apache/datasketches/characterization/filters/BaseFilterSpeedProfile.java
rename to 
src/main/java/org/apache/datasketches/characterization/filters/BaseFilterUpdateSpeedProfile.java
index add495c..c017385 100644
--- 
a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterSpeedProfile.java
+++ 
b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterUpdateSpeedProfile.java
@@ -8,10 +8,10 @@ import static java.lang.Math.log;
 import static java.lang.Math.pow;
 import static org.apache.datasketches.common.Util.pwr2SeriesNext;
 
-public abstract class BaseFilterSpeedProfile implements JobProfile {
+public abstract class BaseFilterUpdateSpeedProfile implements JobProfile {
     Job job;
     public Properties prop;
-    public long vIn = 0;
+    public long vIn = 1;
     int lgMinT;
     int lgMaxT;
     int lgMinU;
@@ -72,7 +72,7 @@ public abstract class BaseFilterSpeedProfile implements 
JobProfile {
         int lastU = 0;
         final StringBuilder dataStr = new StringBuilder();
         job.println(getHeader());
-        while (lastU < maxU) { //Trials for each U point on X-axis, and one 
row on output
+        while (lastU < 0.9*maxU) { //Trials for each U point on X-axis, and 
one row on output
             final int nextU = lastU == 0 ? minU : (int)pwr2SeriesNext(uPPO, 
lastU);
             lastU = nextU;
             final int trials = getNumTrials(nextU);
diff --git 
a/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpeedProfile.java
 
b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterUpdateSpeedProfile.java
similarity index 62%
rename from 
src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpeedProfile.java
rename to 
src/main/java/org/apache/datasketches/characterization/filters/BloomFilterUpdateSpeedProfile.java
index e49d884..b968cfd 100644
--- 
a/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpeedProfile.java
+++ 
b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterUpdateSpeedProfile.java
@@ -1,22 +1,11 @@
 package org.apache.datasketches.characterization.filters;
 
-import org.apache.datasketches.Properties;
-import org.apache.datasketches.characterization.ZipfDistribution;
-//import 
org.apache.datasketches.characterization.filters.BaseFilterSpeedProfile;
-import org.apache.datasketches.common.Family;
-import org.apache.datasketches.common.ResizeFactor;
 import org.apache.datasketches.filters.bloomfilter.BloomFilter;
-import org.apache.datasketches.frequencies.LongsSketch;
-import org.apache.datasketches.memory.Memory;
 import org.apache.datasketches.memory.WritableHandle;
 import org.apache.datasketches.memory.WritableMemory;
-import org.apache.datasketches.theta.Sketch;
-import org.apache.datasketches.theta.UpdateSketch;
-import org.apache.datasketches.theta.UpdateSketchBuilder;
 import org.apache.datasketches.filters.bloomfilter.BloomFilterBuilder;
-import org.apache.datasketches.filters.bloomfilter.BloomFilter;
 
-public class BloomFilterSpeedProfile extends BaseFilterSpeedProfile{
+public class BloomFilterUpdateSpeedProfile extends 
BaseFilterUpdateSpeedProfile{
     protected BloomFilter sketch;
     private WritableHandle handle;
     private WritableMemory wmem;
@@ -24,7 +13,6 @@ public class BloomFilterSpeedProfile extends 
BaseFilterSpeedProfile{
     @Override
     public void configure() {
         //Configure Sketch
-        //final long numBits, final int numHashes, final long seed)
         final long numBits = Integer.parseInt(prop.mustGet("numBits"));
         final int numHashes = Integer.parseInt(prop.mustGet("numHashes"));
         sketch =  BloomFilterBuilder.createBySize(numBits, numHashes);
diff --git 
a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterUpdateSpeedProfile.java
 
b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterUpdateSpeedProfile.java
new file mode 100644
index 0000000..e73e6d0
--- /dev/null
+++ 
b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterUpdateSpeedProfile.java
@@ -0,0 +1,41 @@
+package org.apache.datasketches.characterization.filters;
+
+import org.apache.datasketches.memory.WritableHandle;
+import org.apache.datasketches.memory.WritableMemory;
+
+
+import org.apache.datasketches.filters.quotientfilter.QuotientFilter;
+
+public class QuotientFilterUpdateSpeedProfile extends 
BaseFilterUpdateSpeedProfile{
+    protected QuotientFilter sketch;
+    protected int lgNumSlots ;
+    protected int numBitsPerSlot;
+    private WritableHandle handle;
+    private WritableMemory wmem;
+
+    @Override
+    public void configure() {
+        lgNumSlots = Integer.parseInt(prop.mustGet("lgNumSlots"));
+        numBitsPerSlot = Integer.parseInt(prop.mustGet("numBitsPerSlot"));
+    }
+
+    @Override
+    public void cleanup() {
+        try {
+            if (handle != null) { handle.close(); }
+        } catch (final Exception e) {}
+    }
+
+    @Override
+    public double doTrial(final int uPerTrial) {
+        //sketch.reset(); //is not implemented
+        sketch = new QuotientFilter(lgNumSlots, numBitsPerSlot);
+        final long startUpdateTime_nS = System.nanoTime();
+        for (int u = uPerTrial; u-- > 0;) {
+            sketch.insert(++vIn);
+        }
+        final long updateTime_nS = System.nanoTime() - startUpdateTime_nS;
+        return (double) updateTime_nS / uPerTrial;
+    }
+}
+
diff --git a/src/main/resources/filters/BloomFilterSpeedJob.conf 
b/src/main/resources/filters/BloomFilterUpdateSpeedJob.conf
similarity index 68%
copy from src/main/resources/filters/BloomFilterSpeedJob.conf
copy to src/main/resources/filters/BloomFilterUpdateSpeedJob.conf
index 0f0c8e5..a88b7dc 100644
--- a/src/main/resources/filters/BloomFilterSpeedJob.conf
+++ b/src/main/resources/filters/BloomFilterUpdateSpeedJob.conf
@@ -17,16 +17,9 @@
 
 # Job
 
-# The Bloom Filter Update Speed profile is evaluated by choosing a maximum 
input cardinality.
-# From this input cardinality N, and a target false positive probability Fpp, 
we evaluate
-# ```
-# final long numItems = 1L<<20;
-# final double targetFpp = 1E-6 ;
-# long numBits = BloomFilterBuilder.suggestNumFilterBits(numItems, targetFpp);
-# final short numHashes = BloomFilterBuilder.suggestNumHashes(numItems, 
numBits);
-# ```
-# and then use these parameters for the filter configuration at the bottom of 
this file.
-# Note that `final long numItems = 1L<<20` should correspond direclty with 
`Trials_lgMaxU`
+# The Bloom Filter Update Speed profile is evaluated by fixing a size in bits 
for the filter
+# and inserting items up to a maximum input cardinality N.
+# Note that the maximum input cardinality N (e.g. 1L<<20) should correspond directly with 
`Trials_lgMaxU`
 # in this configuration file.
 
 # Uniques Profile
@@ -35,8 +28,8 @@ Trials_lgMaxU=20 #How high the # uniques go 20
 Trials_UPPO=16   #The horizontal x-resolution of trials points
 
 # Trials Profile
-Trials_lgMaxT=10 #Max trials at start (low counts) 23
-Trials_lgMinT=4  #Min trials at tail (high counts) 4
+Trials_lgMaxT=12 #Max trials at start (low counts) 23
+Trials_lgMinT=6  #Min trials at tail (high counts) 4
 
 #Trails Speed related
 Trials_lgMinBpU=4   #start the downward slope of trials at this U 4
@@ -49,6 +42,6 @@ FileNameDateFormat=yyyyMMdd'_'HHmmssz
 ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
 
 #Job Profile
-JobProfile=org.apache.datasketches.characterization.filters.BloomFilterSpeedProfile
-numBits = 30151987
-numHashes = 20
+JobProfile=org.apache.datasketches.characterization.filters.BloomFilterUpdateSpeedProfile
+numBits = 16777216
+numHashes = 16
diff --git a/src/main/resources/filters/BloomFilterSpeedJob.conf 
b/src/main/resources/filters/QuotientFilterUpdateSpeedJob.conf
similarity index 61%
rename from src/main/resources/filters/BloomFilterSpeedJob.conf
rename to src/main/resources/filters/QuotientFilterUpdateSpeedJob.conf
index 0f0c8e5..b073077 100644
--- a/src/main/resources/filters/BloomFilterSpeedJob.conf
+++ b/src/main/resources/filters/QuotientFilterUpdateSpeedJob.conf
@@ -17,17 +17,14 @@
 
 # Job
 
-# The Bloom Filter Update Speed profile is evaluated by choosing a maximum 
input cardinality.
-# From this input cardinality N, and a target false positive probability Fpp, 
we evaluate
-# ```
-# final long numItems = 1L<<20;
-# final double targetFpp = 1E-6 ;
-# long numBits = BloomFilterBuilder.suggestNumFilterBits(numItems, targetFpp);
-# final short numHashes = BloomFilterBuilder.suggestNumHashes(numItems, 
numBits);
-# ```
-# and then use these parameters for the filter configuration at the bottom of 
this file.
-# Note that `final long numItems = 1L<<20` should correspond direclty with 
`Trials_lgMaxU`
-# in this configuration file.
+# We configure the Quotient Filter by fixing the input cardinality, N = 
alpha*2^j
+# for some parameter alpha.
+# The number of slots in the filter is set as lgNumSlots = j.
+# The number of bits per slot, numBitsPerSlot, is a parameter, and the 
total
+# number of bits in the filter is (1L<<lgNumSlots)*numBitsPerSlot.
+# This setting should yield a filter with a target load factor of alpha and a 
false positive
+# probability of approximately 1/2^(numBitsPerSlot-3).
+
 
 # Uniques Profile
 Trials_lgMinU=0  #The starting # of uniques that is printed at the end. 0
@@ -35,8 +32,8 @@ Trials_lgMaxU=20 #How high the # uniques go 20
 Trials_UPPO=16   #The horizontal x-resolution of trials points
 
 # Trials Profile
-Trials_lgMaxT=10 #Max trials at start (low counts) 23
-Trials_lgMinT=4  #Min trials at tail (high counts) 4
+Trials_lgMaxT=12 #Max trials at start (low counts) 23
+Trials_lgMinT=6  #Min trials at tail (high counts) 4
 
 #Trails Speed related
 Trials_lgMinBpU=4   #start the downward slope of trials at this U 4
@@ -49,6 +46,6 @@ FileNameDateFormat=yyyyMMdd'_'HHmmssz
 ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
 
 #Job Profile
-JobProfile=org.apache.datasketches.characterization.filters.BloomFilterSpeedProfile
-numBits = 30151987
-numHashes = 20
+JobProfile=org.apache.datasketches.characterization.filters.QuotientFilterUpdateSpeedProfile
+lgNumSlots = 20
+numBitsPerSlot = 16


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to