This is an automated email from the ASF dual-hosted git repository.
charlie pushed a commit to branch master
in repository
https://gitbox.apache.org/repos/asf/datasketches-characterization.git
The following commit(s) were added to refs/heads/master by this push:
new ff2096f Added quotient filter update times and renamed bloom profile
ff2096f is described below
commit ff2096fb713200d1c22f5c8565e5118ecf82b69c
Author: Charlie Dickens <[email protected]>
AuthorDate: Wed Jun 5 13:43:49 2024 +0100
Added quotient filter update times and renamed bloom profile
---
...file.java => BaseFilterUpdateSpeedProfile.java} | 6 ++--
...ile.java => BloomFilterUpdateSpeedProfile.java} | 14 +-------
.../filters/QuotientFilterUpdateSpeedProfile.java | 41 ++++++++++++++++++++++
...peedJob.conf => BloomFilterUpdateSpeedJob.conf} | 23 +++++-------
...dJob.conf => QuotientFilterUpdateSpeedJob.conf} | 29 +++++++--------
5 files changed, 66 insertions(+), 47 deletions(-)
diff --git
a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterSpeedProfile.java
b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterUpdateSpeedProfile.java
similarity index 96%
rename from
src/main/java/org/apache/datasketches/characterization/filters/BaseFilterSpeedProfile.java
rename to
src/main/java/org/apache/datasketches/characterization/filters/BaseFilterUpdateSpeedProfile.java
index add495c..c017385 100644
---
a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterSpeedProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterUpdateSpeedProfile.java
@@ -8,10 +8,10 @@ import static java.lang.Math.log;
import static java.lang.Math.pow;
import static org.apache.datasketches.common.Util.pwr2SeriesNext;
-public abstract class BaseFilterSpeedProfile implements JobProfile {
+public abstract class BaseFilterUpdateSpeedProfile implements JobProfile {
Job job;
public Properties prop;
- public long vIn = 0;
+ public long vIn = 1;
int lgMinT;
int lgMaxT;
int lgMinU;
@@ -72,7 +72,7 @@ public abstract class BaseFilterSpeedProfile implements
JobProfile {
int lastU = 0;
final StringBuilder dataStr = new StringBuilder();
job.println(getHeader());
- while (lastU < maxU) { //Trials for each U point on X-axis, and one
row on output
+ while (lastU < 0.9*maxU) { //Trials for each U point on X-axis, and
one row on output
final int nextU = lastU == 0 ? minU : (int)pwr2SeriesNext(uPPO,
lastU);
lastU = nextU;
final int trials = getNumTrials(nextU);
diff --git
a/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpeedProfile.java
b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterUpdateSpeedProfile.java
similarity index 62%
rename from
src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpeedProfile.java
rename to
src/main/java/org/apache/datasketches/characterization/filters/BloomFilterUpdateSpeedProfile.java
index e49d884..b968cfd 100644
---
a/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpeedProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterUpdateSpeedProfile.java
@@ -1,22 +1,11 @@
package org.apache.datasketches.characterization.filters;
-import org.apache.datasketches.Properties;
-import org.apache.datasketches.characterization.ZipfDistribution;
-//import
org.apache.datasketches.characterization.filters.BaseFilterSpeedProfile;
-import org.apache.datasketches.common.Family;
-import org.apache.datasketches.common.ResizeFactor;
import org.apache.datasketches.filters.bloomfilter.BloomFilter;
-import org.apache.datasketches.frequencies.LongsSketch;
-import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableHandle;
import org.apache.datasketches.memory.WritableMemory;
-import org.apache.datasketches.theta.Sketch;
-import org.apache.datasketches.theta.UpdateSketch;
-import org.apache.datasketches.theta.UpdateSketchBuilder;
import org.apache.datasketches.filters.bloomfilter.BloomFilterBuilder;
-import org.apache.datasketches.filters.bloomfilter.BloomFilter;
-public class BloomFilterSpeedProfile extends BaseFilterSpeedProfile{
+public class BloomFilterUpdateSpeedProfile extends
BaseFilterUpdateSpeedProfile{
protected BloomFilter sketch;
private WritableHandle handle;
private WritableMemory wmem;
@@ -24,7 +13,6 @@ public class BloomFilterSpeedProfile extends
BaseFilterSpeedProfile{
@Override
public void configure() {
//Configure Sketch
- //final long numBits, final int numHashes, final long seed)
final long numBits = Integer.parseInt(prop.mustGet("numBits"));
final int numHashes = Integer.parseInt(prop.mustGet("numHashes"));
sketch = BloomFilterBuilder.createBySize(numBits, numHashes);
diff --git
a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterUpdateSpeedProfile.java
b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterUpdateSpeedProfile.java
new file mode 100644
index 0000000..e73e6d0
--- /dev/null
+++
b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterUpdateSpeedProfile.java
@@ -0,0 +1,41 @@
+package org.apache.datasketches.characterization.filters;
+
+import org.apache.datasketches.memory.WritableHandle;
+import org.apache.datasketches.memory.WritableMemory;
+
+
+import org.apache.datasketches.filters.quotientfilter.QuotientFilter;
+
+public class QuotientFilterUpdateSpeedProfile extends
BaseFilterUpdateSpeedProfile{
+ protected QuotientFilter sketch;
+ protected int lgNumSlots ;
+ protected int numBitsPerSlot;
+ private WritableHandle handle;
+ private WritableMemory wmem;
+
+ @Override
+ public void configure() {
+ lgNumSlots = Integer.parseInt(prop.mustGet("lgNumSlots"));
+ numBitsPerSlot = Integer.parseInt(prop.mustGet("numBitsPerSlot"));
+ }
+
+ @Override
+ public void cleanup() {
+ try {
+ if (handle != null) { handle.close(); }
+ } catch (final Exception e) {}
+ }
+
+ @Override
+ public double doTrial(final int uPerTrial) {
+ //sketch.reset(); //is not implemented
+ sketch = new QuotientFilter(lgNumSlots, numBitsPerSlot);
+ final long startUpdateTime_nS = System.nanoTime();
+ for (int u = uPerTrial; u-- > 0;) {
+ sketch.insert(++vIn);
+ }
+ final long updateTime_nS = System.nanoTime() - startUpdateTime_nS;
+ return (double) updateTime_nS / uPerTrial;
+ }
+}
+
diff --git a/src/main/resources/filters/BloomFilterSpeedJob.conf
b/src/main/resources/filters/BloomFilterUpdateSpeedJob.conf
similarity index 68%
copy from src/main/resources/filters/BloomFilterSpeedJob.conf
copy to src/main/resources/filters/BloomFilterUpdateSpeedJob.conf
index 0f0c8e5..a88b7dc 100644
--- a/src/main/resources/filters/BloomFilterSpeedJob.conf
+++ b/src/main/resources/filters/BloomFilterUpdateSpeedJob.conf
@@ -17,16 +17,9 @@
# Job
-# The Bloom Filter Update Speed profile is evaluated by choosing a maximum
input cardinality.
-# From this input cardinality N, and a target false positive probability Fpp,
we evaluate
-# ```
-# final long numItems = 1L<<20;
-# final double targetFpp = 1E-6 ;
-# long numBits = BloomFilterBuilder.suggestNumFilterBits(numItems, targetFpp);
-# final short numHashes = BloomFilterBuilder.suggestNumHashes(numItems,
numBits);
-# ```
-# and then use these parameters for the filter configuration at the bottom of
this file.
-# Note that `final long numItems = 1L<<20` should correspond direclty with
`Trials_lgMaxU`
+# The Bloom Filter Update Speed profile is evaluated by fixing a size in bits
for the filter
+# and inserting items up to a maximum input cardinality N.
+# Note that the maximum input cardinality N = 1L<<20 should correspond directly with
`Trials_lgMaxU`
# in this configuration file.
# Uniques Profile
@@ -35,8 +28,8 @@ Trials_lgMaxU=20 #How high the # uniques go 20
Trials_UPPO=16 #The horizontal x-resolution of trials points
# Trials Profile
-Trials_lgMaxT=10 #Max trials at start (low counts) 23
-Trials_lgMinT=4 #Min trials at tail (high counts) 4
+Trials_lgMaxT=12 #Max trials at start (low counts) 23
+Trials_lgMinT=6 #Min trials at tail (high counts) 4
#Trails Speed related
Trials_lgMinBpU=4 #start the downward slope of trials at this U 4
@@ -49,6 +42,6 @@ FileNameDateFormat=yyyyMMdd'_'HHmmssz
ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
#Job Profile
-JobProfile=org.apache.datasketches.characterization.filters.BloomFilterSpeedProfile
-numBits = 30151987
-numHashes = 20
+JobProfile=org.apache.datasketches.characterization.filters.BloomFilterUpdateSpeedProfile
+numBits = 16777216
+numHashes = 16
diff --git a/src/main/resources/filters/BloomFilterSpeedJob.conf
b/src/main/resources/filters/QuotientFilterUpdateSpeedJob.conf
similarity index 61%
rename from src/main/resources/filters/BloomFilterSpeedJob.conf
rename to src/main/resources/filters/QuotientFilterUpdateSpeedJob.conf
index 0f0c8e5..b073077 100644
--- a/src/main/resources/filters/BloomFilterSpeedJob.conf
+++ b/src/main/resources/filters/QuotientFilterUpdateSpeedJob.conf
@@ -17,17 +17,14 @@
# Job
-# The Bloom Filter Update Speed profile is evaluated by choosing a maximum
input cardinality.
-# From this input cardinality N, and a target false positive probability Fpp,
we evaluate
-# ```
-# final long numItems = 1L<<20;
-# final double targetFpp = 1E-6 ;
-# long numBits = BloomFilterBuilder.suggestNumFilterBits(numItems, targetFpp);
-# final short numHashes = BloomFilterBuilder.suggestNumHashes(numItems,
numBits);
-# ```
-# and then use these parameters for the filter configuration at the bottom of
this file.
-# Note that `final long numItems = 1L<<20` should correspond direclty with
`Trials_lgMaxU`
-# in this configuration file.
+# We configure the Quotient Filter by fixing the input cardinality, N =
alpha*2^j
+# for some parameter alpha.
+# The number of slots in the filter is set as lgNumSlots = j.
+# The number of bits per slot, numBitsPerSlot, is a parameter and then the
total
+# number of bits in the filter is (1L<<lgNumSlots)*numBitsPerSlot.
+# This setting should yield a filter with a target load factor of alpha and a
false positive
+# probability of approximately 1/2^(numBitsPerSlot-3).
+
# Uniques Profile
Trials_lgMinU=0 #The starting # of uniques that is printed at the end. 0
@@ -35,8 +32,8 @@ Trials_lgMaxU=20 #How high the # uniques go 20
Trials_UPPO=16 #The horizontal x-resolution of trials points
# Trials Profile
-Trials_lgMaxT=10 #Max trials at start (low counts) 23
-Trials_lgMinT=4 #Min trials at tail (high counts) 4
+Trials_lgMaxT=12 #Max trials at start (low counts) 23
+Trials_lgMinT=6 #Min trials at tail (high counts) 4
#Trails Speed related
Trials_lgMinBpU=4 #start the downward slope of trials at this U 4
@@ -49,6 +46,6 @@ FileNameDateFormat=yyyyMMdd'_'HHmmssz
ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
#Job Profile
-JobProfile=org.apache.datasketches.characterization.filters.BloomFilterSpeedProfile
-numBits = 30151987
-numHashes = 20
+JobProfile=org.apache.datasketches.characterization.filters.QuotientFilterUpdateSpeedProfile
+lgNumSlots = 20
+numBitsPerSlot = 16
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]