(datasketches-characterization) branch master updated: Added initial filter update times script

charlie Tue, 04 Jun 2024 03:08:50 -0700

This is an automated email from the ASF dual-hosted git repository.

charlie pushed a commit to branch master
in repository 
https://gitbox.apache.org/repos/asf/datasketches-characterization.git



The following commit(s) were added to refs/heads/master by this push:
     new 5f6e5f7  Added initial filter update times script
5f6e5f7 is described below

commit 5f6e5f77e89dd1c613fe08e6a5d1efb3f7928230
Author: Charlie Dickens <[email protected]>
AuthorDate: Tue Jun 4 11:08:09 2024 +0100

    Added initial filter update times script
---
 .../filters/BaseFilterSpeedProfile.java            | 157 +++++++++++++++++++++
 .../filters/BloomFilterSpeedProfile.java           |  52 +++++++
 .../resources/filters/BloomFilterSpeedJob.conf     |  54 +++++++
 3 files changed, 263 insertions(+)

diff --git 
a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterSpeedProfile.java
 
b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterSpeedProfile.java
new file mode 100644
index 0000000..add495c
--- /dev/null
+++ 
b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterSpeedProfile.java
@@ -0,0 +1,157 @@
+package org.apache.datasketches.characterization.filters;
+
+import org.apache.datasketches.Job;
+import org.apache.datasketches.JobProfile;
+import org.apache.datasketches.Properties;
+
+import static java.lang.Math.log;
+import static java.lang.Math.pow;
+import static org.apache.datasketches.common.Util.pwr2SeriesNext;
+
+public abstract class BaseFilterSpeedProfile implements JobProfile {
+    Job job;
+    public Properties prop;
+    public long vIn = 0;
+    int lgMinT;
+    int lgMaxT;
+    int lgMinU;
+    int lgMaxU;
+    int uPPO;
+    int lgMinBpU;
+    int lgMaxBpU;
+    int numSketches = 1;
+    double slope;
+
+    //JobProfile
+    @Override
+    public void start(final Job job) {
+        this.job = job;
+        prop = job.getProperties();
+        lgMinT = Integer.parseInt(prop.mustGet("Trials_lgMinT"));
+        lgMaxT = Integer.parseInt(prop.mustGet("Trials_lgMaxT"));
+        lgMinU = Integer.parseInt(prop.mustGet("Trials_lgMinU"));
+        lgMaxU = Integer.parseInt(prop.mustGet("Trials_lgMaxU"));
+        uPPO = Integer.parseInt(prop.mustGet("Trials_UPPO"));
+        lgMinBpU = Integer.parseInt(prop.mustGet("Trials_lgMinBpU"));
+        lgMaxBpU = Integer.parseInt(prop.mustGet("Trials_lgMaxBpU"));
+        final String nSk = prop.get("NumSketches");
+        numSketches = (nSk != null) ? Integer.parseInt(nSk) : 1;
+        slope = (double) (lgMaxT - lgMinT) / (lgMinBpU - lgMaxBpU);
+        configure();
+        doTrials();
+        shutdown();
+        cleanup();
+    }
+
+    @Override
+    public void shutdown() {}
+
+    @Override
+    public void cleanup() {}
+    //end JobProfile
+
+    /**
+     * Configure the sketch
+     */
+    public abstract void configure();
+
+    /**
+     * Return the average update time per update for this trial
+     * @param uPerTrial the number of unique updates for this trial
+     * @return the average update time per update for this trial
+     */
+    public abstract double doTrial(final int uPerTrial);
+
+    /**
+     * Traverses all the unique axis points and performs trials(u) at each 
point
+     * and outputs a row per unique axis point.
+     */
+    private void doTrials() {
+        final int maxU = 1 << lgMaxU;
+        final int minU = 1 << lgMinU;
+        int lastU = 0;
+        final StringBuilder dataStr = new StringBuilder();
+        job.println(getHeader());
+        while (lastU < maxU) { //Trials for each U point on X-axis, and one 
row on output
+            final int nextU = lastU == 0 ? minU : (int)pwr2SeriesNext(uPPO, 
lastU);
+            lastU = nextU;
+            final int trials = getNumTrials(nextU);
+
+
+            System.gc(); //much slower but cleaner plots
+            double sumUpdateTimePerU_nS = 0;
+            for (int t = 0; t < trials; t++) {
+                sumUpdateTimePerU_nS += doTrial(nextU);
+            }
+            final double meanUpdateTimePerU_nS = sumUpdateTimePerU_nS / trials;
+
+            process(meanUpdateTimePerU_nS, trials, nextU, dataStr, 
numSketches);
+
+            job.println(dataStr.toString());
+        }
+    }
+
+    /**
+     * Computes the number of trials for a given current number of uniques for 
a
+     * trial set. This is used in speed trials and decreases the number of 
trials
+     * as the number of uniques increase.
+     *
+     * @param curU the given current number of uniques for a trial set.
+     * @return the number of trials for a given current number of uniques for a
+     * trial set.
+     */
+    private int getNumTrials(final int curU) {
+        final int minBpU = 1 << lgMinBpU;
+        final int maxBpU = 1 << lgMaxBpU;
+        final int maxT = 1 << lgMaxT;
+        final int minT = 1 << lgMinT;
+        if (lgMinT == lgMaxT || curU <= minBpU) {
+            return maxT;
+        }
+        if (curU >= maxBpU) {
+            return minT;
+        }
+        final double lgCurU = log(curU) / LN2;
+        final double lgTrials = slope * (lgCurU - lgMinBpU) + lgMaxT;
+        return (int) pow(2.0, lgTrials);
+    }
+
+    /**
+     * Process the results
+     *
+     * @param meanUpdateTimePerSet_nS mean update time per update set in 
nanoseconds.
+     * @param uPerTrial number of uniques per trial
+     * @param sb The StringBuilder object that is reused for each row of output
+     * @param numSketches the number of sketches per set.
+     */
+    private static void process(final double meanUpdateTimePerSet_nS, final 
int trials,
+                                final int uPerTrial, final StringBuilder sb, 
final int numSketches) {
+        // OUTPUT
+        sb.setLength(0);
+        sb.append(uPerTrial).append(TAB);
+        sb.append(trials).append(TAB);
+        sb.append(meanUpdateTimePerSet_nS);
+        if (numSketches > 1) {
+            sb.append(TAB);
+            sb.append(meanUpdateTimePerSet_nS / numSketches);
+        }
+    }
+
+    /**
+     * Returns a column header row
+     * @return a column header row
+     */
+    private String getHeader() {
+        final StringBuilder sb = new StringBuilder();
+        sb.append("InU").append(TAB);
+        sb.append("Trials").append(TAB);
+        sb.append("nS/Set");
+        if (numSketches > 1) {
+            sb.append(TAB);
+            sb.append("nS/Sketch");
+        }
+        return sb.toString();
+    }
+}
+
+
diff --git 
a/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpeedProfile.java
 
b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpeedProfile.java
new file mode 100644
index 0000000..e49d884
--- /dev/null
+++ 
b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpeedProfile.java
@@ -0,0 +1,52 @@
+package org.apache.datasketches.characterization.filters;
+
+import org.apache.datasketches.Properties;
+import org.apache.datasketches.characterization.ZipfDistribution;
+//import 
org.apache.datasketches.characterization.filters.BaseFilterSpeedProfile;
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.filters.bloomfilter.BloomFilter;
+import org.apache.datasketches.frequencies.LongsSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableHandle;
+import org.apache.datasketches.memory.WritableMemory;
+import org.apache.datasketches.theta.Sketch;
+import org.apache.datasketches.theta.UpdateSketch;
+import org.apache.datasketches.theta.UpdateSketchBuilder;
+import org.apache.datasketches.filters.bloomfilter.BloomFilterBuilder;
+import org.apache.datasketches.filters.bloomfilter.BloomFilter;
+
+/**
+ * Update-speed profile for a {@link BloomFilter}.
+ *
+ * <p>The filter is built once in {@link #configure()} from the {@code numBits} and
+ * {@code numHashes} job properties and is reset at the start of every trial.</p>
+ */
+public class BloomFilterSpeedProfile extends BaseFilterSpeedProfile {
+    protected BloomFilter sketch;
+    // Never assigned within this class, so cleanup() currently has nothing to close.
+    private WritableHandle handle;
+    // Not referenced anywhere else within this class.
+    private WritableMemory wmem;
+
+    /**
+     * Builds the filter under test from the {@code numBits} and {@code numHashes} properties.
+     */
+    @Override
+    public void configure() {
+        //Configure Sketch
+        // numBits is a long, so it is parsed as one: Integer.parseInt would throw a
+        // NumberFormatException for any configured size above Integer.MAX_VALUE bits.
+        final long numBits = Long.parseLong(prop.mustGet("numBits"));
+        final int numHashes = Integer.parseInt(prop.mustGet("numHashes"));
+        sketch = BloomFilterBuilder.createBySize(numBits, numHashes);
+    }
+
+    /**
+     * Closes the memory handle if one was opened.
+     */
+    @Override
+    public void cleanup() {
+        try {
+            if (handle != null) { handle.close(); }
+        } catch (final Exception ignored) {
+            // Best effort: a failure while releasing the handle is deliberately not propagated.
+        }
+    }
+
+    /**
+     * Resets the filter, feeds it {@code uPerTrial} consecutive long values and times the
+     * update loop only.
+     * @param uPerTrial the number of unique updates for this trial
+     * @return the elapsed nanoseconds of the update loop divided by {@code uPerTrial}
+     */
+    @Override
+    public double doTrial(final int uPerTrial) {
+        sketch.reset(); // done before the clock starts, so it is not part of the measurement
+        final long startUpdateTime_nS = System.nanoTime();
+
+        // vIn is never reset here, so every trial feeds values not used by earlier trials.
+        for (int u = uPerTrial; u-- > 0;) {
+            sketch.update(++vIn);
+        }
+        final long updateTime_nS = System.nanoTime() - startUpdateTime_nS;
+        return (double) updateTime_nS / uPerTrial;
+    }
+}
+
diff --git a/src/main/resources/filters/BloomFilterSpeedJob.conf 
b/src/main/resources/filters/BloomFilterSpeedJob.conf
new file mode 100644
index 0000000..0f0c8e5
--- /dev/null
+++ b/src/main/resources/filters/BloomFilterSpeedJob.conf
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Job
+
+# The Bloom Filter Update Speed profile is evaluated by choosing a maximum input cardinality.
+# From this input cardinality N and a target false positive probability Fpp, we evaluate
+# ```
+# final long numItems = 1L<<20;
+# final double targetFpp = 1E-6 ;
+# long numBits = BloomFilterBuilder.suggestNumFilterBits(numItems, targetFpp);
+# final short numHashes = BloomFilterBuilder.suggestNumHashes(numItems, numBits);
+# ```
+# and then use these parameters for the filter configuration at the bottom of this file.
+# Note that `final long numItems = 1L<<20` should correspond directly with `Trials_lgMaxU`
+# in this configuration file.
+
+# Uniques Profile
+Trials_lgMinU=0  #The starting # of uniques that is printed at the end. 0
+Trials_lgMaxU=20 #How high the # uniques go 20
+Trials_UPPO=16   #The horizontal x-resolution of trials points
+
+# Trials Profile
+Trials_lgMaxT=10 #Max trials at start (low counts) 23
+Trials_lgMinT=4  #Min trials at tail (high counts) 4
+
+#Trials Speed related
+Trials_lgMinBpU=4   #start the downward slope of trials at this U 4
+Trials_lgMaxBpU=20  #stop the downward slope of trials at this U 20
+
+# Date-Time Profile
+TimeZone=PST
+TimeZoneOffset=-28800000 # offset in millisec
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
+
+#Job Profile
+JobProfile=org.apache.datasketches.characterization.filters.BloomFilterSpeedProfile
+numBits = 30151987
+numHashes = 20


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datasketches-characterization) branch master updated: Added initial filter update times script

Reply via email to