This is an automated email from the ASF dual-hosted git repository.
charlie pushed a commit to branch master
in repository
https://gitbox.apache.org/repos/asf/datasketches-characterization.git
The following commit(s) were added to refs/heads/master by this push:
new 5f6e5f7 Added initial filter update times script
5f6e5f7 is described below
commit 5f6e5f77e89dd1c613fe08e6a5d1efb3f7928230
Author: Charlie Dickens <[email protected]>
AuthorDate: Tue Jun 4 11:08:09 2024 +0100
Added initial filter update times script
---
.../filters/BaseFilterSpeedProfile.java | 157 +++++++++++++++++++++
.../filters/BloomFilterSpeedProfile.java | 52 +++++++
.../resources/filters/BloomFilterSpeedJob.conf | 54 +++++++
3 files changed, 263 insertions(+)
diff --git
a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterSpeedProfile.java
b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterSpeedProfile.java
new file mode 100644
index 0000000..add495c
--- /dev/null
+++
b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterSpeedProfile.java
@@ -0,0 +1,157 @@
+package org.apache.datasketches.characterization.filters;
+
+import org.apache.datasketches.Job;
+import org.apache.datasketches.JobProfile;
+import org.apache.datasketches.Properties;
+
+import static java.lang.Math.log;
+import static java.lang.Math.pow;
+import static org.apache.datasketches.common.Util.pwr2SeriesNext;
+
+public abstract class BaseFilterSpeedProfile implements JobProfile {
+ Job job;
+ public Properties prop;
+ public long vIn = 0;
+ int lgMinT;
+ int lgMaxT;
+ int lgMinU;
+ int lgMaxU;
+ int uPPO;
+ int lgMinBpU;
+ int lgMaxBpU;
+ int numSketches = 1;
+ double slope;
+
+ //JobProfile
+ /**
+  * Runs this profile: reads the trial configuration from the job's properties,
+  * lets the subclass configure itself, runs all trials, then shuts down and
+  * cleans up.
+  *
+  * <p>{@code shutdown()} and {@code cleanup()} are invoked from {@code finally}
+  * blocks so that they still run when {@code configure()} or a trial throws.</p>
+  *
+  * @param job the job that supplies the properties and receives the output rows
+  */
+ @Override
+ public void start(final Job job) {
+   this.job = job;
+   prop = job.getProperties();
+   lgMinT = Integer.parseInt(prop.mustGet("Trials_lgMinT"));
+   lgMaxT = Integer.parseInt(prop.mustGet("Trials_lgMaxT"));
+   lgMinU = Integer.parseInt(prop.mustGet("Trials_lgMinU"));
+   lgMaxU = Integer.parseInt(prop.mustGet("Trials_lgMaxU"));
+   uPPO = Integer.parseInt(prop.mustGet("Trials_UPPO"));
+   lgMinBpU = Integer.parseInt(prop.mustGet("Trials_lgMinBpU"));
+   lgMaxBpU = Integer.parseInt(prop.mustGet("Trials_lgMaxBpU"));
+   //Optional property: fall back to one sketch per set when the lookup returns null.
+   final String nSk = prop.get("NumSketches");
+   numSketches = (nSk != null) ? Integer.parseInt(nSk) : 1;
+   //Slope of lg(trials) versus lg(uniques) between the two break points;
+   //used by getNumTrials to interpolate the trial count.
+   slope = (double) (lgMaxT - lgMinT) / (lgMinBpU - lgMaxBpU);
+   try {
+     configure();
+     doTrials();
+   } finally {
+     //Always release whatever the profile acquired, even on failure.
+     try {
+       shutdown();
+     } finally {
+       cleanup();
+     }
+   }
+ }
+
+ /**
+  * Called by start() once all trials have run, before cleanup().
+  * This base implementation does nothing.
+  */
+ @Override
+ public void shutdown() {}
+
+ /**
+  * Called by start() as the last step, after shutdown().
+  * This base implementation does nothing.
+  */
+ @Override
+ public void cleanup() {}
+ //end JobProfile
+
+ /**
+  * Configure the sketch.
+  * Called once by start(), after the trial properties have been read into the
+  * fields of this class and before any trial is run.
+  */
+ public abstract void configure();
+
+ /**
+  * Runs one trial and returns the average update time per update for it.
+  * The trial loop in this class sums the returned values over all trials at the
+  * same number of uniques and reports their mean, treating them as nanoseconds.
+  * @param uPerTrial the number of unique updates for this trial
+  * @return the average update time per update for this trial
+  */
+ public abstract double doTrial(final int uPerTrial);
+
+ /**
+  * Walks the axis of unique counts, starting at 2^lgMinU and continuing while the
+  * current point is below 2^lgMaxU. For each point it obtains the trial count from
+  * getNumTrials, averages the values returned by doTrial, and prints one output row.
+  * A header row is printed before the first data row.
+  */
+ private void doTrials() {
+   final int uLimit = 1 << lgMaxU;
+   final int uFirst = 1 << lgMinU;
+   final StringBuilder row = new StringBuilder(); //reused for every output row
+   job.println(getHeader());
+   for (int curU = 0; curU < uLimit; ) {
+     //The first point is uFirst; later points come from pwr2SeriesNext.
+     if (curU == 0) {
+       curU = uFirst;
+     } else {
+       curU = (int) pwr2SeriesNext(uPPO, curU);
+     }
+     final int numTrials = getNumTrials(curU);
+
+     System.gc(); //much slower but cleaner plots
+     double totalTimePerU_nS = 0;
+     int remaining = numTrials;
+     while (remaining-- > 0) {
+       totalTimePerU_nS += doTrial(curU);
+     }
+
+     process(totalTimePerU_nS / numTrials, numTrials, curU, row, numSketches);
+     job.println(row.toString());
+   }
+ }
+
+ /**
+  * Returns how many trials to run at the given number of uniques. The count is
+  * 2^lgMaxT up to the first break point (2^lgMinBpU), 2^lgMinT from the last break
+  * point (2^lgMaxBpU) onward, and interpolated linearly in log2 space in between,
+  * so fewer trials are run as the number of uniques grows.
+  *
+  * @param curU the given current number of uniques for a trial set.
+  * @return the number of trials for a given current number of uniques for a
+  * trial set.
+  */
+ private int getNumTrials(final int curU) {
+   //Flat profile, or not yet past the first break point: use the maximum.
+   if ((lgMinT == lgMaxT) || (curU <= (1 << lgMinBpU))) {
+     return 1 << lgMaxT;
+   }
+   //At or beyond the last break point: use the minimum.
+   if (curU >= (1 << lgMaxBpU)) {
+     return 1 << lgMinT;
+   }
+   //Between the break points: follow the line through (lgMinBpU, lgMaxT) with the given slope.
+   final double log2U = log(curU) / LN2;
+   final double log2Trials = lgMaxT + slope * (log2U - lgMinBpU);
+   return (int) pow(2.0, log2Trials);
+ }
+
+ /**
+  * Formats one tab-separated output row into sb, replacing its previous content.
+  * The row holds the uniques per trial, the trial count and the mean time; when
+  * numSketches is greater than one, the mean divided by numSketches is appended.
+  *
+  * @param meanUpdateTimePerSet_nS mean update time per update set in nanoseconds.
+  * @param trials number of trials that were averaged
+  * @param uPerTrial number of uniques per trial
+  * @param sb The StringBuilder object that is reused for each row of output
+  * @param numSketches the number of sketches per set.
+  */
+ private static void process(final double meanUpdateTimePerSet_nS, final int trials,
+     final int uPerTrial, final StringBuilder sb, final int numSketches) {
+   sb.setLength(0); //discard the previous row
+   sb.append(uPerTrial).append(TAB)
+       .append(trials).append(TAB)
+       .append(meanUpdateTimePerSet_nS);
+   if (numSketches <= 1) {
+     return;
+   }
+   //Several sketches per set: add the per-sketch column.
+   sb.append(TAB).append(meanUpdateTimePerSet_nS / numSketches);
+ }
+
+ /**
+  * Builds the tab-separated column header row. The "nS/Sketch" column is only
+  * present when numSketches is greater than one.
+  * @return a column header row
+  */
+ private String getHeader() {
+   final StringBuilder header = new StringBuilder();
+   header.append("InU").append(TAB).append("Trials").append(TAB).append("nS/Set");
+   if (numSketches > 1) {
+     header.append(TAB).append("nS/Sketch");
+   }
+   return header.toString();
+ }
+}
+
+
diff --git
a/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpeedProfile.java
b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpeedProfile.java
new file mode 100644
index 0000000..e49d884
--- /dev/null
+++
b/src/main/java/org/apache/datasketches/characterization/filters/BloomFilterSpeedProfile.java
@@ -0,0 +1,52 @@
+package org.apache.datasketches.characterization.filters;
+
+import org.apache.datasketches.Properties;
+import org.apache.datasketches.characterization.ZipfDistribution;
+//import
org.apache.datasketches.characterization.filters.BaseFilterSpeedProfile;
+import org.apache.datasketches.common.Family;
+import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.filters.bloomfilter.BloomFilter;
+import org.apache.datasketches.frequencies.LongsSketch;
+import org.apache.datasketches.memory.Memory;
+import org.apache.datasketches.memory.WritableHandle;
+import org.apache.datasketches.memory.WritableMemory;
+import org.apache.datasketches.theta.Sketch;
+import org.apache.datasketches.theta.UpdateSketch;
+import org.apache.datasketches.theta.UpdateSketchBuilder;
+import org.apache.datasketches.filters.bloomfilter.BloomFilterBuilder;
+import org.apache.datasketches.filters.bloomfilter.BloomFilter;
+
+public class BloomFilterSpeedProfile extends BaseFilterSpeedProfile{
+ protected BloomFilter sketch;
+ private WritableHandle handle;
+ private WritableMemory wmem;
+
+ /**
+  * Builds the Bloom filter under test from the "numBits" and "numHashes"
+  * job properties.
+  */
+ @Override
+ public void configure() {
+   //numBits is a long, so parse it as one; Integer.parseInt would reject any
+   //configured size above Integer.MAX_VALUE bits.
+   final long numBits = Long.parseLong(prop.mustGet("numBits"));
+   final int numHashes = Integer.parseInt(prop.mustGet("numHashes"));
+   sketch = BloomFilterBuilder.createBySize(numBits, numHashes);
+ }
+
+ /**
+  * Closes the memory handle if one is held. Any exception raised by the close
+  * is deliberately discarded.
+  */
+ @Override
+ public void cleanup() {
+   if (handle == null) {
+     return;
+   }
+   try {
+     handle.close();
+   } catch (final Exception ignored) {
+     //best-effort close at the end of the run; nothing useful to do on failure
+   }
+ }
+
+ /**
+  * Resets the filter, then times uPerTrial updates. Each update uses the next
+  * value of the running counter vIn, which this method never resets.
+  *
+  * @param uPerTrial the number of updates to perform in this trial
+  * @return the elapsed nanoseconds divided by uPerTrial
+  */
+ @Override
+ public double doTrial(final int uPerTrial) {
+   sketch.reset(); //kept outside the timed region
+   final long t0_nS = System.nanoTime();
+   for (int i = 0; i < uPerTrial; i++) {
+     sketch.update(++vIn);
+   }
+   final long elapsed_nS = System.nanoTime() - t0_nS;
+   return (double) elapsed_nS / uPerTrial;
+ }
+}
+
diff --git a/src/main/resources/filters/BloomFilterSpeedJob.conf
b/src/main/resources/filters/BloomFilterSpeedJob.conf
new file mode 100644
index 0000000..0f0c8e5
--- /dev/null
+++ b/src/main/resources/filters/BloomFilterSpeedJob.conf
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Job
+
+# The Bloom Filter Update Speed profile is evaluated by choosing a maximum
input cardinality.
+# From this input cardinality N, and a target false positive probability Fpp,
we evaluate
+# ```
+# final long numItems = 1L<<20;
+# final double targetFpp = 1E-6 ;
+# long numBits = BloomFilterBuilder.suggestNumFilterBits(numItems, targetFpp);
+# final short numHashes = BloomFilterBuilder.suggestNumHashes(numItems,
numBits);
+# ```
+# and then use these parameters for the filter configuration at the bottom of
this file.
+# Note that `final long numItems = 1L<<20` should correspond directly with
`Trials_lgMaxU`
+# in this configuration file.
+
+# Uniques Profile
+Trials_lgMinU=0 #The starting # of uniques that is printed at the end. 0
+Trials_lgMaxU=20 #How high the # uniques go 20
+Trials_UPPO=16 #The horizontal x-resolution of trials points
+
+# Trials Profile
+Trials_lgMaxT=10 #Max trials at start (low counts) 23
+Trials_lgMinT=4 #Min trials at tail (high counts) 4
+
+#Trials Speed related
+Trials_lgMinBpU=4 #start the downward slope of trials at this U 4
+Trials_lgMaxBpU=20 #stop the downward slope of trials at this U 20
+
+# Date-Time Profile
+TimeZone=PST
+TimeZoneOffset=-28800000 # offset in millisec
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
+
+#Job Profile
+JobProfile=org.apache.datasketches.characterization.filters.BloomFilterSpeedProfile
+numBits = 30151987
+numHashes = 20
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]