This is an automated email from the ASF dual-hosted git repository.
charlie pushed a commit to branch add-python-plotting
in repository
https://gitbox.apache.org/repos/asf/datasketches-characterization.git
The following commit(s) were added to refs/heads/add-python-plotting by this
push:
new 90b15e6 Added accuracy vs num hashes profiles
90b15e6 is described below
commit 90b15e63f400a7b3eeca987e7b5a6634df59256d
Author: c-dickens <[email protected]>
AuthorDate: Mon Aug 19 15:03:47 2024 +0100
Added accuracy vs num hashes profiles
---
.../filters/BaseFilterAccuracyProfile.java | 10 ++---
.../filters/QuotientFilterAccuracyProfile.java | 2 +-
.../resources/filters/BloomFilterAccuracyJob.conf | 6 +--
.../filters/QuotientFilterAccuracyJob.conf | 6 +--
...loomFilterAccuracyProfile20240612_050335PST.txt | 49 ++++++++++++++++++++++
...ientFilterAccuracyProfile20240612_062110PST.txt | 48 +++++++++++++++++++++
...ientFilterAccuracyProfile20240617_065441PST.txt | 48 +++++++++++++++++++++
7 files changed, 155 insertions(+), 14 deletions(-)
diff --git
a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterAccuracyProfile.java
b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterAccuracyProfile.java
index 2304b73..77931e6 100644
---
a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterAccuracyProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterAccuracyProfile.java
@@ -25,7 +25,6 @@ public abstract class BaseFilterAccuracyProfile implements
JobProfile{
int minNumHashes;
int maxNumHashes ;
int bitsPerEntry;
-
int lgMinT;
int lgMaxT;
int tPPO;
@@ -124,8 +123,8 @@ public abstract class BaseFilterAccuracyProfile implements
JobProfile{
}
fpr /= numTrials;
filterNumBits /= numTrials;
- bitsPerEntry = getBitsperEntry(nh);
- process(nh, bitsPerEntry, fpr, filterNumBits, numQueries,
numTrials, dataStr);
+ //bitsPerEntry = getBitsperEntry(nh);
+ process(nh, fpr, filterNumBits, numQueries, numTrials, dataStr);
job.println(dataStr.toString());
numQueries = (int)pwr2SeriesNext(1, 1L<<(nh+1));
}
@@ -160,20 +159,18 @@ public abstract class BaseFilterAccuracyProfile
implements JobProfile{
* Processes the results of a trial and appends them to a StringBuilder in
a tab-separated format.
*
* @param numHashes The number of hashes used in the trial.
- * @param bitsPerEntry The number of bits per entry in the trial.
* @param falsePositiveRate The false positive rate observed in the trial.
* @param filterSizeBits The size of the filter used in the trial, in bits.
* @param numQueryPoints The number of query points used in the trial.
* @param numTrials The number of trials conducted.
* @param sb The StringBuilder to which the results are appended.
*/
- private static void process(final int numHashes, final int bitsPerEntry,
final double falsePositiveRate,
+ private static void process(final int numHashes, final double
falsePositiveRate,
final long filterSizeBits, final long
numQueryPoints,
final long numTrials, final StringBuilder sb)
{
// OUTPUT
sb.setLength(0);
sb.append(numHashes).append(TAB);
- sb.append(bitsPerEntry).append(TAB);
sb.append(String.format("%.5e", falsePositiveRate)).append(TAB);
sb.append(filterSizeBits).append(TAB);
sb.append(numQueryPoints).append(TAB);
@@ -187,7 +184,6 @@ public abstract class BaseFilterAccuracyProfile implements
JobProfile{
private String getHeader() {
final StringBuilder sb = new StringBuilder();
sb.append("numHashes").append(TAB);
- sb.append("bitsPerEntry").append(TAB);
sb.append("FPR").append(TAB);
sb.append("filterSizeBits").append(TAB);
sb.append("numQueryPoints").append(TAB);
diff --git
a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterAccuracyProfile.java
b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterAccuracyProfile.java
index 9bf5872..3448817 100644
---
a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterAccuracyProfile.java
+++
b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterAccuracyProfile.java
@@ -57,7 +57,7 @@ public class QuotientFilterAccuracyProfile extends
BaseFilterAccuracyProfile{
@Override
public long getFilterLengthBits() {
- return sketch.get_space_use();
+ return sketch.getSpaceUse();
}
diff --git a/src/main/resources/filters/BloomFilterAccuracyJob.conf
b/src/main/resources/filters/BloomFilterAccuracyJob.conf
index 2f55b3d..e8fca9c 100644
--- a/src/main/resources/filters/BloomFilterAccuracyJob.conf
+++ b/src/main/resources/filters/BloomFilterAccuracyJob.conf
@@ -24,11 +24,11 @@
# Uniques Profile
Universe_lgU=20 # Maximum log2 of the input set.
-Universe_capacity = 0.9 # this is used to get number of uniques inserted:
numUniques = Trials_capacity *(2^Trials_lgU)
+Universe_capacity = 0.8 # this is used to get the number of uniques inserted:
numUniques = Universe_capacity * (2^Universe_lgU)
# Trials Profile
-Trials_lgMinT=4 #Min trials at tail (high counts) 4
-Trials_lgMaxT=11 #Min trials at tail (high counts) 4
+Trials_lgMinT=0 #Min trials at tail (high counts) 4
+Trials_lgMaxT=0 #Max trials at start (low counts)
Trials_TPPO=1 #how often intermediate results are printed
Trials_lgMinBpU=1 #start the downward slope of trials at this LgU
Trials_lgMaxBpU=5 #stop the downward slope of trials at this LgU
diff --git a/src/main/resources/filters/QuotientFilterAccuracyJob.conf
b/src/main/resources/filters/QuotientFilterAccuracyJob.conf
index 37d4ef1..e1b4ebc 100644
--- a/src/main/resources/filters/QuotientFilterAccuracyJob.conf
+++ b/src/main/resources/filters/QuotientFilterAccuracyJob.conf
@@ -24,12 +24,12 @@
# Uniques Profile
Universe_lgU=20 # Maximum log2 of the input set.
-Universe_capacity = 0.75 # this is used to get number of uniques inserted:
numUniques = Trials_capacity *(2^Trials_lgU)
+Universe_capacity = 0.8 # this is used to get the number of uniques inserted:
numUniques = Universe_capacity * (2^Universe_lgU)
# Trials Profile
-Trials_lgMinT=4 #Min trials at tail (high counts) 4
-Trials_lgMaxT=11 #Min trials at tail (high counts) 4
+Trials_lgMinT=0 #Min trials at tail (high counts) 4
+Trials_lgMaxT=0 #Max trials at start (low counts)
Trials_TPPO=1 #how often intermediate results are printed
Trials_lgMinBpU=1 #start the downward slope of trials at this LgU
Trials_lgMaxBpU=5 #stop the downward slope of trials at this LgU
diff --git a/src/results/BloomFilterAccuracyProfile20240612_050335PST.txt
b/src/results/BloomFilterAccuracyProfile20240612_050335PST.txt
new file mode 100644
index 0000000..d067369
--- /dev/null
+++ b/src/results/BloomFilterAccuracyProfile20240612_050335PST.txt
@@ -0,0 +1,49 @@
+START JOB BloomFilterAccuracyProfile
+Date Time: 2024/06/12 05:03:35 PST
+numHashes bitsPerEntry FPR filterSizeBits numQueryPoints
numTrials
+4 5 6.62007e-02 5446016 32 608
+5 7 3.12879e-02 6807488 64 412
+6 8 1.54944e-02 8169024 128 299
+7 10 7.69257e-03 9530496 256 228
+8 11 4.18681e-03 10892032 512 181
+9 12 1.82690e-03 12253504 1024 147
+10 14 1.01659e-03 13614976 2048 122
+11 15 4.78800e-04 14976512 4096 103
+12 17 2.34540e-04 16337984 8192 89
+13 18 1.29997e-04 17699520 16384 77
+14 20 5.92132e-05 19060992 32768 67
+15 21 2.97546e-05 20422464 65536 60
+16 23 1.88576e-05 21784000 131072 53
+17 24 7.70887e-06 23145472 262144 48
+18 25 3.32677e-06 24507008 524288 43
+19 27 2.00516e-06 25868480 1048576 39
+20 28 8.74201e-07 27229952 2097152 36
+21 30 5.12961e-07 28591488 4194304 33
+22 31 2.86102e-07 29952960 8388608 30
+23 33 1.49012e-07 31314496 16777216 28
+24 34 5.50197e-08 32675968 33554432 26
+
+PROPERTIES:
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+filterLengthBits=16777216
+JobProfile=org.apache.datasketches.characterization.filters.BloomFilterAccuracyProfile
+lgNumQueries=20
+maxNumHashes=24
+minNumHashes=4
+OutputFileName=BloomFilterAccuracyProfile20240612_050335PST.txt
+OutputFileNameData=BloomFilterAccuracyProfile20240612_050335PST.tsv
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
+TimeZone=PST
+TimeZoneOffset=-28800000
+Trials_lgMaxBpU=5
+Trials_lgMaxT=11
+Trials_lgMinBpU=1
+Trials_lgMinT=4
+Trials_TPPO=1
+Universe_capacity=0.9
+Universe_lgU=20
+
+Total Job Time: 0:03:36.541
+END JOB BloomFilterAccuracyProfile
+
+
diff --git a/src/results/QuotientFilterAccuracyProfile20240612_062110PST.txt
b/src/results/QuotientFilterAccuracyProfile20240612_062110PST.txt
new file mode 100644
index 0000000..6b10d80
--- /dev/null
+++ b/src/results/QuotientFilterAccuracyProfile20240612_062110PST.txt
@@ -0,0 +1,48 @@
+START JOB QuotientFilterAccuracyProfile
+Date Time: 2024/06/12 06:21:10 PST
+numHashes bitsPerEntry FPR filterSizeBits numQueryPoints
numTrials
+4 4 3.12449e-01 4194304 32 608
+5 5 1.68803e-01 5242880 64 412
+6 6 8.94910e-02 6291456 128 299
+7 7 4.55558e-02 7340032 256 228
+8 8 2.32756e-02 8388608 512 181
+9 9 1.18184e-02 9437184 1024 147
+10 10 5.85537e-03 10485760 2048 122
+11 11 2.92021e-03 11534336 4096 103
+12 12 1.44564e-03 12582912 8192 89
+13 13 7.21325e-04 13631488 16384 77
+14 14 3.70766e-04 14680064 32768 67
+15 15 1.88446e-04 15728640 65536 60
+16 16 9.39999e-05 16777216 131072 53
+17 17 4.57764e-05 17825792 262144 48
+18 18 2.21341e-05 18874368 524288 43
+19 19 1.10284e-05 19922944 1048576 39
+20 20 5.97371e-06 20971520 2097152 36
+21 21 2.92605e-06 22020096 4194304 33
+22 22 1.49806e-06 23068672 8388608 30
+23 23 6.96097e-07 24117248 16777216 28
+24 24 3.67944e-07 25165824 33554432 26
+
+PROPERTIES:
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+JobProfile=org.apache.datasketches.characterization.filters.QuotientFilterAccuracyProfile
+lgNumQueries=20
+maxNumHashes=24
+minNumHashes=4
+OutputFileName=QuotientFilterAccuracyProfile20240612_062110PST.txt
+OutputFileNameData=QuotientFilterAccuracyProfile20240612_062110PST.tsv
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
+TimeZone=PST
+TimeZoneOffset=-28800000
+Trials_lgMaxBpU=5
+Trials_lgMaxT=11
+Trials_lgMinBpU=1
+Trials_lgMinT=4
+Trials_TPPO=1
+Universe_capacity=0.75
+Universe_lgU=20
+
+Total Job Time: 0:05:18.549
+END JOB QuotientFilterAccuracyProfile
+
+
diff --git a/src/results/QuotientFilterAccuracyProfile20240617_065441PST.txt
b/src/results/QuotientFilterAccuracyProfile20240617_065441PST.txt
new file mode 100644
index 0000000..2d4d3b7
--- /dev/null
+++ b/src/results/QuotientFilterAccuracyProfile20240617_065441PST.txt
@@ -0,0 +1,48 @@
+START JOB QuotientFilterAccuracyProfile
+Date Time: 2024/06/17 06:54:41 PST
+numHashes bitsPerEntry FPR filterSizeBits numQueryPoints
numTrials
+4 4 3.59067e-01 4194304 32 608
+5 5 2.01949e-01 5242880 64 412
+6 6 1.03522e-01 6291456 128 299
+7 7 5.34025e-02 7340032 256 228
+8 8 2.74840e-02 8388608 512 181
+9 9 1.37383e-02 9437184 1024 147
+10 10 7.02004e-03 10485760 2048 122
+11 11 3.26153e-03 11534336 4096 103
+12 12 1.75287e-03 12582912 8192 89
+13 13 8.58456e-04 13631488 16384 77
+14 14 4.60041e-04 14680064 32768 67
+15 15 2.18201e-04 15728640 65536 60
+16 16 1.10122e-04 16777216 131072 53
+17 17 5.43594e-05 17825792 262144 48
+18 18 2.86989e-05 18874368 524288 43
+19 19 1.39383e-05 19922944 1048576 39
+20 20 6.83467e-06 20971520 2097152 36
+21 21 3.29451e-06 22020096 4194304 33
+22 22 1.68880e-06 23068672 8388608 30
+23 23 8.62139e-07 24117248 16777216 28
+24 24 4.77983e-07 25165824 33554432 26
+
+PROPERTIES:
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+JobProfile=org.apache.datasketches.characterization.filters.QuotientFilterAccuracyProfile
+lgNumQueries=20
+maxNumHashes=24
+minNumHashes=4
+OutputFileName=QuotientFilterAccuracyProfile20240617_065441PST.txt
+OutputFileNameData=QuotientFilterAccuracyProfile20240617_065441PST.tsv
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
+TimeZone=PST
+TimeZoneOffset=-28800000
+Trials_lgMaxBpU=5
+Trials_lgMaxT=11
+Trials_lgMinBpU=1
+Trials_lgMinT=4
+Trials_TPPO=1
+Universe_capacity=0.9
+Universe_lgU=20
+
+Total Job Time: 0:16:45.971
+END JOB QuotientFilterAccuracyProfile
+
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]