This is an automated email from the ASF dual-hosted git repository.

charlie pushed a commit to branch add-python-plotting
in repository 
https://gitbox.apache.org/repos/asf/datasketches-characterization.git


The following commit(s) were added to refs/heads/add-python-plotting by this 
push:
     new 90b15e6  Added accuracy vs num hashes profiles
90b15e6 is described below

commit 90b15e63f400a7b3eeca987e7b5a6634df59256d
Author: c-dickens <[email protected]>
AuthorDate: Mon Aug 19 15:03:47 2024 +0100

    Added accuracy vs num hashes profiles
---
 .../filters/BaseFilterAccuracyProfile.java         | 10 ++---
 .../filters/QuotientFilterAccuracyProfile.java     |  2 +-
 .../resources/filters/BloomFilterAccuracyJob.conf  |  6 +--
 .../filters/QuotientFilterAccuracyJob.conf         |  6 +--
 ...loomFilterAccuracyProfile20240612_050335PST.txt | 49 ++++++++++++++++++++++
 ...ientFilterAccuracyProfile20240612_062110PST.txt | 48 +++++++++++++++++++++
 ...ientFilterAccuracyProfile20240617_065441PST.txt | 48 +++++++++++++++++++++
 7 files changed, 155 insertions(+), 14 deletions(-)

diff --git 
a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterAccuracyProfile.java
 
b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterAccuracyProfile.java
index 2304b73..77931e6 100644
--- 
a/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterAccuracyProfile.java
+++ 
b/src/main/java/org/apache/datasketches/characterization/filters/BaseFilterAccuracyProfile.java
@@ -25,7 +25,6 @@ public abstract class BaseFilterAccuracyProfile implements 
JobProfile{
     int minNumHashes;
     int maxNumHashes ;
     int bitsPerEntry;
-
     int lgMinT;
     int lgMaxT;
     int tPPO;
@@ -124,8 +123,8 @@ public abstract class BaseFilterAccuracyProfile implements 
JobProfile{
             }
             fpr /= numTrials;
             filterNumBits /= numTrials;
-            bitsPerEntry = getBitsperEntry(nh);
-            process(nh, bitsPerEntry, fpr, filterNumBits, numQueries, 
numTrials, dataStr);
+            //bitsPerEntry = getBitsperEntry(nh);
+            process(nh, fpr, filterNumBits, numQueries, numTrials, dataStr);
             job.println(dataStr.toString());
             numQueries = (int)pwr2SeriesNext(1, 1L<<(nh+1));
         }
@@ -160,20 +159,18 @@ public abstract class BaseFilterAccuracyProfile 
implements JobProfile{
      * Processes the results of a trial and appends them to a StringBuilder in 
a tab-separated format.
      *
      * @param numHashes The number of hashes used in the trial.
-     * @param bitsPerEntry The number of bits per entry in the trial.
      * @param falsePositiveRate The false positive rate observed in the trial.
      * @param filterSizeBits The size of the filter used in the trial, in bits.
      * @param numQueryPoints The number of query points used in the trial.
      * @param numTrials The number of trials conducted.
      * @param sb The StringBuilder to which the results are appended.
      */
-    private static void process(final int numHashes, final int bitsPerEntry, 
final double falsePositiveRate,
+    private static void process(final int numHashes, final double 
falsePositiveRate,
                                 final long filterSizeBits, final long 
numQueryPoints,
                                 final long numTrials,  final StringBuilder sb) 
{
         // OUTPUT
         sb.setLength(0);
         sb.append(numHashes).append(TAB);
-        sb.append(bitsPerEntry).append(TAB);
         sb.append(String.format("%.5e", falsePositiveRate)).append(TAB);
         sb.append(filterSizeBits).append(TAB);
         sb.append(numQueryPoints).append(TAB);
@@ -187,7 +184,6 @@ public abstract class BaseFilterAccuracyProfile implements 
JobProfile{
     private String getHeader() {
         final StringBuilder sb = new StringBuilder();
         sb.append("numHashes").append(TAB);
-        sb.append("bitsPerEntry").append(TAB);
         sb.append("FPR").append(TAB);
         sb.append("filterSizeBits").append(TAB);
         sb.append("numQueryPoints").append(TAB);
diff --git 
a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterAccuracyProfile.java
 
b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterAccuracyProfile.java
index 9bf5872..3448817 100644
--- 
a/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterAccuracyProfile.java
+++ 
b/src/main/java/org/apache/datasketches/characterization/filters/QuotientFilterAccuracyProfile.java
@@ -57,7 +57,7 @@ public class QuotientFilterAccuracyProfile extends 
BaseFilterAccuracyProfile{
 
     @Override
     public long getFilterLengthBits() {
-        return sketch.get_space_use();
+        return sketch.getSpaceUse();
     }
 
 
diff --git a/src/main/resources/filters/BloomFilterAccuracyJob.conf 
b/src/main/resources/filters/BloomFilterAccuracyJob.conf
index 2f55b3d..e8fca9c 100644
--- a/src/main/resources/filters/BloomFilterAccuracyJob.conf
+++ b/src/main/resources/filters/BloomFilterAccuracyJob.conf
@@ -24,11 +24,11 @@
 
 # Uniques Profile
 Universe_lgU=20 # Maximum log2 of the input set.
-Universe_capacity = 0.9 # this is used to get number of uniques inserted: 
numUniques = Trials_capacity *(2^Trials_lgU)
+Universe_capacity = 0.8 # this is used to get number of uniques inserted: 
numUniques = Universe_capacity * (2^Universe_lgU)
 
 # Trials Profile
-Trials_lgMinT=4  #Min trials at tail (high counts) 4
-Trials_lgMaxT=11  #Min trials at tail (high counts) 4
+Trials_lgMinT=0  #Min trials at tail (high counts) 4
+Trials_lgMaxT=0  #Max trials at start (low counts)
 Trials_TPPO=1     #how often intermediate results are printed
 Trials_lgMinBpU=1   #start the downward slope of trials at this LgU
 Trials_lgMaxBpU=5  #stop the downward slope of trials at this LgU
diff --git a/src/main/resources/filters/QuotientFilterAccuracyJob.conf 
b/src/main/resources/filters/QuotientFilterAccuracyJob.conf
index 37d4ef1..e1b4ebc 100644
--- a/src/main/resources/filters/QuotientFilterAccuracyJob.conf
+++ b/src/main/resources/filters/QuotientFilterAccuracyJob.conf
@@ -24,12 +24,12 @@
 
 # Uniques Profile
 Universe_lgU=20 # Maximum log2 of the input set.
-Universe_capacity = 0.75 # this is used to get number of uniques inserted: 
numUniques = Trials_capacity *(2^Trials_lgU)
+Universe_capacity = 0.8 # this is used to get number of uniques inserted: 
numUniques = Universe_capacity * (2^Universe_lgU)
 
 
 # Trials Profile
-Trials_lgMinT=4  #Min trials at tail (high counts) 4
-Trials_lgMaxT=11  #Min trials at tail (high counts) 4
+Trials_lgMinT=0  #Min trials at tail (high counts) 4
+Trials_lgMaxT=0  #Max trials at start (low counts)
 Trials_TPPO=1     #how often intermediate results are printed
 Trials_lgMinBpU=1   #start the downward slope of trials at this LgU
 Trials_lgMaxBpU=5  #stop the downward slope of trials at this LgU
diff --git a/src/results/BloomFilterAccuracyProfile20240612_050335PST.txt 
b/src/results/BloomFilterAccuracyProfile20240612_050335PST.txt
new file mode 100644
index 0000000..d067369
--- /dev/null
+++ b/src/results/BloomFilterAccuracyProfile20240612_050335PST.txt
@@ -0,0 +1,49 @@
+START JOB BloomFilterAccuracyProfile
+Date Time: 2024/06/12 05:03:35 PST
+numHashes      bitsPerEntry    FPR     filterSizeBits  numQueryPoints  
numTrials
+4      5       6.62007e-02     5446016 32      608
+5      7       3.12879e-02     6807488 64      412
+6      8       1.54944e-02     8169024 128     299
+7      10      7.69257e-03     9530496 256     228
+8      11      4.18681e-03     10892032        512     181
+9      12      1.82690e-03     12253504        1024    147
+10     14      1.01659e-03     13614976        2048    122
+11     15      4.78800e-04     14976512        4096    103
+12     17      2.34540e-04     16337984        8192    89
+13     18      1.29997e-04     17699520        16384   77
+14     20      5.92132e-05     19060992        32768   67
+15     21      2.97546e-05     20422464        65536   60
+16     23      1.88576e-05     21784000        131072  53
+17     24      7.70887e-06     23145472        262144  48
+18     25      3.32677e-06     24507008        524288  43
+19     27      2.00516e-06     25868480        1048576 39
+20     28      8.74201e-07     27229952        2097152 36
+21     30      5.12961e-07     28591488        4194304 33
+22     31      2.86102e-07     29952960        8388608 30
+23     33      1.49012e-07     31314496        16777216        28
+24     34      5.50197e-08     32675968        33554432        26
+
+PROPERTIES:
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+filterLengthBits=16777216
+JobProfile=org.apache.datasketches.characterization.filters.BloomFilterAccuracyProfile
+lgNumQueries=20
+maxNumHashes=24
+minNumHashes=4
+OutputFileName=BloomFilterAccuracyProfile20240612_050335PST.txt
+OutputFileNameData=BloomFilterAccuracyProfile20240612_050335PST.tsv
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
+TimeZone=PST
+TimeZoneOffset=-28800000
+Trials_lgMaxBpU=5
+Trials_lgMaxT=11
+Trials_lgMinBpU=1
+Trials_lgMinT=4
+Trials_TPPO=1
+Universe_capacity=0.9
+Universe_lgU=20
+
+Total Job Time: 0:03:36.541
+END JOB BloomFilterAccuracyProfile
+
+
diff --git a/src/results/QuotientFilterAccuracyProfile20240612_062110PST.txt 
b/src/results/QuotientFilterAccuracyProfile20240612_062110PST.txt
new file mode 100644
index 0000000..6b10d80
--- /dev/null
+++ b/src/results/QuotientFilterAccuracyProfile20240612_062110PST.txt
@@ -0,0 +1,48 @@
+START JOB QuotientFilterAccuracyProfile
+Date Time: 2024/06/12 06:21:10 PST
+numHashes      bitsPerEntry    FPR     filterSizeBits  numQueryPoints  
numTrials
+4      4       3.12449e-01     4194304 32      608
+5      5       1.68803e-01     5242880 64      412
+6      6       8.94910e-02     6291456 128     299
+7      7       4.55558e-02     7340032 256     228
+8      8       2.32756e-02     8388608 512     181
+9      9       1.18184e-02     9437184 1024    147
+10     10      5.85537e-03     10485760        2048    122
+11     11      2.92021e-03     11534336        4096    103
+12     12      1.44564e-03     12582912        8192    89
+13     13      7.21325e-04     13631488        16384   77
+14     14      3.70766e-04     14680064        32768   67
+15     15      1.88446e-04     15728640        65536   60
+16     16      9.39999e-05     16777216        131072  53
+17     17      4.57764e-05     17825792        262144  48
+18     18      2.21341e-05     18874368        524288  43
+19     19      1.10284e-05     19922944        1048576 39
+20     20      5.97371e-06     20971520        2097152 36
+21     21      2.92605e-06     22020096        4194304 33
+22     22      1.49806e-06     23068672        8388608 30
+23     23      6.96097e-07     24117248        16777216        28
+24     24      3.67944e-07     25165824        33554432        26
+
+PROPERTIES:
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+JobProfile=org.apache.datasketches.characterization.filters.QuotientFilterAccuracyProfile
+lgNumQueries=20
+maxNumHashes=24
+minNumHashes=4
+OutputFileName=QuotientFilterAccuracyProfile20240612_062110PST.txt
+OutputFileNameData=QuotientFilterAccuracyProfile20240612_062110PST.tsv
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
+TimeZone=PST
+TimeZoneOffset=-28800000
+Trials_lgMaxBpU=5
+Trials_lgMaxT=11
+Trials_lgMinBpU=1
+Trials_lgMinT=4
+Trials_TPPO=1
+Universe_capacity=0.75
+Universe_lgU=20
+
+Total Job Time: 0:05:18.549
+END JOB QuotientFilterAccuracyProfile
+
+
diff --git a/src/results/QuotientFilterAccuracyProfile20240617_065441PST.txt 
b/src/results/QuotientFilterAccuracyProfile20240617_065441PST.txt
new file mode 100644
index 0000000..2d4d3b7
--- /dev/null
+++ b/src/results/QuotientFilterAccuracyProfile20240617_065441PST.txt
@@ -0,0 +1,48 @@
+START JOB QuotientFilterAccuracyProfile
+Date Time: 2024/06/17 06:54:41 PST
+numHashes      bitsPerEntry    FPR     filterSizeBits  numQueryPoints  
numTrials
+4      4       3.59067e-01     4194304 32      608
+5      5       2.01949e-01     5242880 64      412
+6      6       1.03522e-01     6291456 128     299
+7      7       5.34025e-02     7340032 256     228
+8      8       2.74840e-02     8388608 512     181
+9      9       1.37383e-02     9437184 1024    147
+10     10      7.02004e-03     10485760        2048    122
+11     11      3.26153e-03     11534336        4096    103
+12     12      1.75287e-03     12582912        8192    89
+13     13      8.58456e-04     13631488        16384   77
+14     14      4.60041e-04     14680064        32768   67
+15     15      2.18201e-04     15728640        65536   60
+16     16      1.10122e-04     16777216        131072  53
+17     17      5.43594e-05     17825792        262144  48
+18     18      2.86989e-05     18874368        524288  43
+19     19      1.39383e-05     19922944        1048576 39
+20     20      6.83467e-06     20971520        2097152 36
+21     21      3.29451e-06     22020096        4194304 33
+22     22      1.68880e-06     23068672        8388608 30
+23     23      8.62139e-07     24117248        16777216        28
+24     24      4.77983e-07     25165824        33554432        26
+
+PROPERTIES:
+FileNameDateFormat=yyyyMMdd'_'HHmmssz
+JobProfile=org.apache.datasketches.characterization.filters.QuotientFilterAccuracyProfile
+lgNumQueries=20
+maxNumHashes=24
+minNumHashes=4
+OutputFileName=QuotientFilterAccuracyProfile20240617_065441PST.txt
+OutputFileNameData=QuotientFilterAccuracyProfile20240617_065441PST.tsv
+ReadableDateFormat=yyyy/MM/dd HH:mm:ss z
+TimeZone=PST
+TimeZoneOffset=-28800000
+Trials_lgMaxBpU=5
+Trials_lgMaxT=11
+Trials_lgMinBpU=1
+Trials_lgMinT=4
+Trials_TPPO=1
+Universe_capacity=0.9
+Universe_lgU=20
+
+Total Job Time: 0:16:45.971
+END JOB QuotientFilterAccuracyProfile
+
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to