This is an automated email from the ASF dual-hosted git repository.
placave pushed a commit to branch go-characterization in repository https://gitbox.apache.org/repos/asf/datasketches-characterization.git
The following commit(s) were added to refs/heads/go-characterization by this push:
new 568c64b [Go] Fix accuracy stats
568c64b is described below
commit 568c64b82e3031ea081234c6d7087166517ef25c
Author: Pierre Lacave <[email protected]>
AuthorDate: Tue Mar 19 20:12:07 2024 +0100
[Go] Fix accuracy stats
---
go/distinct_count_accuracy_profile.go | 45 +++++++++++++++++++++++------------
1 file changed, 30 insertions(+), 15 deletions(-)
diff --git a/go/distinct_count_accuracy_profile.go b/go/distinct_count_accuracy_profile.go
index ac54f94..0f27f98 100644
--- a/go/distinct_count_accuracy_profile.go
+++ b/go/distinct_count_accuracy_profile.go
@@ -46,22 +46,41 @@ type DistinctCountAccuracyProfileRunner interface {
}
type accuracyStats struct {
- trueValue uint64
+ /*
+ public UpdateDoublesSketch qsk; //quantile sketch created by constructor
+ public double sumEst = 0;
+ public double sumRelErr = 0;
+ public double sumSqErr = 0;
+ public double rmsre = 0; //used later for plotting, set externally
+ public double trueValue; //set by constructor, used only for error analysis
+ public long uniques; //set by constructor, used as a coordinate for intersection
+ public int bytes = 0;
+ */
+ qsk *kll.ItemsSketch[float64]
sumEst float64
sumRelErr float64
sumSqRelErr float64
- count int
- // Make that a sketch of float64
- rel_err_distribution *kll.ItemsSketch[float64]
+ rmse float64
+ trueValue uint64
+ uniques int
+ bytes int
+}
+
+func newAccuracyStats(k int, trueValue uint64) *accuracyStats {
+ qsk, _ := kll.NewKllItemsSketch[float64](uint16(k), 8, common.ArrayOfDoublesSerDe{})
+ return &accuracyStats{
+ qsk: qsk,
+ trueValue: trueValue,
+ uniques: int(trueValue),
+ }
}
func (a *accuracyStats) update(est float64) {
+ a.qsk.Update(est)
a.sumEst += est
- relativeError := est/float64(a.trueValue) - 1.0
- a.sumRelErr += relativeError
- a.sumSqRelErr += relativeError * relativeError
- a.rel_err_distribution.Update(relativeError)
- a.count++
+ a.sumRelErr += est/float64(a.trueValue) - 1.0
+ erro := est - float64(a.trueValue)
+ a.sumSqRelErr += erro * erro
}
type DistinctCountAccuracyProfile struct {
@@ -162,7 +181,7 @@ func process(qArr []*accuracyStats, cumTrials int, sb *strings.Builder) {
sb.WriteString(fmt.Sprintf("%d", cumTrials))
sb.WriteString("\t")
- quants, _ := q.rel_err_distribution.GetQuantiles(GAUSSIANS_4SD, true)
+ quants, _ := q.qsk.GetQuantiles(GAUSSIANS_4SD, true)
for i := 0; i < len(quants); i++ {
sb.WriteString(fmt.Sprintf("%e", float64(quants[i])/(float64(trueUniques))-1.0))
sb.WriteString("\t")
@@ -221,11 +240,7 @@ func buildLog2AccuracyStatsArray(lgMin, lgMax, ppo, lgQK int) []*accuracyStats {
qArr := make([]*accuracyStats, qLen)
p := uint64(1) << lgMin
for i := 0; i < qLen; i++ {
- kllSketch, _ := kll.NewKllItemsSketch[float64](uint16(lgQK), 8, common.ArrayOfDoublesSerDe{})
- qArr[i] = &accuracyStats{
- trueValue: p,
- rel_err_distribution: kllSketch,
- }
+ qArr[i] = newAccuracyStats(1<<lgQK, p)
p = pwr2SeriesNext(ppo, p)
}
return qArr
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]