This is an automated email from the ASF dual-hosted git repository.

placave pushed a commit to branch go-characterization
in repository 
https://gitbox.apache.org/repos/asf/datasketches-characterization.git

commit 7848799a6dd3e29cdc588b736713d0fc749a07b3
Author: Pierre Lacave <[email protected]>
AuthorDate: Thu Mar 14 20:29:20 2024 +0100

    [Go] Add a first job without much stats tracking
---
 go/distinct_count_accuracy_profile.go | 129 ++++++++++++++++++++++++++++++++++
 go/go.mod                             |  23 ++++++
 go/go.sum                             |  12 ++++
 go/hll_sketch_accuracy_profile.go     |  46 ++++++++++++
 go/job.go                             |  75 ++++++++++++++++++++
 go/main.go                            |  29 ++++++++
 6 files changed, 314 insertions(+)

diff --git a/go/distinct_count_accuracy_profile.go 
b/go/distinct_count_accuracy_profile.go
new file mode 100644
index 0000000..cddaa54
--- /dev/null
+++ b/go/distinct_count_accuracy_profile.go
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package main
+
+import (
+       "fmt"
+       "time"
+)
+
+type DistinctCountAccuracyProfileRunner interface {
+       runTrial(stats []accuracyStats, key uint64) uint64
+}
+
+type accuracyStats struct {
+       trueValue   int
+       sumEst      float64
+       sumRelErr   float64
+       sumSqRelErr float64
+       count       int
+       //kll_sketch<double> rel_err_distribution;
+}
+
+type DistinctCountAccuracyProfile struct {
+       runner DistinctCountAccuracyProfileRunner
+       stats  []accuracyStats
+}
+
+func NewDistinctCountAccuracyProfile(runner 
DistinctCountAccuracyProfileRunner) *DistinctCountAccuracyProfile {
+       return &DistinctCountAccuracyProfile{
+               runner,
+               make([]accuracyStats, 0),
+       }
+}
+
+func (d *DistinctCountAccuracyProfile) run() {
+       const (
+               lgMinTrials       = 4
+               lgMaxTrials       = 16
+               trialsPpo         = 4
+               printIntermediate = true
+
+               minT      = uint64(1) << lgMinTrials
+               maxTrials = 1 << lgMaxTrials
+
+               lgMinCounts = 0
+               lgMaxCounts = 32
+               countsPpo   = 16
+
+               quantilesK = 10000
+       )
+
+       var (
+               numPoints = countPoints(lgMinCounts, lgMaxCounts, countsPpo)
+               p         = uint64(1) << lgMinCounts
+               key       = uint64(0)
+       )
+
+       for i := 0; i < numPoints; i++ {
+               d.stats = append(d.stats, accuracyStats{
+                       trueValue: quantilesK,
+                       //rel_err_distribution: p,
+               })
+               p = pwr2LawNext(countsPpo, p)
+       }
+
+       startTime := time.Now().UnixMilli()
+
+       // this will generate a table of data up to each intermediate number of 
trials
+       lastTrials := uint64(0)
+       for lastTrials < maxTrials {
+               nextTrials := minT
+               if lastTrials != 0 {
+                       nextTrials = pwr2LawNext(trialsPpo, lastTrials)
+               }
+               delta := nextTrials - lastTrials
+               for i := uint64(0); i < delta; i++ {
+                       key = d.runner.runTrial(d.stats, key)
+               }
+               lastTrials = nextTrials
+               if printIntermediate || nextTrials == maxTrials {
+                       d.printStats()
+               }
+
+               fmt.Println("Cum Trials             : ", lastTrials)
+               fmt.Println("Cum Updates            : ", key)
+               cumTimeMs := time.Now().UnixMilli() - startTime
+               fmt.Println("Cum Time, ms           : ", cumTimeMs)
+               timePerTrialMs := float64(cumTimeMs) / float64(lastTrials)
+               fmt.Println("Avg Time Per Trial, ms : ", timePerTrialMs)
+               currentTime := time.Now()
+               fmt.Println("Current time           : ", currentTime)
+               timeToCompleteMs := float64(cumTimeMs) / float64(lastTrials) * 
float64(maxTrials-lastTrials)
+               estCompletionTime := 
currentTime.Add(time.Duration(timeToCompleteMs) * time.Millisecond)
+               fmt.Println("Est Time of Completion : ", estCompletionTime)
+               fmt.Println()
+       }
+}
+
+func (d *DistinctCountAccuracyProfile) printStats() {
+       for _, stat := range d.stats {
+               fmt.Println(stat.trueValue)
+               fmt.Println(stat.count)
+               fmt.Println(stat.sumEst)
+               fmt.Println(stat.sumRelErr)
+               fmt.Println(stat.sumSqRelErr)
+               // quantiles
+               //const auto quants = stat.get_quantiles(FRACTIONS, FRACT_LEN);
+               //for (size_t i = 0; i < FRACT_LEN; i++) {
+               //      const double quantile = quants[i];
+               //      std::cout << quantile;
+               //      if (i != FRACT_LEN - 1) std::cout << "\t";
+               //}
+               fmt.Println()
+       }
+}
diff --git a/go/go.mod b/go/go.mod
new file mode 100644
index 0000000..b820dce
--- /dev/null
+++ b/go/go.mod
@@ -0,0 +1,23 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+module github.com/apache/datasketches-characterization/go
+
+go 1.21.1
+
+require github.com/apache/datasketches-go v0.0.0-20240312184136-6950b3ed5d9c
+
+require github.com/twmb/murmur3 v1.1.8 // indirect
diff --git a/go/go.sum b/go/go.sum
new file mode 100644
index 0000000..9fb48d3
--- /dev/null
+++ b/go/go.sum
@@ -0,0 +1,12 @@
+github.com/apache/datasketches-go v0.0.0-20240312184136-6950b3ed5d9c 
h1:wa6C87Gc0UOTCXYSyEQCZKbdzBKA7x+Wk1pLjPRvCj4=
+github.com/apache/datasketches-go v0.0.0-20240312184136-6950b3ed5d9c/go.mod 
h1:MieYfErTcRk22fstMEpxXk0/bzFfSbnfE/Kxz6tLvCE=
+github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/pmezard/go-difflib v1.0.0 
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/stretchr/testify v1.8.4 
h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
+github.com/stretchr/testify v1.8.4/go.mod 
h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg=
+github.com/twmb/murmur3 v1.1.8/go.mod 
h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/go/hll_sketch_accuracy_profile.go 
b/go/hll_sketch_accuracy_profile.go
new file mode 100644
index 0000000..cd7ccfa
--- /dev/null
+++ b/go/hll_sketch_accuracy_profile.go
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main
+
+import "github.com/apache/datasketches-go/hll"
+
+type HllSketchAccuracyProfile struct {
+}
+
+func NewHllSketchAccuracyProfile() *HllSketchAccuracyProfile {
+       return &HllSketchAccuracyProfile{}
+}
+
+func (HllSketchAccuracyProfile) runTrial(stats []accuracyStats, key uint64) 
uint64 {
+       lgK := 12
+
+       s, _ := hll.NewHllSketch(lgK, hll.TgtHllTypeDefault)
+       count := 0
+
+       for _, stat := range stats {
+               delta := stat.trueValue - count
+               for i := 0; i < delta; i++ {
+                       s.UpdateUInt64(key)
+                       key++
+               }
+               count += delta
+               //stat.update(s.get_estimate())
+       }
+
+       return key
+}
diff --git a/go/job.go b/go/job.go
new file mode 100644
index 0000000..7fc3a5b
--- /dev/null
+++ b/go/job.go
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main
+
+import "math"
+
+type JobProfile interface {
+       run()
+}
+
+// countPoints Counts the actual number of plotting points between lgStart and 
lgEnd assuming the given PPO.
+// This is not a simple linear function due to points that may be skipped in 
the low range.
+// param lgStart Log2 of the starting value
+// param lgEnd Log2 of the ending value
+// param ppo the number of logarithmically evenly spaced points per octave.
+// returns the actual number of plotting points between lgStart and lgEnd.
+func countPoints(lgStart, lgEnd, ppo int) int {
+       p := uint64(1) << lgStart
+       end := uint64(1) << lgEnd
+       count := 0
+       for p <= end {
+               p = pwr2LawNext(ppo, p)
+               count++
+       }
+       return count
+}
+
+// pwr2LawNext Computes the next larger integer point in the power series
+// point = 2^( i / ppo ) given the current point in the series.
+// For illustration, this can be used in a loop as follows:
+//
+//     int maxP = 1024;
+//     int minP = 1;
+//     int ppo = 2;
+//
+//     for (int p = minP; p <= maxP; p = pwr2LawNext(ppo, p)) {
+//       System.out.print(p + " ");
+//     }
+//     //generates the following series:
+//     //1 2 3 4 6 8 11 16 23 32 45 64 91 128 181 256 362 512 724 1024
+//
+// param ppo Points-Per-Octave, or the number of points per integer powers of 
2 in the series.
+// param curPoint the current point of the series. Must be &ge; 1.
+// returns the next point in the power series.
+func pwr2LawNext(ppo int, curPoint uint64) uint64 {
+       cur := curPoint
+       if cur < 1 {
+               cur = 1
+       }
+       gi := int64(math.Log2(float64(cur)) * float64(ppo))
+       var next uint64
+       for {
+               next = uint64(math.Pow(2.0, float64(gi)/float64(ppo)))
+               if next > curPoint {
+                       break
+               }
+               gi++
+       }
+       return next
+}
diff --git a/go/main.go b/go/main.go
new file mode 100644
index 0000000..5733394
--- /dev/null
+++ b/go/main.go
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main
+
+func main() {
+
+       jobs := map[string]JobProfile{
+               "distinct_count_accuracy_profile": 
NewDistinctCountAccuracyProfile(NewHllSketchAccuracyProfile()),
+       }
+
+       for _, job := range jobs {
+               job.run()
+       }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to