This is an automated email from the ASF dual-hosted git repository. placave pushed a commit to branch go-characterization in repository https://gitbox.apache.org/repos/asf/datasketches-characterization.git
commit 7848799a6dd3e29cdc588b736713d0fc749a07b3 Author: Pierre Lacave <[email protected]> AuthorDate: Thu Mar 14 20:29:20 2024 +0100 [Go] Add a first job without much stats tracking --- go/distinct_count_accuracy_profile.go | 129 ++++++++++++++++++++++++++++++++++ go/go.mod | 23 ++++++ go/go.sum | 12 ++++ go/hll_sketch_accuracy_profile.go | 46 ++++++++++++ go/job.go | 75 ++++++++++++++++++++ go/main.go | 29 ++++++++ 6 files changed, 314 insertions(+) diff --git a/go/distinct_count_accuracy_profile.go b/go/distinct_count_accuracy_profile.go new file mode 100644 index 0000000..cddaa54 --- /dev/null +++ b/go/distinct_count_accuracy_profile.go @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package main + +import ( + "fmt" + "time" +) + +type DistinctCountAccuracyProfileRunner interface { + runTrial(stats []accuracyStats, key uint64) uint64 +} + +type accuracyStats struct { + trueValue int + sumEst float64 + sumRelErr float64 + sumSqRelErr float64 + count int + //kll_sketch<double> rel_err_distribution; +} + +type DistinctCountAccuracyProfile struct { + runner DistinctCountAccuracyProfileRunner + stats []accuracyStats +} + +func NewDistinctCountAccuracyProfile(runner DistinctCountAccuracyProfileRunner) *DistinctCountAccuracyProfile { + return &DistinctCountAccuracyProfile{ + runner, + make([]accuracyStats, 0), + } +} + +func (d *DistinctCountAccuracyProfile) run() { + const ( + lgMinTrials = 4 + lgMaxTrials = 16 + trialsPpo = 4 + printIntermediate = true + + minT = uint64(1) << lgMinTrials + maxTrials = 1 << lgMaxTrials + + lgMinCounts = 0 + lgMaxCounts = 32 + countsPpo = 16 + + quantilesK = 10000 + ) + + var ( + numPoints = countPoints(lgMinCounts, lgMaxCounts, countsPpo) + p = uint64(1) << lgMinCounts + key = uint64(0) + ) + + for i := 0; i < numPoints; i++ { + d.stats = append(d.stats, accuracyStats{ + trueValue: quantilesK, + //rel_err_distribution: p, + }) + p = pwr2LawNext(countsPpo, p) + } + + startTime := time.Now().UnixMilli() + + // this will generate a table of data up to each intermediate number of trials + lastTrials := uint64(0) + for lastTrials < maxTrials { + nextTrials := minT + if lastTrials != 0 { + nextTrials = pwr2LawNext(trialsPpo, lastTrials) + } + delta := nextTrials - lastTrials + for i := uint64(0); i < delta; i++ { + key = d.runner.runTrial(d.stats, key) + } + lastTrials = nextTrials + if printIntermediate || nextTrials == maxTrials { + d.printStats() + } + + fmt.Println("Cum Trials : ", lastTrials) + fmt.Println("Cum Updates : ", key) + cumTimeMs := time.Now().UnixMilli() - startTime + fmt.Println("Cum Time, ms : ", cumTimeMs) + timePerTrialMs := float64(cumTimeMs) / float64(lastTrials) + fmt.Println("Avg Time Per Trial, ms : ", timePerTrialMs) + currentTime := time.Now() + fmt.Println("Current time : ", currentTime) + timeToCompleteMs := float64(cumTimeMs) / float64(lastTrials) * float64(maxTrials-lastTrials) + estCompletionTime := currentTime.Add(time.Duration(timeToCompleteMs) * time.Millisecond) + fmt.Println("Est Time of Completion : ", estCompletionTime) + fmt.Println() + } +} + +func (d *DistinctCountAccuracyProfile) printStats() { + for _, stat := range d.stats { + fmt.Println(stat.trueValue) + fmt.Println(stat.count) + fmt.Println(stat.sumEst) + fmt.Println(stat.sumRelErr) + fmt.Println(stat.sumSqRelErr) + // quantiles + //const auto quants = stat.get_quantiles(FRACTIONS, FRACT_LEN); + //for (size_t i = 0; i < FRACT_LEN; i++) { + // const double quantile = quants[i]; + // std::cout << quantile; + // if (i != FRACT_LEN - 1) std::cout << "\t"; + //} + fmt.Println() + } +} diff --git a/go/go.mod b/go/go.mod new file mode 100644 index 0000000..b820dce --- /dev/null +++ b/go/go.mod @@ -0,0 +1,23 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +module github.com/apache/datasketches-characterization/go + +go 1.21.1 + +require github.com/apache/datasketches-go v0.0.0-20240312184136-6950b3ed5d9c + +require github.com/twmb/murmur3 v1.1.8 // indirect diff --git a/go/go.sum b/go/go.sum new file mode 100644 index 0000000..9fb48d3 --- /dev/null +++ b/go/go.sum @@ -0,0 +1,12 @@ +github.com/apache/datasketches-go v0.0.0-20240312184136-6950b3ed5d9c h1:wa6C87Gc0UOTCXYSyEQCZKbdzBKA7x+Wk1pLjPRvCj4= +github.com/apache/datasketches-go v0.0.0-20240312184136-6950b3ed5d9c/go.mod h1:MieYfErTcRk22fstMEpxXk0/bzFfSbnfE/Kxz6tLvCE= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg= +github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/go/hll_sketch_accuracy_profile.go b/go/hll_sketch_accuracy_profile.go new file mode 100644 index 0000000..cd7ccfa --- /dev/null +++ b/go/hll_sketch_accuracy_profile.go @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import "github.com/apache/datasketches-go/hll" + +type HllSketchAccuracyProfile struct { +} + +func NewHllSketchAccuracyProfile() *HllSketchAccuracyProfile { + return &HllSketchAccuracyProfile{} +} + +func (HllSketchAccuracyProfile) runTrial(stats []accuracyStats, key uint64) uint64 { + lgK := 12 + + s, _ := hll.NewHllSketch(lgK, hll.TgtHllTypeDefault) + count := 0 + + for _, stat := range stats { + delta := stat.trueValue - count + for i := 0; i < delta; i++ { + s.UpdateUInt64(key) + key++ + } + count += delta + //stat.update(s.get_estimate()) + } + + return key +} diff --git a/go/job.go b/go/job.go new file mode 100644 index 0000000..7fc3a5b --- /dev/null +++ b/go/job.go @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +import "math" + +type JobProfile interface { + run() +} + +// countPoints Counts the actual number of plotting points between lgStart and lgEnd assuming the given PPO. +// This is not a simple linear function due to points that may be skipped in the low range. +// param lgStart Log2 of the starting value +// param lgEnd Log2 of the ending value +// param ppo the number of logarithmically evenly spaced points per octave. +// returns the actual number of plotting points between lgStart and lgEnd. +func countPoints(lgStart, lgEnd, ppo int) int { + p := uint64(1) << lgStart + end := uint64(1) << lgEnd + count := 0 + for p <= end { + p = pwr2LawNext(ppo, p) + count++ + } + return count +} + +// pwr2LawNext Computes the next larger integer point in the power series +// point = 2^( i / ppo ) given the current point in the series. +// For illustration, this can be used in a loop as follows: +// +// int maxP = 1024; +// int minP = 1; +// int ppo = 2; +// +// for (int p = minP; p <= maxP; p = pwr2LawNext(ppo, p)) { +// System.out.print(p + " "); +// } +// //generates the following series: +// //1 2 3 4 6 8 11 16 23 32 45 64 91 128 181 256 362 512 724 1024 +// +// param ppo Points-Per-Octave, or the number of points per integer powers of 2 in the series. +// param curPoint the current point of the series. Must be ≥ 1. +// returns the next point in the power series. +func pwr2LawNext(ppo int, curPoint uint64) uint64 { + cur := curPoint + if cur < 1 { + cur = 1 + } + gi := int64(math.Log2(float64(cur)) * float64(ppo)) + var next uint64 + for { + next = uint64(math.Pow(2.0, float64(gi)/float64(ppo))) + if next > curPoint { + break + } + gi++ + } + return next +} diff --git a/go/main.go b/go/main.go new file mode 100644 index 0000000..5733394 --- /dev/null +++ b/go/main.go @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package main + +func main() { + + jobs := map[string]JobProfile{ + "distinct_count_accuracy_profile": NewDistinctCountAccuracyProfile(NewHllSketchAccuracyProfile()), + } + + for _, job := range jobs { + job.run() + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
