This is an automated email from the ASF dual-hosted git repository. alsay pushed a commit to branch tuple_sketch in repository https://gitbox.apache.org/repos/asf/incubator-datasketches-characterization.git
commit 16721a2d145c0d28a492fb9805f221c2dd482fd7 Author: AlexanderSaydakov <[email protected]> AuthorDate: Wed Jul 15 13:33:14 2020 -0700 tuple sketch and union timing --- cpp/src/main.cpp | 5 ++ cpp/src/tuple_sketch_timing_profile.cpp | 106 ++++++++++++++++++++++++ cpp/src/tuple_sketch_timing_profile.hpp | 34 ++++++++ cpp/src/tuple_union_timing_profile.cpp | 142 ++++++++++++++++++++++++++++++++ cpp/src/tuple_union_timing_profile.hpp | 34 ++++++++ 5 files changed, 321 insertions(+) diff --git a/cpp/src/main.cpp b/cpp/src/main.cpp index 9317d63..8cd8362 100644 --- a/cpp/src/main.cpp +++ b/cpp/src/main.cpp @@ -30,6 +30,9 @@ #include "theta_sketch_timing_profile.hpp" #include "theta_union_timing_profile.hpp" +#include "tuple_sketch_timing_profile.hpp" +#include "tuple_union_timing_profile.hpp" + #include "kll_sketch_timing_profile.hpp" #include "kll_merge_timing_profile.hpp" @@ -67,6 +70,8 @@ int main(int argc, char **argv) { job_profile::add("hll-union-timing", job_profile_ptr(new hll_union_timing_profile())); job_profile::add("theta-sketch-timing", job_profile_ptr(new theta_sketch_timing_profile())); job_profile::add("theta-union-timing", job_profile_ptr(new theta_union_timing_profile())); + job_profile::add("tuple-sketch-timing", job_profile_ptr(new tuple_sketch_timing_profile())); + job_profile::add("tuple-union-timing", job_profile_ptr(new tuple_union_timing_profile())); job_profile::add("kll-sketch-timing-float", job_profile_ptr(new kll_sketch_timing_profile<float>())); job_profile::add("kll-sketch-timing-string", job_profile_ptr(new kll_sketch_timing_profile<std::string>())); job_profile::add("kll-merge-timing-float", job_profile_ptr(new kll_merge_timing_profile<float>())); diff --git a/cpp/src/tuple_sketch_timing_profile.cpp b/cpp/src/tuple_sketch_timing_profile.cpp new file mode 100644 index 0000000..91e6a8f --- /dev/null +++ b/cpp/src/tuple_sketch_timing_profile.cpp @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <iostream> +#include <algorithm> +#include <random> +#include <chrono> +#include <sstream> + +#include <tuple_sketch.hpp> + +#include "tuple_sketch_timing_profile.hpp" + +namespace datasketches { + +void tuple_sketch_timing_profile::run() { + const size_t lg_min_stream_len(0); + const size_t lg_max_stream_len(24); + const size_t ppo(16); + + const size_t lg_max_trials(16); + const size_t lg_min_trials(8); + + update_tuple_sketch<double>::builder builder; + + // some arbitrary starting value + uint64_t counter(35538947); + + const uint64_t golden64(0x9e3779b97f4a7c13ULL); // the golden ratio + + std::cout << "Stream\tTrials\tBuild\tUpdate\tSer\tDeser\tSize" << std::endl; + + size_t stream_length(1 << lg_min_stream_len); + while (stream_length <= (1 << lg_max_stream_len)) { + + std::chrono::nanoseconds build_time_ns(0); + std::chrono::nanoseconds update_time_ns(0); + std::chrono::nanoseconds compact_time_ns(0); + std::chrono::nanoseconds serialize_time_ns(0); + std::chrono::nanoseconds deserialize_time_ns(0); + size_t size_bytes(0); + + const size_t num_trials = get_num_trials(stream_length, lg_min_stream_len, lg_max_stream_len, lg_min_trials, lg_max_trials); + + for (size_t i = 0; i < num_trials; i++) { + const auto start_build(std::chrono::high_resolution_clock::now()); + update_tuple_sketch<double> sketch = builder.build(); + const auto finish_build(std::chrono::high_resolution_clock::now()); + build_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_build - start_build); + + const auto start_update(std::chrono::high_resolution_clock::now()); + for (size_t j = 0; j < stream_length; j++) { + sketch.update(counter, 1); + counter += golden64; + } + const auto finish_update(std::chrono::high_resolution_clock::now()); + update_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_update - start_update); + + auto start_compact(std::chrono::high_resolution_clock::now()); + auto compact_sketch = sketch.compact(); + const auto finish_compact(std::chrono::high_resolution_clock::now()); + compact_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_compact - start_compact); + + auto start_serialize(std::chrono::high_resolution_clock::now()); + auto bytes = compact_sketch.serialize(); + const auto finish_serialize(std::chrono::high_resolution_clock::now()); + serialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_serialize - start_serialize); + + const auto start_deserialize(std::chrono::high_resolution_clock::now()); + auto deserialized_sketch = compact_tuple_sketch<double>::deserialize(bytes.data(), bytes.size()); + const auto finish_deserialize(std::chrono::high_resolution_clock::now()); + deserialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_deserialize - start_deserialize); + + size_bytes += bytes.size(); + } + + std::cout << stream_length << "\t" + << num_trials << "\t" + << (double) build_time_ns.count() / num_trials << "\t" + << (double) update_time_ns.count() / num_trials / stream_length << "\t" + << (double) serialize_time_ns.count() / num_trials << "\t" + << (double) deserialize_time_ns.count() / num_trials << "\t" + << (double) size_bytes / num_trials << "\t" + << std::endl; + stream_length = pwr_2_law_next(ppo, stream_length); + } + +} + +} diff --git a/cpp/src/tuple_sketch_timing_profile.hpp b/cpp/src/tuple_sketch_timing_profile.hpp new file mode 100644 index 0000000..a41ffe7 --- /dev/null +++ b/cpp/src/tuple_sketch_timing_profile.hpp @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TUPLE_SKETCH_TIMING_PROFILE_HPP_ +#define TUPLE_SKETCH_TIMING_PROFILE_HPP_ + +#include "job_profile.hpp" + +namespace datasketches { + +class tuple_sketch_timing_profile: public job_profile { +public: + void run(); +}; + +} + +#endif diff --git a/cpp/src/tuple_union_timing_profile.cpp b/cpp/src/tuple_union_timing_profile.cpp new file mode 100644 index 0000000..79c5f34 --- /dev/null +++ b/cpp/src/tuple_union_timing_profile.cpp @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include <iostream> +#include <algorithm> +#include <random> +#include <chrono> +#include <sstream> + +#include <tuple_union.hpp> + +#include "tuple_union_timing_profile.hpp" + +namespace datasketches { + +void tuple_union_timing_profile::run() { + const size_t lg_min_stream_len = 0; + const size_t lg_max_stream_len = 26; + const size_t ppo = 16; + + const size_t lg_max_trials = 14; + const size_t lg_min_trials = 6; + + const int lg_k = 12; + const int num_sketches_to_union = 32; + + // some arbitrary starting value + uint64_t counter = 35538947; + const uint64_t golden64 = 0x9e3779b97f4a7c13ULL; // the golden ratio + + std::cout << "Stream\tTrials\tBuild\tUpdate\tCompact\tSerialize\tDeserialize\tUnion\tResult" << std::endl; + + std::unique_ptr<update_tuple_sketch<double>> update_sketches[num_sketches_to_union]; + std::unique_ptr<compact_tuple_sketch<double>> compact_sketches[num_sketches_to_union]; + + auto sketch_builder = update_tuple_sketch<double>::builder().set_lg_k(lg_k); + auto union_builder = tuple_union<double>::builder().set_lg_k(lg_k); + + size_t stream_length = 1 << lg_min_stream_len; + while (stream_length <= (1 << lg_max_stream_len)) { + + std::chrono::nanoseconds build_time_ns(0); + std::chrono::nanoseconds update_time_ns(0); + std::chrono::nanoseconds compact_time_ns(0); + std::chrono::nanoseconds serialize_time_ns(0); + std::chrono::nanoseconds deserialize_time_ns(0); + std::chrono::nanoseconds union_time_ns(0); + std::chrono::nanoseconds result_time_ns(0); + size_t num_retained = 0; + + const size_t num_trials = get_num_trials(stream_length, lg_min_stream_len, lg_max_stream_len, lg_min_trials, lg_max_trials); + for (size_t t = 0; t < num_trials; t++) { + const auto start_build(std::chrono::high_resolution_clock::now()); + for (size_t i = 0; i < num_sketches_to_union; i++) { + update_sketches[i] = std::unique_ptr<update_tuple_sketch<double>>(new update_tuple_sketch<double>(sketch_builder.build())); + } + tuple_union<double> u = union_builder.build(); + const auto finish_build(std::chrono::high_resolution_clock::now()); + build_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_build - start_build); + + const auto start_update(std::chrono::high_resolution_clock::now()); + size_t i = 0; + for (size_t j = 0; j < stream_length; j++) { + update_sketches[i]->update(counter, counter); + counter += golden64; + i++; + if (i == num_sketches_to_union) i = 0; + } + const auto finish_update(std::chrono::high_resolution_clock::now()); + update_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_update - start_update); + + const auto start_compacting(std::chrono::high_resolution_clock::now()); + for (size_t i = 0; i < num_sketches_to_union; i++) { + update_sketches[i]->trim(); + compact_sketches[i] = std::unique_ptr<compact_tuple_sketch<double>>(new compact_tuple_sketch<double>(update_sketches[i]->compact())); + } + const auto finish_compacting(std::chrono::high_resolution_clock::now()); + compact_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_compacting - start_compacting); + + std::stringstream s(std::ios::in | std::ios::out | std::ios::binary); + const auto start_serialize(std::chrono::high_resolution_clock::now()); + for (size_t i = 0; i < num_sketches_to_union; i++) { + compact_sketches[i]->serialize(s); + } + const auto finish_serialize(std::chrono::high_resolution_clock::now()); + serialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_serialize - start_serialize); + + const auto start_deserialize(std::chrono::high_resolution_clock::now()); + for (size_t i = 0; i < num_sketches_to_union; i++) { + compact_sketches[i] = std::unique_ptr<compact_tuple_sketch<double>>(new compact_tuple_sketch<double>(compact_tuple_sketch<double>::deserialize(s))); + } + const auto finish_deserialize(std::chrono::high_resolution_clock::now()); + deserialize_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_deserialize - start_deserialize); + + const auto start_union(std::chrono::high_resolution_clock::now()); + for (size_t i = 0; i < num_sketches_to_union; i++) { + u.update(*compact_sketches[i]); + } + const auto finish_union(std::chrono::high_resolution_clock::now()); + union_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_union - start_union); + + const auto start_result(std::chrono::high_resolution_clock::now()); + auto result = u.get_result(); + const auto finish_result(std::chrono::high_resolution_clock::now()); + result_time_ns += std::chrono::duration_cast<std::chrono::nanoseconds>(finish_result - start_result); + + num_retained += result.get_num_retained(); + } + + std::cout << stream_length << "\t" + << num_trials << "\t" + << (double) build_time_ns.count() / num_trials << "\t" + << (double) update_time_ns.count() / num_trials / stream_length << "\t" + << (double) compact_time_ns.count() / num_trials << "\t" + << (double) serialize_time_ns.count() / num_trials << "\t" + << (double) deserialize_time_ns.count() / num_trials << "\t" + << (double) union_time_ns.count() / num_trials << "\t" + << (double) result_time_ns.count() / num_trials << "\t" + << (double) num_retained / num_trials + << std::endl; + stream_length = pwr_2_law_next(ppo, stream_length); + } + +} + +} diff --git a/cpp/src/tuple_union_timing_profile.hpp b/cpp/src/tuple_union_timing_profile.hpp new file mode 100644 index 0000000..9539e04 --- /dev/null +++ b/cpp/src/tuple_union_timing_profile.hpp @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TUPLE_UNION_TIMING_PROFILE_HPP_ +#define TUPLE_UNION_TIMING_PROFILE_HPP_ + +#include "job_profile.hpp" + +namespace datasketches { + +class tuple_union_timing_profile: public job_profile { +public: + void run(); +}; + +} + +#endif --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
