This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch count_min_python
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git
commit ab5d95c3cb95a347034356aa25bc8cc0002c0b91
Author: Jon Malkin <[email protected]>
AuthorDate: Thu Apr 6 00:28:38 2023 -0700

    Fix size computation, add python method to determine size
---
 count/include/count_min_impl.hpp | 2 +-
 python/src/count_wrapper.cpp | 2 ++
 python/tests/count_min_test.py | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/count/include/count_min_impl.hpp b/count/include/count_min_impl.hpp
index 7f414a5..0568a7a 100644
--- a/count/include/count_min_impl.hpp
+++ b/count/include/count_min_impl.hpp
@@ -363,7 +363,7 @@ size_t count_min_sketch<W,A>::get_serialized_size_bytes() const {
 // If the sketch is empty, we're done. Otherwise, we need the total weight
 // held by the sketch as well as a data table of size (num_buckets * num_hashes)
- return preamble_longs + (is_empty() ? 0 : sizeof(W) * (1 + _num_buckets * _num_hashes));
+ return (preamble_longs * sizeof(uint64_t)) + (is_empty() ? 0 : sizeof(W) * (1 + _num_buckets * _num_hashes));
 }
 template<typename W, typename A>
diff --git a/python/src/count_wrapper.cpp b/python/src/count_wrapper.cpp
index 8f6ab58..1c6a19a 100644
--- a/python/src/count_wrapper.cpp
+++ b/python/src/count_wrapper.cpp
@@ -77,6 +77,8 @@ void bind_count_min_sketch(py::module &m, const char* name) {
 "Returns an lower bound on the estimate for the provided string")
 .def("merge", &count_min_sketch<W>::merge, py::arg("other"),
 "Merges the provided other sketch into this one")
+ .def("get_serialized_size_bytes", &count_min_sketch<W>::get_serialized_size_bytes,
+ "Returns the size in bytes of the serialized image of the sketch")
 .def(
 "serialize",
 [](const count_min_sketch<W>& sk) {
diff --git a/python/tests/count_min_test.py b/python/tests/count_min_test.py
index 3bd1eff..6d89d9f 100644
--- a/python/tests/count_min_test.py
+++ b/python/tests/count_min_test.py
@@ -68,6 +68,7 @@ class CountMinTest(unittest.TestCase):
 # finally, serialize and reconstruct
 cm_bytes = cm.serialize()
+ self.assertEqual(cm.get_serialized_size_bytes(), len(cm_bytes))
 new_cm = count_min_sketch.deserialize(cm_bytes)
 # and now interrogate the sketch

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
