This is an automated email from the ASF dual-hosted git repository.

jmalkin pushed a commit to branch count_min_python
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit ab5d95c3cb95a347034356aa25bc8cc0002c0b91
Author: Jon Malkin <[email protected]>
AuthorDate: Thu Apr 6 00:28:38 2023 -0700

    Fix size computation, add python method to determine size
---
 count/include/count_min_impl.hpp | 2 +-
 python/src/count_wrapper.cpp     | 2 ++
 python/tests/count_min_test.py   | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/count/include/count_min_impl.hpp b/count/include/count_min_impl.hpp
index 7f414a5..0568a7a 100644
--- a/count/include/count_min_impl.hpp
+++ b/count/include/count_min_impl.hpp
@@ -363,7 +363,7 @@ size_t count_min_sketch<W,A>::get_serialized_size_bytes() const {
 
   // If the sketch is empty, we're done. Otherwise, we need the total weight
   // held by the sketch as well as a data table of size (num_buckets * num_hashes)
-  return preamble_longs + (is_empty() ? 0 : sizeof(W) * (1 + _num_buckets * _num_hashes));
+  return (preamble_longs * sizeof(uint64_t)) + (is_empty() ? 0 : sizeof(W) * (1 + _num_buckets * _num_hashes));
 }
 
 template<typename W, typename A>
diff --git a/python/src/count_wrapper.cpp b/python/src/count_wrapper.cpp
index 8f6ab58..1c6a19a 100644
--- a/python/src/count_wrapper.cpp
+++ b/python/src/count_wrapper.cpp
@@ -77,6 +77,8 @@ void bind_count_min_sketch(py::module &m, const char* name) {
          "Returns an lower bound on the estimate for the provided string")
     .def("merge", &count_min_sketch<W>::merge, py::arg("other"),
          "Merges the provided other sketch into this one")
+    .def("get_serialized_size_bytes", &count_min_sketch<W>::get_serialized_size_bytes,
+         "Returns the size in bytes of the serialized image of the sketch")
     .def(
         "serialize",
         [](const count_min_sketch<W>& sk) {
diff --git a/python/tests/count_min_test.py b/python/tests/count_min_test.py
index 3bd1eff..6d89d9f 100644
--- a/python/tests/count_min_test.py
+++ b/python/tests/count_min_test.py
@@ -68,6 +68,7 @@ class CountMinTest(unittest.TestCase):
 
     # finally, serialize and reconstruct
     cm_bytes = cm.serialize()
+    self.assertEqual(cm.get_serialized_size_bytes(), len(cm_bytes))
     new_cm = count_min_sketch.deserialize(cm_bytes)
 
     # and now interrogate the sketch


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to