This is an automated email from the ASF dual-hosted git repository.

alsay pushed a commit to branch fix_theta_compressed_stream
in repository https://gitbox.apache.org/repos/asf/datasketches-cpp.git

commit 6be246d26d50dba183c025feafe5014675632c42
Author: AlexanderSaydakov <[email protected]>
AuthorDate: Wed Jul 24 17:35:53 2024 -0700

    fixed compressed theta stream serialization
---
 theta/include/theta_sketch_impl.hpp            |  1 +
 theta/test/theta_sketch_serialize_for_java.cpp |  2 +-
 theta/test/theta_sketch_test.cpp               | 41 ++++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/theta/include/theta_sketch_impl.hpp b/theta/include/theta_sketch_impl.hpp
index c31d0ba..b6a5d7e 100644
--- a/theta/include/theta_sketch_impl.hpp
+++ b/theta/include/theta_sketch_impl.hpp
@@ -506,6 +506,7 @@ void compact_theta_sketch_alloc<A>::serialize_version_4(std::ostream& os) const
       previous = entries_[i];
       offset = pack_bits(delta, entry_bits, ptr, offset);
     }
+    if (offset > 0) ++ptr;
     write(os, buffer.data(), ptr - buffer.data());
   }
 }
diff --git a/theta/test/theta_sketch_serialize_for_java.cpp b/theta/test/theta_sketch_serialize_for_java.cpp
index 487553f..876551f 100644
--- a/theta/test/theta_sketch_serialize_for_java.cpp
+++ b/theta/test/theta_sketch_serialize_for_java.cpp
@@ -43,7 +43,7 @@ TEST_CASE("theta sketch generate compressed", "[serialize_for_java]") {
     REQUIRE_FALSE(sketch.is_empty());
     REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
     std::ofstream os("theta_compressed_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
-    sketch.compact().serialize(os);
+    sketch.compact().serialize_compressed(os);
   }
 }
 
diff --git a/theta/test/theta_sketch_test.cpp b/theta/test/theta_sketch_test.cpp
index 0721898..97c4f14 100644
--- a/theta/test/theta_sketch_test.cpp
+++ b/theta/test/theta_sketch_test.cpp
@@ -517,6 +517,47 @@ TEST_CASE("theta sketch: wrap compact v2 estimation from java", "[theta_sketch]"
   }
 }
 
+TEST_CASE("theta sketch: serialize deserialize small compressed", "[theta_sketch]") {
+  auto update_sketch = update_theta_sketch::builder().build();
+  for (int i = 0; i < 10; i++) update_sketch.update(i);
+  auto compact_sketch = update_sketch.compact();
+
+  auto bytes = compact_sketch.serialize_compressed();
+  REQUIRE(bytes.size() == compact_sketch.get_serialized_size_bytes(true));
+  { // deserialize bytes
+    auto deserialized_sketch = compact_theta_sketch::deserialize(bytes.data(), bytes.size());
+    REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
+    REQUIRE(deserialized_sketch.get_theta() == compact_sketch.get_theta());
+    auto iter = deserialized_sketch.begin();
+    for (const auto key: compact_sketch) {
+      REQUIRE(*iter == key);
+      ++iter;
+    }
+  }
+  { // wrap bytes
+    auto wrapped_sketch = wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size());
+    REQUIRE(wrapped_sketch.get_num_retained() == compact_sketch.get_num_retained());
+    REQUIRE(wrapped_sketch.get_theta() == compact_sketch.get_theta());
+    auto iter = wrapped_sketch.begin();
+    for (const auto key: compact_sketch) {
+      REQUIRE(*iter == key);
+      ++iter;
+    }
+  }
+
+  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+  compact_sketch.serialize_compressed(s);
+  REQUIRE(static_cast<size_t>(s.tellp()) == compact_sketch.get_serialized_size_bytes(true));
+  auto deserialized_sketch = compact_theta_sketch::deserialize(s);
+  REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
+  REQUIRE(deserialized_sketch.get_theta() == compact_sketch.get_theta());
+  auto iter = deserialized_sketch.begin();
+  for (const auto key: compact_sketch) {
+    REQUIRE(*iter == key);
+    ++iter;
+  }
+}
+
 TEST_CASE("theta sketch: serialize deserialize compressed", "[theta_sketch]") {
   auto update_sketch = update_theta_sketch::builder().build();
   for (int i = 0; i < 10000; i++) update_sketch.update(i);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to